-
Notifications
You must be signed in to change notification settings - Fork 202
Improve the workflow of xfs devices filesystem check and mount #132
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -43,6 +43,12 @@ const ( | |
fsckErrorsCorrected = 1 | ||
// 'fsck' found errors but exited without correcting them | ||
fsckErrorsUncorrected = 4 | ||
// 'xfs_repair' found errors but exited without correcting them | ||
xfsRepairErrorsUncorrected = 1 | ||
// 'xfs_repair' was unable to proceed due to a dirty log | ||
xfsRepairErrorsDirtyLogs = 2 | ||
// Retry 'xfs_repair' three times on failure to make more repairs on successive runs. | ||
xfsRepairRetries = 3 | ||
) | ||
|
||
// Mounter provides the default implementation of mount.Interface | ||
|
@@ -257,7 +263,7 @@ func (mounter *Mounter) GetMountRefs(pathname string) ([]string, error) { | |
} | ||
|
||
// checkAndRepairFileSystem checks and repairs filesystems using command fsck. | ||
func (mounter *SafeFormatAndMount) checkAndRepairFilesystem(source string) error { | ||
func (mounter *SafeFormatAndMount) checkAndRepairFilesystem(source, target string) error { | ||
klog.V(4).Infof("Checking for issues with fsck on disk: %s", source) | ||
args := []string{"-a", source} | ||
out, err := mounter.Exec.Command("fsck", args...).CombinedOutput() | ||
|
@@ -278,7 +284,7 @@ func (mounter *SafeFormatAndMount) checkAndRepairFilesystem(source string) error | |
} | ||
|
||
// checkAndRepairXfsFilesystem checks and repairs xfs filesystem using command xfs_repair. | ||
func (mounter *SafeFormatAndMount) checkAndRepairXfsFilesystem(source string) error { | ||
func (mounter *SafeFormatAndMount) checkAndRepairXfsFilesystem(source, target string) error { | ||
klog.V(4).Infof("Checking for issues with xfs_repair on disk: %s", source) | ||
|
||
args := []string{source} | ||
|
@@ -292,18 +298,74 @@ func (mounter *SafeFormatAndMount) checkAndRepairXfsFilesystem(source string) er | |
return nil | ||
} else { | ||
klog.Warningf("Filesystem corruption was detected for %s, running xfs_repair to repair", source) | ||
out, err := mounter.Exec.Command("xfs_repair", args...).CombinedOutput() | ||
if err != nil { | ||
return NewMountError(HasFilesystemErrors, "'xfs_repair' found errors on device %s but could not correct them: %s\n", source, out) | ||
} else { | ||
klog.Infof("Device %s has errors which were corrected by xfs_repair.", source) | ||
return nil | ||
var uncorrectedErr error | ||
|
||
// If xfs_repair fails to repair the file system successfully, try giving the same xfs_repair | ||
// command twice more; xfs_repair may be able to make more repairs on successive runs. | ||
for i := 0; i < xfsRepairRetries; i++ { | ||
out, err := mounter.Exec.Command("xfs_repair", args...).CombinedOutput() | ||
if err == nil { | ||
klog.Infof("Device %s has errors which were corrected by xfs_repair.", source) | ||
return nil | ||
27149chen marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
e, isExitError := err.(utilexec.ExitError) | ||
if !isExitError { | ||
return NewMountError(HasFilesystemErrors, "failed to run 'xfs_repair' on device %s: %v\n", source, err) | ||
|
||
} | ||
switch e.ExitStatus() { | ||
case xfsRepairErrorsUncorrected: | ||
uncorrectedErr = NewMountError(HasFilesystemErrors, "'xfs_repair' found errors on device %s but could not correct them: %s\n", source, string(out)) | ||
// If the exit status is 2, do not retry, replay the dirty logs instead. | ||
case xfsRepairErrorsDirtyLogs: | ||
return mounter.replayXfsDirtyLogs(source, target) | ||
default: | ||
return NewMountError(HasFilesystemErrors, "'xfs_repair' found errors on device %s but could not correct them: %s\n", source, string(out)) | ||
} | ||
} | ||
if uncorrectedErr != nil { | ||
return uncorrectedErr | ||
} | ||
} | ||
} | ||
return nil | ||
} | ||
|
||
// replayXfsDirtyLogs tries to replay dirty logs by by mounting and | ||
// immediately unmounting the filesystem on the same class of machine | ||
// that crashed. | ||
func (mounter *SafeFormatAndMount) replayXfsDirtyLogs(source, target string) error { | ||
27149chen marked this conversation as resolved.
Show resolved
Hide resolved
|
||
klog.V(4).Infof("Attempting to replay the dirty logs on device %s", source) | ||
msg := fmt.Sprintf("failed to replay dirty logs on device %s", source) | ||
|
||
// make sure unmount is always called after mount. | ||
err := func() (returnErr error) { | ||
defer func() { | ||
klog.V(4).Infof("Unmounting %s", target) | ||
if err := mounter.Interface.Unmount(target); err != nil { | ||
returnErr = NewMountError(HasFilesystemErrors, "%s: %v\n", msg, err) | ||
} | ||
}() | ||
klog.V(4).Infof("Attempting to mount disk %s at %s", source, target) | ||
if err := mounter.Interface.Mount(source, target, "", []string{"defaults"}); err != nil { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. From man-page:
Are we being too aggressive in automatically fixing errors here? Can this make problems worse somehow? Should this be configurable? If we merge this PR as it is - it will become the new default even for in-tree Kubernetes drivers, so we have to be careful. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @gnufied do you mean that the node might be bad or not the same class, so there is potential risk? |
||
return NewMountError(HasFilesystemErrors, "%s: %v\n", msg, err) | ||
} | ||
return nil | ||
}() | ||
if err != nil { | ||
return err | ||
} | ||
|
||
klog.V(4).Infof("Checking for issues with xfs_repair on disk %s again", source) | ||
out, err := mounter.Exec.Command("xfs_repair", source).CombinedOutput() | ||
if err != nil { | ||
return NewMountError(HasFilesystemErrors, "'xfs_repair' found errors on device %s but could not correct them: %s\n", source, out) | ||
} else { | ||
klog.Infof("Device %s has errors which were corrected by xfs_repair.", source) | ||
return nil | ||
} | ||
} | ||
|
||
// formatAndMount uses unix utils to format and mount the given disk | ||
27149chen marked this conversation as resolved.
Show resolved
Hide resolved
|
||
func (mounter *SafeFormatAndMount) formatAndMount(source string, target string, fstype string, options []string) error { | ||
readOnly := false | ||
|
@@ -365,9 +427,9 @@ func (mounter *SafeFormatAndMount) formatAndMount(source string, target string, | |
var err error | ||
switch existingFormat { | ||
case "xfs": | ||
err = mounter.checkAndRepairXfsFilesystem(source) | ||
err = mounter.checkAndRepairXfsFilesystem(source, target) | ||
default: | ||
err = mounter.checkAndRepairFilesystem(source) | ||
err = mounter.checkAndRepairFilesystem(source, target) | ||
} | ||
|
||
if err != nil { | ||
|
Uh oh!
There was an error while loading. Please reload this page.