diff --git a/changelog/unreleased/issue-2254 b/changelog/unreleased/issue-2254 new file mode 100644 index 000000000..ca884bbbc --- /dev/null +++ b/changelog/unreleased/issue-2254 @@ -0,0 +1,9 @@ +Bugfix: Fix tar issues when dumping `/` + +We've fixed an issue with dumping either `/` or files on the first sublevel +e.g. `/foo` to tar. This also fixes tar dumping issues on Windows where this +issue could also happen. + +https://github.com/restic/restic/issues/2254 +https://github.com/restic/restic/issues/2357 +https://github.com/restic/restic/pull/2255 diff --git a/cmd/restic/cmd_dump.go b/cmd/restic/cmd_dump.go index f5fbc1fd5..4a9e57a37 100644 --- a/cmd/restic/cmd_dump.go +++ b/cmd/restic/cmd_dump.go @@ -1,19 +1,16 @@ package main import ( - "archive/tar" "context" "fmt" - "io" "os" "path" "path/filepath" - "strings" "github.com/restic/restic/internal/debug" + "github.com/restic/restic/internal/dump" "github.com/restic/restic/internal/errors" "github.com/restic/restic/internal/restic" - "github.com/restic/restic/internal/walker" "github.com/spf13/cobra" ) @@ -22,8 +19,10 @@ var cmdDump = &cobra.Command{ Use: "dump [flags] snapshotID file", Short: "Print a backed-up file to stdout", Long: ` -The "dump" command extracts a single file from a snapshot from the repository and -prints its contents to stdout. +The "dump" command extracts files from a snapshot from the repository. If a +single file is selected, it prints its contents to stdout. Folders are output +as a tar file containing the contents of the specified folder. Pass "/" as +file name to dump the whole snapshot as a tar file. The special snapshot "latest" can be used to use the latest snapshot in the repository. 
@@ -59,17 +58,14 @@ func init() { func splitPath(p string) []string { d, f := path.Split(p) - if d == "" { + if d == "" || d == "/" { return []string{f} } - if d == "/" { - return []string{d} - } - s := splitPath(path.Clean(d)) + s := splitPath(path.Join("/", d)) return append(s, f) } -func printFromTree(ctx context.Context, tree *restic.Tree, repo restic.Repository, prefix string, pathComponents []string, pathToPrint string) error { +func printFromTree(ctx context.Context, tree *restic.Tree, repo restic.Repository, prefix string, pathComponents []string) error { if tree == nil { return fmt.Errorf("called with a nil tree") @@ -81,24 +77,42 @@ func printFromTree(ctx context.Context, tree *restic.Tree, repo restic.Repositor if l == 0 { return fmt.Errorf("empty path components") } + + // If we print / we need to assume that there are multiple nodes at that + // level in the tree. + if pathComponents[0] == "" { + if err := checkStdoutTar(); err != nil { + return err + } + return dump.WriteTar(ctx, repo, tree, "/", os.Stdout) + } + item := filepath.Join(prefix, pathComponents[0]) for _, node := range tree.Nodes { - if node.Name == pathComponents[0] || pathComponents[0] == "/" { + // If dumping something in the highest level it will just take the + // first item it finds and dump that according to the switch case below. 
+ if node.Name == pathComponents[0] { switch { - case l == 1 && node.Type == "file": - return getNodeData(ctx, os.Stdout, repo, node) - case l > 1 && node.Type == "dir": + case l == 1 && dump.IsFile(node): + return dump.GetNodeData(ctx, os.Stdout, repo, node) + case l > 1 && dump.IsDir(node): subtree, err := repo.LoadTree(ctx, *node.Subtree) if err != nil { return errors.Wrapf(err, "cannot load subtree for %q", item) } - return printFromTree(ctx, subtree, repo, item, pathComponents[1:], pathToPrint) - case node.Type == "dir": - node.Path = pathToPrint - return tarTree(ctx, repo, node, pathToPrint) + return printFromTree(ctx, subtree, repo, item, pathComponents[1:]) + case dump.IsDir(node): + if err := checkStdoutTar(); err != nil { + return err + } + subtree, err := repo.LoadTree(ctx, *node.Subtree) + if err != nil { + return err + } + return dump.WriteTar(ctx, repo, subtree, item, os.Stdout) case l > 1: return fmt.Errorf("%q should be a dir, but is a %q", item, node.Type) - case node.Type != "file": + case !dump.IsFile(node): return fmt.Errorf("%q should be a file, but is a %q", item, node.Type) } } @@ -162,7 +176,7 @@ func runDump(opts DumpOptions, gopts GlobalOptions, args []string) error { Exitf(2, "loading tree for snapshot %q failed: %v", snapshotIDString, err) } - err = printFromTree(ctx, tree, repo, "", splittedPath, pathToPrint) + err = printFromTree(ctx, tree, repo, "/", splittedPath) if err != nil { Exitf(2, "cannot dump file: %v", err) } @@ -170,126 +184,9 @@ func runDump(opts DumpOptions, gopts GlobalOptions, args []string) error { return nil } -func getNodeData(ctx context.Context, output io.Writer, repo restic.Repository, node *restic.Node) error { - var ( - buf []byte - err error - ) - for _, id := range node.Content { - buf, err = repo.LoadBlob(ctx, restic.DataBlob, id, buf) - if err != nil { - return err - } - - _, err = output.Write(buf) - if err != nil { - return errors.Wrap(err, "Write") - } - - } - return nil -} - -func tarTree(ctx 
context.Context, repo restic.Repository, rootNode *restic.Node, rootPath string) error { - +func checkStdoutTar() error { if stdoutIsTerminal() { return fmt.Errorf("stdout is the terminal, please redirect output") } - - tw := tar.NewWriter(os.Stdout) - defer tw.Close() - - // If we want to dump "/" we'll need to add the name of the first node, too - // as it would get lost otherwise. - if rootNode.Path == "/" { - rootNode.Path = path.Join(rootNode.Path, rootNode.Name) - rootPath = rootNode.Path - } - - // we know that rootNode is a folder and walker.Walk will already process - // the next node, so we have to tar this one first, too - if err := tarNode(ctx, tw, rootNode, repo); err != nil { - return err - } - - err := walker.Walk(ctx, repo, *rootNode.Subtree, nil, func(_ restic.ID, nodepath string, node *restic.Node, err error) (bool, error) { - if err != nil { - return false, err - } - if node == nil { - return false, nil - } - - node.Path = path.Join(rootPath, nodepath) - - if node.Type == "file" || node.Type == "symlink" || node.Type == "dir" { - err := tarNode(ctx, tw, node, repo) - if err != nil { - return false, err - } - } - - return false, nil - }) - - return err -} - -func tarNode(ctx context.Context, tw *tar.Writer, node *restic.Node, repo restic.Repository) error { - - header := &tar.Header{ - Name: node.Path, - Size: int64(node.Size), - Mode: int64(node.Mode), - Uid: int(node.UID), - Gid: int(node.GID), - ModTime: node.ModTime, - AccessTime: node.AccessTime, - ChangeTime: node.ChangeTime, - PAXRecords: parseXattrs(node.ExtendedAttributes), - } - - if node.Type == "symlink" { - header.Typeflag = tar.TypeSymlink - header.Linkname = node.LinkTarget - } - - if node.Type == "dir" { - header.Typeflag = tar.TypeDir - } - - err := tw.WriteHeader(header) - - if err != nil { - return errors.Wrap(err, "TarHeader ") - } - - return getNodeData(ctx, tw, repo, node) - -} - -func parseXattrs(xattrs []restic.ExtendedAttribute) map[string]string { - tmpMap := 
make(map[string]string) - - for _, attr := range xattrs { - attrString := string(attr.Value) - - if strings.HasPrefix(attr.Name, "system.posix_acl_") { - na := acl{} - na.decode(attr.Value) - - if na.String() != "" { - if strings.Contains(attr.Name, "system.posix_acl_access") { - tmpMap["SCHILY.acl.access"] = na.String() - } else if strings.Contains(attr.Name, "system.posix_acl_default") { - tmpMap["SCHILY.acl.default"] = na.String() - } - } - - } else { - tmpMap["SCHILY.xattr."+attr.Name] = attrString - } - } - - return tmpMap + return nil } diff --git a/cmd/restic/cmd_dump_test.go b/cmd/restic/cmd_dump_test.go new file mode 100644 index 000000000..aa43117ee --- /dev/null +++ b/cmd/restic/cmd_dump_test.go @@ -0,0 +1,27 @@ +package main + +import ( + "testing" + + rtest "github.com/restic/restic/internal/test" +) + +func TestDumpSplitPath(t *testing.T) { + testPaths := []struct { + path string + result []string + }{ + {"", []string{""}}, + {"test", []string{"test"}}, + {"test/dir", []string{"test", "dir"}}, + {"test/dir/sub", []string{"test", "dir", "sub"}}, + {"/", []string{""}}, + {"/test", []string{"test"}}, + {"/test/dir", []string{"test", "dir"}}, + {"/test/dir/sub", []string{"test", "dir", "sub"}}, + } + for _, path := range testPaths { + parts := splitPath(path.path) + rtest.Equals(t, path.result, parts) + } +} diff --git a/cmd/restic/acl.go b/internal/dump/acl.go similarity index 99% rename from cmd/restic/acl.go rename to internal/dump/acl.go index 31356392c..9c5fd95de 100644 --- a/cmd/restic/acl.go +++ b/internal/dump/acl.go @@ -1,4 +1,4 @@ -package main +package dump // Adapted from https://github.com/maxymania/go-system/blob/master/posix_acl/posix_acl.go diff --git a/cmd/restic/acl_test.go b/internal/dump/acl_test.go similarity index 99% rename from cmd/restic/acl_test.go rename to internal/dump/acl_test.go index 1e069d168..fe930c986 100644 --- a/cmd/restic/acl_test.go +++ b/internal/dump/acl_test.go @@ -1,4 +1,4 @@ -package main +package dump import ( 
"reflect" diff --git a/internal/dump/tar.go b/internal/dump/tar.go new file mode 100644 index 000000000..86786654a --- /dev/null +++ b/internal/dump/tar.go @@ -0,0 +1,164 @@ +package dump + +import ( + "archive/tar" + "context" + "io" + "path" + "path/filepath" + "strings" + + "github.com/restic/restic/internal/errors" + "github.com/restic/restic/internal/restic" + "github.com/restic/restic/internal/walker" +) + +// WriteTar will write the contents of the given tree, encoded as a tar to the given destination. +// It will loop over all nodes in the tree and dump them recursively. +func WriteTar(ctx context.Context, repo restic.Repository, tree *restic.Tree, rootPath string, dst io.Writer) error { + tw := tar.NewWriter(dst) + + for _, rootNode := range tree.Nodes { + rootNode.Path = rootPath + err := tarTree(ctx, repo, rootNode, rootPath, tw) + if err != nil { + _ = tw.Close() + return err + } + } + return tw.Close() +} + +func tarTree(ctx context.Context, repo restic.Repository, rootNode *restic.Node, rootPath string, tw *tar.Writer) error { + rootNode.Path = path.Join(rootNode.Path, rootNode.Name) + rootPath = rootNode.Path + + if err := tarNode(ctx, tw, rootNode, repo); err != nil { + return err + } + + // If this is no directory we are finished + if !IsDir(rootNode) { + return nil + } + + err := walker.Walk(ctx, repo, *rootNode.Subtree, nil, func(_ restic.ID, nodepath string, node *restic.Node, err error) (bool, error) { + if err != nil { + return false, err + } + if node == nil { + return false, nil + } + + node.Path = path.Join(rootPath, nodepath) + + if IsFile(node) || IsLink(node) || IsDir(node) { + err := tarNode(ctx, tw, node, repo) + if err != nil { + return false, err + } + } + + return false, nil + }) + + return err +} + +func tarNode(ctx context.Context, tw *tar.Writer, node *restic.Node, repo restic.Repository) error { + relPath, err := filepath.Rel("/", node.Path) + if err != nil { + return err + } + + header := &tar.Header{ + Name: 
filepath.ToSlash(relPath), + Size: int64(node.Size), + Mode: int64(node.Mode), + Uid: int(node.UID), + Gid: int(node.GID), + ModTime: node.ModTime, + AccessTime: node.AccessTime, + ChangeTime: node.ChangeTime, + PAXRecords: parseXattrs(node.ExtendedAttributes), + } + + if IsLink(node) { + header.Typeflag = tar.TypeSymlink + header.Linkname = node.LinkTarget + } + + if IsDir(node) { + header.Typeflag = tar.TypeDir + } + + err = tw.WriteHeader(header) + + if err != nil { + return errors.Wrap(err, "TarHeader ") + } + + return GetNodeData(ctx, tw, repo, node) +} + +func parseXattrs(xattrs []restic.ExtendedAttribute) map[string]string { + tmpMap := make(map[string]string) + + for _, attr := range xattrs { + attrString := string(attr.Value) + + if strings.HasPrefix(attr.Name, "system.posix_acl_") { + na := acl{} + na.decode(attr.Value) + + if na.String() != "" { + if strings.Contains(attr.Name, "system.posix_acl_access") { + tmpMap["SCHILY.acl.access"] = na.String() + } else if strings.Contains(attr.Name, "system.posix_acl_default") { + tmpMap["SCHILY.acl.default"] = na.String() + } + } + + } else { + tmpMap["SCHILY.xattr."+attr.Name] = attrString + } + } + + return tmpMap +} + +// GetNodeData will write the contents of the node to the given output +func GetNodeData(ctx context.Context, output io.Writer, repo restic.Repository, node *restic.Node) error { + var ( + buf []byte + err error + ) + for _, id := range node.Content { + buf, err = repo.LoadBlob(ctx, restic.DataBlob, id, buf) + if err != nil { + return err + } + + _, err = output.Write(buf) + if err != nil { + return errors.Wrap(err, "Write") + } + + } + return nil +} + +// IsDir checks if the given node is a directory +func IsDir(node *restic.Node) bool { + return node.Type == "dir" +} + +// IsLink checks if the given node is a link +func IsLink(node *restic.Node) bool { + return node.Type == "symlink" +} + +// IsFile checks if the given node is a file +func IsFile(node *restic.Node) bool { + return node.Type == 
"file" +} diff --git a/internal/dump/tar_test.go b/internal/dump/tar_test.go new file mode 100644 index 000000000..fddbef7f0 --- /dev/null +++ b/internal/dump/tar_test.go @@ -0,0 +1,180 @@ +package dump + +import ( + "archive/tar" + "bytes" + "context" + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "testing" + "time" + + "github.com/restic/restic/internal/archiver" + "github.com/restic/restic/internal/fs" + "github.com/restic/restic/internal/repository" + "github.com/restic/restic/internal/restic" + rtest "github.com/restic/restic/internal/test" +) + +func prepareTempdirRepoSrc(t testing.TB, src archiver.TestDir) (tempdir string, repo restic.Repository, cleanup func()) { + tempdir, removeTempdir := rtest.TempDir(t) + repo, removeRepository := repository.TestRepository(t) + + archiver.TestCreateFiles(t, tempdir, src) + + cleanup = func() { + removeRepository() + removeTempdir() + } + + return tempdir, repo, cleanup +} + +func TestWriteTar(t *testing.T) { + tests := []struct { + name string + args archiver.TestDir + target string + }{ + { + name: "single file in root", + args: archiver.TestDir{ + "file": archiver.TestFile{Content: "string"}, + }, + target: "/", + }, + { + name: "multiple files in root", + args: archiver.TestDir{ + "file1": archiver.TestFile{Content: "string"}, + "file2": archiver.TestFile{Content: "string"}, + }, + target: "/", + }, + { + name: "multiple files and folders in root", + args: archiver.TestDir{ + "file1": archiver.TestFile{Content: "string"}, + "file2": archiver.TestFile{Content: "string"}, + "firstDir": archiver.TestDir{ + "another": archiver.TestFile{Content: "string"}, + }, + "secondDir": archiver.TestDir{ + "another2": archiver.TestFile{Content: "string"}, + }, + }, + target: "/", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + tmpdir, repo, cleanup := prepareTempdirRepoSrc(t, tt.args) + defer cleanup() + + arch := 
archiver.New(repo, fs.Track{FS: fs.Local{}}, archiver.Options{}) + + back := fs.TestChdir(t, tmpdir) + defer back() + + sn, _, err := arch.Snapshot(ctx, []string{"."}, archiver.SnapshotOptions{}) + rtest.OK(t, err) + + tree, err := repo.LoadTree(ctx, *sn.Tree) + rtest.OK(t, err) + + dst := &bytes.Buffer{} + if err := WriteTar(ctx, repo, tree, tt.target, dst); err != nil { + t.Fatalf("WriteTar() error = %v", err) + } + if err := checkTar(t, tmpdir, dst); err != nil { + t.Errorf("WriteTar() = tar does not match: %v", err) + } + }) + } +} + +func checkTar(t *testing.T, testDir string, srcTar *bytes.Buffer) error { + tr := tar.NewReader(srcTar) + + fileNumber := 0 + tarFiles := 0 + + err := filepath.Walk(testDir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + if info.Name() != filepath.Base(testDir) { + fileNumber++ + } + return nil + }) + if err != nil { + return err + } + + for { + hdr, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + t.Fatal(err) + } + + matchPath := filepath.Join(testDir, hdr.Name) + match, err := os.Stat(matchPath) + if err != nil { + return err + } + + // check metadata, tar header contains time rounded to seconds + fileTime := match.ModTime().Round(time.Second) + tarTime := hdr.ModTime + if !fileTime.Equal(tarTime) { + return fmt.Errorf("modTime does not match, got: %s, want: %s", fileTime, tarTime) + } + + if hdr.Typeflag == tar.TypeDir { + // this is a folder + if hdr.Name == "." 
{ + // we don't need to check the root folder + continue + } + + filebase := filepath.ToSlash(match.Name()) + if filepath.Base(hdr.Name) != filebase { + return fmt.Errorf("foldernames don't match got %v want %v", filepath.Base(hdr.Name), filebase) + } + + } else { + if match.Size() != hdr.Size { + return fmt.Errorf("size does not match got %v want %v", hdr.Size, match.Size()) + } + contentsFile, err := ioutil.ReadFile(matchPath) + if err != nil { + t.Fatal(err) + } + contentsTar := &bytes.Buffer{} + _, err = io.Copy(contentsTar, tr) + if err != nil { + t.Fatal(err) + } + if contentsTar.String() != string(contentsFile) { + return fmt.Errorf("contents does not match, got %s want %s", contentsTar, contentsFile) + } + } + tarFiles++ + } + + if tarFiles != fileNumber { + return fmt.Errorf("not the same amount of files got %v want %v", tarFiles, fileNumber) + } + + return nil +}