From 366bf4eb0cf40b66d5d4fafdb5b12584e0f2e3fc Mon Sep 17 00:00:00 2001 From: Jaap Gordijn Date: Tue, 31 Jan 2017 00:14:20 +0100 Subject: [PATCH] Support hard links Closes #152 --- src/cmds/restic/integration_helpers_test.go | 2 + .../restic/integration_helpers_unix_test.go | 34 ++++++ .../integration_helpers_windows_test.go | 22 ++++ src/cmds/restic/integration_test.go | 97 ++++++++++++++++++ src/cmds/restic/testdata/test.hl.tar.gz | Bin 0 -> 198 bytes src/restic/fs/file.go | 6 ++ src/restic/hardlinks_index.go | 57 ++++++++++ src/restic/hardlinks_index_test.go | 35 +++++++ src/restic/node.go | 19 +++- src/restic/node_test.go | 4 +- src/restic/node_windows.go | 1 + src/restic/restorer.go | 15 +-- 12 files changed, 281 insertions(+), 11 deletions(-) create mode 100644 src/cmds/restic/testdata/test.hl.tar.gz create mode 100644 src/restic/hardlinks_index.go create mode 100644 src/restic/hardlinks_index_test.go diff --git a/src/cmds/restic/integration_helpers_test.go b/src/cmds/restic/integration_helpers_test.go index 72fb09f44..c71b12067 100644 --- a/src/cmds/restic/integration_helpers_test.go +++ b/src/cmds/restic/integration_helpers_test.go @@ -15,6 +15,7 @@ import ( type dirEntry struct { path string fi os.FileInfo + link uint64 } func walkDir(dir string) <-chan *dirEntry { @@ -36,6 +37,7 @@ func walkDir(dir string) <-chan *dirEntry { ch <- &dirEntry{ path: name, fi: info, + link: nlink(info), } return nil diff --git a/src/cmds/restic/integration_helpers_unix_test.go b/src/cmds/restic/integration_helpers_unix_test.go index a182898e8..01a0fd5a5 100644 --- a/src/cmds/restic/integration_helpers_unix_test.go +++ b/src/cmds/restic/integration_helpers_unix_test.go @@ -4,7 +4,9 @@ package main import ( "fmt" + "io/ioutil" "os" + "path/filepath" "syscall" ) @@ -37,5 +39,37 @@ func (e *dirEntry) equals(other *dirEntry) bool { return false } + if stat.Nlink != stat2.Nlink { + fmt.Fprintf(os.Stderr, "%v: Number of links do not match (%v != %v)\n", e.path, stat.Nlink, 
stat2.Nlink) + return false + } + return true } + +func nlink(info os.FileInfo) uint64 { + stat, _ := info.Sys().(*syscall.Stat_t) + return uint64(stat.Nlink) +} + +func inode(info os.FileInfo) uint64 { + stat, _ := info.Sys().(*syscall.Stat_t) + return uint64(stat.Ino) +} + +func createFileSetPerHardlink(dir string) map[uint64][]string { + var stat syscall.Stat_t + linkTests := make(map[uint64][]string) + files, err := ioutil.ReadDir(dir) + if err != nil { + return nil + } + for _, f := range files { + + if err := syscall.Stat(filepath.Join(dir, f.Name()), &stat); err != nil { + return nil + } + linkTests[uint64(stat.Ino)] = append(linkTests[uint64(stat.Ino)], f.Name()) + } + return linkTests +} diff --git a/src/cmds/restic/integration_helpers_windows_test.go b/src/cmds/restic/integration_helpers_windows_test.go index d67e9ca11..9e3fbac9b 100644 --- a/src/cmds/restic/integration_helpers_windows_test.go +++ b/src/cmds/restic/integration_helpers_windows_test.go @@ -4,6 +4,7 @@ package main import ( "fmt" + "io/ioutil" "os" ) @@ -25,3 +26,24 @@ func (e *dirEntry) equals(other *dirEntry) bool { return true } + +func nlink(info os.FileInfo) uint64 { + return 1 +} + +func inode(info os.FileInfo) uint64 { + return uint64(0) +} + +func createFileSetPerHardlink(dir string) map[uint64][]string { + linkTests := make(map[uint64][]string) + files, err := ioutil.ReadDir(dir) + if err != nil { + return nil + } + for i, f := range files { + linkTests[uint64(i)] = append(linkTests[uint64(i)], f.Name()) + i++ + } + return linkTests +} diff --git a/src/cmds/restic/integration_test.go b/src/cmds/restic/integration_test.go index 426367724..a598a525a 100644 --- a/src/cmds/restic/integration_test.go +++ b/src/cmds/restic/integration_test.go @@ -1011,3 +1011,100 @@ func TestPrune(t *testing.T) { testRunCheck(t, gopts) }) } + +func TestHardLink(t *testing.T) { + // this test assumes a test set with a single directory containing hard linked files + withTestEnvironment(t, func(env 
*testEnvironment, gopts GlobalOptions) { + datafile := filepath.Join("testdata", "test.hl.tar.gz") + fd, err := os.Open(datafile) + if os.IsNotExist(errors.Cause(err)) { + t.Skipf("unable to find data file %q, skipping", datafile) + return + } + OK(t, err) + OK(t, fd.Close()) + + testRunInit(t, gopts) + + SetupTarTestFixture(t, env.testdata, datafile) + + linkTests := createFileSetPerHardlink(env.testdata) + + opts := BackupOptions{} + + // first backup + testRunBackup(t, []string{env.testdata}, opts, gopts) + snapshotIDs := testRunList(t, "snapshots", gopts) + Assert(t, len(snapshotIDs) == 1, + "expected one snapshot, got %v", snapshotIDs) + + testRunCheck(t, gopts) + + // restore all backups and compare + for i, snapshotID := range snapshotIDs { + restoredir := filepath.Join(env.base, fmt.Sprintf("restore%d", i)) + t.Logf("restoring snapshot %v to %v", snapshotID.Str(), restoredir) + testRunRestore(t, gopts, restoredir, snapshotIDs[0]) + Assert(t, directoriesEqualContents(env.testdata, filepath.Join(restoredir, "testdata")), + "directories are not equal") + + linkResults := createFileSetPerHardlink(filepath.Join(restoredir, "testdata")) + Assert(t, linksEqual(linkTests, linkResults), + "links are not equal") + } + + testRunCheck(t, gopts) + }) +} + +func linksEqual(source, dest map[uint64][]string) bool { + for _, vs := range source { + found := false + for kd, vd := range dest { + if linkEqual(vs, vd) { + delete(dest, kd) + found = true + break + } + } + if !found { + return false + } + } + + if len(dest) != 0 { + return false + } + + return true +} + +func linkEqual(source, dest []string) bool { + // equal if slices are equal without considering order + if source == nil && dest == nil { + return true + } + + if source == nil || dest == nil { + return false + } + + if len(source) != len(dest) { + return false + } + + for i := range source { + found := false + for j := range dest { + if source[i] == dest[j] { + found = true + break + } + } + if !found { + return 
false + } + } + + return true +} diff --git a/src/cmds/restic/testdata/test.hl.tar.gz b/src/cmds/restic/testdata/test.hl.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..30257819923ca077d0b3664ff2d5c34e770aa12e GIT binary patch literal 198 zcmb2|=HOr`m=nRkT#{N`qL-1QSCUx7@b-!?*C7LewuhEpdI_^l>YsN;eAw=F`QH7I zn4JMJOBD8>t~&q zv)n{W_4t&$_s`0%dt31ozdOF?SY&qQt(E65r_Dch_ou;}Rc|7g_|Dayv6pzecDri5 y^jg8Ct7ASl{h9t}yXN<+Wi|8n>&rK4Y(Dn*&*wjk3<&Vy_PgNfNOJ}a1_l5^M_Z@> literal 0 HcmV?d00001 diff --git a/src/restic/fs/file.go b/src/restic/fs/file.go index b7e81a38b..2ba5d138e 100644 --- a/src/restic/fs/file.go +++ b/src/restic/fs/file.go @@ -102,6 +102,12 @@ func Symlink(oldname, newname string) error { return os.Symlink(fixpath(oldname), fixpath(newname)) } +// Link creates newname as a hard link to oldname. +// If there is an error, it will be of type *LinkError. +func Link(oldname, newname string) error { + return os.Link(fixpath(oldname), fixpath(newname)) +} + // Stat returns a FileInfo structure describing the named file. // If there is an error, it will be of type *PathError. func Stat(name string) (os.FileInfo, error) { diff --git a/src/restic/hardlinks_index.go b/src/restic/hardlinks_index.go new file mode 100644 index 000000000..0874f32a4 --- /dev/null +++ b/src/restic/hardlinks_index.go @@ -0,0 +1,57 @@ +package restic + +import ( + "sync" +) + +// HardlinkKey is a composed key for finding inodes on a specific device. +type HardlinkKey struct { + Inode, Device uint64 +} + +// HardlinkIndex contains a list of inodes, devices these inodes are on, and associated file names. +type HardlinkIndex struct { + m sync.Mutex + Index map[HardlinkKey]string +} + +// NewHardlinkIndex creates a new index for hard links. +func NewHardlinkIndex() *HardlinkIndex { + return &HardlinkIndex{ + Index: make(map[HardlinkKey]string), + } +} + +// Has checks whether the link already exists in the index. 
+func (idx *HardlinkIndex) Has(inode uint64, device uint64) bool { + idx.m.Lock() + defer idx.m.Unlock() + _, ok := idx.Index[HardlinkKey{inode, device}] + + return ok +} + +// Add adds a link to the index. +func (idx *HardlinkIndex) Add(inode uint64, device uint64, name string) { + idx.m.Lock() + defer idx.m.Unlock() + _, ok := idx.Index[HardlinkKey{inode, device}] + + if !ok { + idx.Index[HardlinkKey{inode, device}] = name + } +} + +// GetFilename obtains the filename from the index. +func (idx *HardlinkIndex) GetFilename(inode uint64, device uint64) string { + idx.m.Lock() + defer idx.m.Unlock() + return idx.Index[HardlinkKey{inode, device}] +} + +// Remove removes a link from the index. +func (idx *HardlinkIndex) Remove(inode uint64, device uint64) { + idx.m.Lock() + defer idx.m.Unlock() + delete(idx.Index, HardlinkKey{inode, device}) +} diff --git a/src/restic/hardlinks_index_test.go b/src/restic/hardlinks_index_test.go new file mode 100644 index 000000000..c0a675611 --- /dev/null +++ b/src/restic/hardlinks_index_test.go @@ -0,0 +1,35 @@ +package restic_test + +import ( + "testing" + + "restic" + . "restic/test" +) + +// TestHardLinks contains various tests for HardlinkIndex. 
+func TestHardLinks(t *testing.T) { + + idx := restic.NewHardlinkIndex() + + idx.Add(1, 2, "inode1-file1-on-device2") + idx.Add(2, 3, "inode2-file2-on-device3") + + var sresult string + sresult = idx.GetFilename(1, 2) + Equals(t, sresult, "inode1-file1-on-device2") + + sresult = idx.GetFilename(2, 3) + Equals(t, sresult, "inode2-file2-on-device3") + + var bresult bool + bresult = idx.Has(1, 2) + Equals(t, bresult, true) + + bresult = idx.Has(1, 3) + Equals(t, bresult, false) + + idx.Remove(1, 2) + bresult = idx.Has(1, 2) + Equals(t, bresult, false) +} diff --git a/src/restic/node.go b/src/restic/node.go index bf41f4201..253a33e9a 100644 --- a/src/restic/node.go +++ b/src/restic/node.go @@ -97,7 +97,7 @@ func nodeTypeFromFileInfo(fi os.FileInfo) string { } // CreateAt creates the node at the given path and restores all the meta data. -func (node *Node) CreateAt(path string, repo Repository) error { +func (node *Node) CreateAt(path string, repo Repository, idx *HardlinkIndex) error { debug.Log("create node %v at %v", node.Name, path) switch node.Type { @@ -106,7 +106,7 @@ func (node *Node) CreateAt(path string, repo Repository) error { return err } case "file": - if err := node.createFileAt(path, repo); err != nil { + if err := node.createFileAt(path, repo, idx); err != nil { return err } case "symlink": @@ -191,7 +191,15 @@ func (node Node) createDirAt(path string) error { return nil } -func (node Node) createFileAt(path string, repo Repository) error { +func (node Node) createFileAt(path string, repo Repository, idx *HardlinkIndex) error { + if node.Links > 1 && idx.Has(node.Inode, node.Device) { + err := fs.Link(idx.GetFilename(node.Inode, node.Device), path) + if err != nil { + return errors.Wrap(err, "CreateHardlink") + } + return nil + } + f, err := fs.OpenFile(path, os.O_CREATE|os.O_WRONLY, 0600) defer f.Close() @@ -223,6 +231,8 @@ func (node Node) createFileAt(path string, repo Repository) error { } } + idx.Add(node.Inode, node.Device, path) + return nil } @@ 
-485,11 +495,14 @@ func (node *Node) fillExtra(path string, fi os.FileInfo) error { case "dir": case "symlink": node.LinkTarget, err = fs.Readlink(path) + node.Links = uint64(stat.nlink()) err = errors.Wrap(err, "Readlink") case "dev": node.Device = uint64(stat.rdev()) + node.Links = uint64(stat.nlink()) case "chardev": node.Device = uint64(stat.rdev()) + node.Links = uint64(stat.nlink()) case "fifo": case "socket": default: diff --git a/src/restic/node_test.go b/src/restic/node_test.go index e219d8926..16414150f 100644 --- a/src/restic/node_test.go +++ b/src/restic/node_test.go @@ -176,9 +176,11 @@ func TestNodeRestoreAt(t *testing.T) { } }() + idx := restic.NewHardlinkIndex() + for _, test := range nodeTests { nodePath := filepath.Join(tempdir, test.Name) - OK(t, test.CreateAt(nodePath, nil)) + OK(t, test.CreateAt(nodePath, nil, idx)) if test.Type == "symlink" && runtime.GOOS == "windows" { continue diff --git a/src/restic/node_windows.go b/src/restic/node_windows.go index 050de8f27..43b6d9b62 100644 --- a/src/restic/node_windows.go +++ b/src/restic/node_windows.go @@ -24,6 +24,7 @@ func (node Node) restoreSymlinkTimestamps(path string, utimes [2]syscall.Timespe type statWin syscall.Win32FileAttributeData +// toStatT returns i as a statT; the boolean result is false if that is not possible (for example when i is nil). 
func toStatT(i interface{}) (statT, bool) { if i == nil { return nil, false diff --git a/src/restic/restorer.go b/src/restic/restorer.go index 5397c8d52..4a271cec0 100644 --- a/src/restic/restorer.go +++ b/src/restic/restorer.go @@ -38,7 +38,7 @@ func NewRestorer(repo Repository, id ID) (*Restorer, error) { return r, nil } -func (res *Restorer) restoreTo(dst string, dir string, treeID ID) error { +func (res *Restorer) restoreTo(dst string, dir string, treeID ID, idx *HardlinkIndex) error { tree, err := res.repo.LoadTree(treeID) if err != nil { return res.Error(dir, nil, err) @@ -50,7 +50,7 @@ func (res *Restorer) restoreTo(dst string, dir string, treeID ID) error { debug.Log("SelectForRestore returned %v", selectedForRestore) if selectedForRestore { - err := res.restoreNodeTo(node, dir, dst) + err := res.restoreNodeTo(node, dir, dst, idx) if err != nil { return err } @@ -62,7 +62,7 @@ func (res *Restorer) restoreTo(dst string, dir string, treeID ID) error { } subp := filepath.Join(dir, node.Name) - err = res.restoreTo(dst, subp, *node.Subtree) + err = res.restoreTo(dst, subp, *node.Subtree, idx) if err != nil { err = res.Error(subp, node, err) if err != nil { @@ -83,11 +83,11 @@ func (res *Restorer) restoreTo(dst string, dir string, treeID ID) error { return nil } -func (res *Restorer) restoreNodeTo(node *Node, dir string, dst string) error { +func (res *Restorer) restoreNodeTo(node *Node, dir string, dst string, idx *HardlinkIndex) error { debug.Log("node %v, dir %v, dst %v", node.Name, dir, dst) dstPath := filepath.Join(dst, dir, node.Name) - err := node.CreateAt(dstPath, res.repo) + err := node.CreateAt(dstPath, res.repo, idx) if err != nil { debug.Log("node.CreateAt(%s) error %v", dstPath, err) } @@ -99,7 +99,7 @@ func (res *Restorer) restoreNodeTo(node *Node, dir string, dst string) error { // Create parent directories and retry err = fs.MkdirAll(filepath.Dir(dstPath), 0700) if err == nil || os.IsExist(errors.Cause(err)) { - err = node.CreateAt(dstPath, 
res.repo) + err = node.CreateAt(dstPath, res.repo, idx) } } @@ -119,7 +119,8 @@ func (res *Restorer) restoreNodeTo(node *Node, dir string, dst string) error { // RestoreTo creates the directories and files in the snapshot below dir. // Before an item is created, res.Filter is called. func (res *Restorer) RestoreTo(dir string) error { - return res.restoreTo(dir, "", *res.sn.Tree) + idx := NewHardlinkIndex() + return res.restoreTo(dir, "", *res.sn.Tree, idx) } // Snapshot returns the snapshot this restorer is configured to use.