diff options
-rw-r--r-- | gemato/recursiveloader.py | 54 | ||||
-rw-r--r-- | tests/test_recursiveloader.py | 173 |
2 files changed, 227 insertions, 0 deletions
diff --git a/gemato/recursiveloader.py b/gemato/recursiveloader.py index e5e9fdc..efc30ef 100644 --- a/gemato/recursiveloader.py +++ b/gemato/recursiveloader.py @@ -533,3 +533,57 @@ class ManifestRecursiveLoader(object): self.updated_manifests.add(mpath) had_entry = True break + + def get_deduplicated_file_entry_dict_for_update(self, path=''): + """ + Find all file entries that apply to paths starting with @path. + Remove all duplicate entries and queue the relevant Manifests + for update. Return a dictionary mapping relative paths + to entries. + + You need to invoke save_manifests() to store the Manifest + updates afterwards. However, note that the resulting tree + may no longer validate. + + If the path is referenced by multiple entries of incompatible + semantics, raises an exception. If the entries have compatible + semantics, all but the first (deepest) are removed, even + if they have colliding sizes or hashes. If the duplicate + entries use different hash sets, the preserved entry is updated + to have the union of their hashes. + """ + + self.load_manifests_for_path(path, recursive=True) + out = {} + for mpath, relpath, m in self._iter_manifests_for_path(path, + recursive=True): + entries_to_remove = [] + for e in m.entries: + if isinstance(e, gemato.manifest.ManifestEntryDIST): + # distfiles are not local files, so skip them + pass + elif isinstance(e, gemato.manifest.ManifestPathEntry): + fullpath = os.path.join(relpath, e.path) + if gemato.util.path_starts_with(fullpath, path): + if fullpath in out: + # compare the two entries + ret, diff = gemato.verify.verify_entry_compatibility( + out[fullpath], e) + # if semantically incompatible, throw + if not ret and diff[0][0] == '__type__': + raise (gemato.exceptions + .ManifestIncompatibleEntry( + out[fullpath], e, diff)) + # otherwise, make sure we have all checksums + out[fullpath].checksums.update(e.checksums) + # and drop the duplicate + entries_to_remove.append(e) + else: + out[fullpath] = e + + if entries_to_remove: + for e in entries_to_remove: + m.entries.remove(e) + self.updated_manifests.add(mpath) + + return out diff --git a/tests/test_recursiveloader.py b/tests/test_recursiveloader.py index a8b7384..f673c13 100644 --- a/tests/test_recursiveloader.py +++ b/tests/test_recursiveloader.py @@ -202,6 +202,25 @@ DATA test 0 MD5 d41d8cd98f00b204e9800998ecf8427e self.assertEqual(entries['sub/deeper/Manifest'].path, 'deeper/Manifest') self.assertEqual(entries['sub/deeper/test'].path, 'test') + def test_get_deduplicated_file_entry_dict_for_update(self): + m = gemato.recursiveloader.ManifestRecursiveLoader( + os.path.join(self.dir, 'Manifest')) + entries = m.get_deduplicated_file_entry_dict_for_update('') + self.assertSetEqual(m.updated_manifests, set()) + self.assertSetEqual(frozenset(entries), + frozenset(( + 'other/Manifest', + 'sub/Manifest', + 'sub/nonstray', + 'sub/deeper/Manifest', + 'sub/deeper/test', + ))) + self.assertEqual(entries['other/Manifest'].path, 'other/Manifest') + self.assertEqual(entries['sub/Manifest'].path, 'sub/Manifest') + self.assertEqual(entries['sub/nonstray'].path, 'nonstray') + self.assertEqual(entries['sub/deeper/Manifest'].path, 'deeper/Manifest') + self.assertEqual(entries['sub/deeper/test'].path, 'test') + def test_get_file_entry_dict_for_sub(self): m = gemato.recursiveloader.ManifestRecursiveLoader( os.path.join(self.dir, 'Manifest')) @@ -218,11 +237,36 @@ DATA test 0 MD5 d41d8cd98f00b204e9800998ecf8427e self.assertEqual(entries['sub/deeper/Manifest'].path, 'deeper/Manifest') self.assertEqual(entries['sub/deeper/test'].path, 'test') + def test_get_deduplicated_file_entry_dict_for_update_for_sub(self): + m = gemato.recursiveloader.ManifestRecursiveLoader( + os.path.join(self.dir, 'Manifest')) + entries = m.get_deduplicated_file_entry_dict_for_update('sub') + self.assertSetEqual(m.updated_manifests, set()) + self.assertSetEqual(frozenset(entries), + frozenset(( + 'sub/Manifest', + 'sub/nonstray', + 'sub/deeper/Manifest', + 'sub/deeper/test', + ))) + self.assertEqual(entries['sub/Manifest'].path, 'sub/Manifest') + self.assertEqual(entries['sub/nonstray'].path, 'nonstray') + self.assertEqual(entries['sub/deeper/Manifest'].path, 'deeper/Manifest') + self.assertEqual(entries['sub/deeper/test'].path, 'test') + def test_get_file_entry_dict_for_invalid(self): m = gemato.recursiveloader.ManifestRecursiveLoader( os.path.join(self.dir, 'Manifest')) self.assertDictEqual(m.get_file_entry_dict('nonexist'), {}) + def test_get_deduplicated_file_entry_dict_for_update_for_invalid(self): + m = gemato.recursiveloader.ManifestRecursiveLoader( + os.path.join(self.dir, 'Manifest')) + self.assertDictEqual( + m.get_deduplicated_file_entry_dict_for_update('nonexist'), + {}) + self.assertSetEqual(m.updated_manifests, set()) + def test_assert_directory_verifies(self): m = gemato.recursiveloader.ManifestRecursiveLoader( os.path.join(self.dir, 'Manifest')) @@ -673,6 +717,22 @@ DATA test 0 MD5 d41d8cd98f00b204e9800998ecf8427e self.assertSetEqual(frozenset(entries), frozenset(('test',))) self.assertEqual(entries['test'].path, 'test') + def test_get_deduplicated_file_entry_dict_for_update(self): + m = gemato.recursiveloader.ManifestRecursiveLoader( + os.path.join(self.dir, 'Manifest')) + entries = m.get_deduplicated_file_entry_dict_for_update('') + self.assertSetEqual(frozenset(entries), frozenset(('test',))) + self.assertEqual(entries['test'].path, 'test') + self.assertSetEqual(frozenset(entries['test'].checksums), + frozenset(('MD5',))) + + m.save_manifests() + m2 = gemato.manifest.ManifestFile() + with io.open(os.path.join(self.dir, 'Manifest'), 'r', + encoding='utf8') as f: + m2.load(f) + self.assertEqual(len(m2.entries), 1) + def test_assert_directory_verifies(self): m = gemato.recursiveloader.ManifestRecursiveLoader( os.path.join(self.dir, 'Manifest')) @@ -754,6 +814,23 @@ DATA test 0 MD5 d41d8cd98f00b204e9800998ecf8427e frozenset(('sub/test', 'sub/Manifest'))) self.assertEqual(entries['sub/test'].size, 0) + def test_get_deduplicated_file_entry_dict_for_update(self): + m = gemato.recursiveloader.ManifestRecursiveLoader( + os.path.join(self.dir, 'Manifest')) + entries = m.get_deduplicated_file_entry_dict_for_update('') + self.assertSetEqual(frozenset(entries), + frozenset(('sub/test', 'sub/Manifest'))) + self.assertEqual(entries['sub/test'].path, 'test') + self.assertSetEqual(frozenset(entries['sub/test'].checksums), + frozenset(('MD5',))) + + m.save_manifests() + m2 = gemato.manifest.ManifestFile() + with io.open(os.path.join(self.dir, 'Manifest'), 'r', + encoding='utf8') as f: + m2.load(f) + self.assertEqual(len(m2.entries), 1) + class DuplicateCompatibleTypeFileEntryTest(TempDirTestCase): """ @@ -781,6 +858,20 @@ EBUILD test.ebuild 0 MD5 d41d8cd98f00b204e9800998ecf8427e self.assertSetEqual(frozenset(entries), frozenset(('test.ebuild',))) self.assertEqual(entries['test.ebuild'].path, 'test.ebuild') + def test_get_deduplicated_file_entry_dict_for_update(self): + m = gemato.recursiveloader.ManifestRecursiveLoader( + os.path.join(self.dir, 'Manifest')) + entries = m.get_deduplicated_file_entry_dict_for_update('') + self.assertSetEqual(frozenset(entries), frozenset(('test.ebuild',))) + self.assertEqual(entries['test.ebuild'].path, 'test.ebuild') + + m.save_manifests() + m2 = gemato.manifest.ManifestFile() + with io.open(os.path.join(self.dir, 'Manifest'), 'r', + encoding='utf8') as f: + m2.load(f) + self.assertEqual(len(m2.entries), 1) + def test_assert_directory_verifies(self): m = gemato.recursiveloader.ManifestRecursiveLoader( os.path.join(self.dir, 'Manifest')) @@ -814,6 +905,20 @@ AUX test.patch 0 MD5 d41d8cd98f00b204e9800998ecf8427e self.assertSetEqual(frozenset(entries), frozenset(('files/test.patch',))) self.assertEqual(entries['files/test.patch'].path, 'files/test.patch') + def test_get_deduplicated_file_entry_dict_for_update(self): + m = gemato.recursiveloader.ManifestRecursiveLoader( + os.path.join(self.dir, 'Manifest')) + entries = m.get_deduplicated_file_entry_dict_for_update('') + self.assertSetEqual(frozenset(entries), frozenset(('files/test.patch',))) + self.assertEqual(entries['files/test.patch'].path, 'files/test.patch') + + m.save_manifests() + m2 = gemato.manifest.ManifestFile() + with io.open(os.path.join(self.dir, 'Manifest'), 'r', + encoding='utf8') as f: + m2.load(f) + self.assertEqual(len(m2.entries), 1) + def test_assert_directory_verifies(self): m = gemato.recursiveloader.ManifestRecursiveLoader( os.path.join(self.dir, 'Manifest')) @@ -902,6 +1007,22 @@ DATA test 0 SHA1 2fd4e1c67a2d28fced849ee1bb76e7391b93eb12 self.assertSetEqual(frozenset(entries['test'].checksums), frozenset(('MD5', 'SHA1'))) + def test_get_deduplicated_file_entry_dict_for_update(self): + m = gemato.recursiveloader.ManifestRecursiveLoader( + os.path.join(self.dir, 'Manifest')) + entries = m.get_deduplicated_file_entry_dict_for_update('') + self.assertSetEqual(frozenset(entries), frozenset(('test',))) + self.assertEqual(entries['test'].path, 'test') + self.assertSetEqual(frozenset(entries['test'].checksums), + frozenset(('MD5', 'SHA1'))) + + m.save_manifests() + m2 = gemato.manifest.ManifestFile() + with io.open(os.path.join(self.dir, 'Manifest'), 'r', + encoding='utf8') as f: + m2.load(f) + self.assertEqual(len(m2.entries), 1) + def test_assert_directory_verifies(self): m = gemato.recursiveloader.ManifestRecursiveLoader( os.path.join(self.dir, 'Manifest')) @@ -957,6 +1078,12 @@ MISC test.ebuild 0 MD5 d41d8cd98f00b204e9800998ecf8427e self.assertRaises(gemato.exceptions.ManifestIncompatibleEntry, m.get_file_entry_dict, '') + def test_deduplicated_get_file_entry_dict_for_update(self): + m = gemato.recursiveloader.ManifestRecursiveLoader( + os.path.join(self.dir, 'Manifest')) + self.assertRaises(gemato.exceptions.ManifestIncompatibleEntry, + m.get_deduplicated_file_entry_dict_for_update, '') + class DuplicateIncompatibleDataOptionalTypeFileEntryTest(TempDirTestCase): """ @@ -982,6 +1109,12 @@ OPTIONAL test.ebuild self.assertRaises(gemato.exceptions.ManifestIncompatibleEntry, m.get_file_entry_dict, '') + def test_deduplicated_get_file_entry_dict_for_update(self): + m = gemato.recursiveloader.ManifestRecursiveLoader( + os.path.join(self.dir, 'Manifest')) + self.assertRaises(gemato.exceptions.ManifestIncompatibleEntry, + m.get_deduplicated_file_entry_dict_for_update, '') + class DuplicateIncompatibleMiscOptionalTypeFileEntryTest(TempDirTestCase): """ @@ -1007,6 +1140,12 @@ OPTIONAL test.ebuild self.assertRaises(gemato.exceptions.ManifestIncompatibleEntry, m.get_file_entry_dict, '') + def test_deduplicated_get_file_entry_dict_for_update(self): + m = gemato.recursiveloader.ManifestRecursiveLoader( + os.path.join(self.dir, 'Manifest')) + self.assertRaises(gemato.exceptions.ManifestIncompatibleEntry, + m.get_deduplicated_file_entry_dict_for_update, '') + class DuplicateDifferentSizeFileEntryTest(TempDirTestCase): """ @@ -1032,6 +1171,22 @@ DATA test.ebuild 32 MD5 d41d8cd98f00b204e9800998ecf8427e self.assertRaises(gemato.exceptions.ManifestIncompatibleEntry, m.get_file_entry_dict, '') + def test_get_deduplicated_file_entry_dict_for_update(self): + m = gemato.recursiveloader.ManifestRecursiveLoader( + os.path.join(self.dir, 'Manifest')) + entries = m.get_deduplicated_file_entry_dict_for_update('') + self.assertSetEqual(frozenset(entries), + frozenset(('test.ebuild',))) + self.assertIsInstance(entries['test.ebuild'], + gemato.manifest.ManifestEntryDATA) + + m.save_manifests() + m2 = gemato.manifest.ManifestFile() + with io.open(os.path.join(self.dir, 'Manifest'), 'r', + encoding='utf8') as f: + m2.load(f) + self.assertEqual(len(m2.entries), 1) + class DuplicateDifferentHashFileEntryTest(TempDirTestCase): """ @@ -1057,6 +1212,24 @@ DATA test.ebuild 0 MD5 9e107d9d372bb6826bd81d3542a419d6 self.assertRaises(gemato.exceptions.ManifestIncompatibleEntry, m.get_file_entry_dict, '') + def test_get_deduplicated_file_entry_dict_for_update(self): + m = gemato.recursiveloader.ManifestRecursiveLoader( + os.path.join(self.dir, 'Manifest')) + entries = m.get_deduplicated_file_entry_dict_for_update('') + self.assertSetEqual(frozenset(entries), + frozenset(('test.ebuild',))) + self.assertIsInstance(entries['test.ebuild'], + gemato.manifest.ManifestEntryDATA) + self.assertSetEqual(frozenset(entries['test.ebuild'].checksums), + frozenset(('MD5',))) + + m.save_manifests() + m2 = gemato.manifest.ManifestFile() + with io.open(os.path.join(self.dir, 'Manifest'), 'r', + encoding='utf8') as f: + m2.load(f) + self.assertEqual(len(m2.entries), 1) + def test_assert_directory_verifies(self): m = gemato.recursiveloader.ManifestRecursiveLoader( os.path.join(self.dir, 'Manifest')) |