diff options
-rw-r--r-- | gemato/recursiveloader.py | 40 | ||||
-rw-r--r-- | tests/test_recursiveloader.py | 132 |
2 files changed, 170 insertions, 2 deletions
diff --git a/gemato/recursiveloader.py b/gemato/recursiveloader.py index 19d72b8..9e5d32e 100644 --- a/gemato/recursiveloader.py +++ b/gemato/recursiveloader.py @@ -116,6 +116,9 @@ class ManifestRecursiveLoader(object): If @sort is True, the Manifest entries will be sorted prior to saving. + + Returns the uncompressed size of the Manifest (number + of characters written). """ m = self.loaded_manifests[relpath] path = os.path.join(self.root_directory, relpath) @@ -132,6 +135,7 @@ class ManifestRecursiveLoader(object): m.dump(f, sign_openpgp=sign, sort=sort, openpgp_env=self.openpgp_env, openpgp_keyid=self.openpgp_keyid) + return f.tell() def _iter_manifests(self): """ @@ -403,7 +407,8 @@ class ManifestRecursiveLoader(object): return ret - def save_manifests(self, hashes=None, force=False, sort=False): + def save_manifests(self, hashes=None, force=False, sort=False, + compress_watermark=None, compress_format='gz'): """ Save the Manifests modified since the last save_manifests() call. @@ -419,6 +424,14 @@ class ManifestRecursiveLoader(object): If @sort is True, the Manifest entries will be sorted prior to saving. + + If @compress_watermark is not None, then the uncompressed + Manifest files whose size is larger than or equal to the value + will be compressed using @compress_format. The Manifest files + whose size is smaller will be uncompressed. To compress all + Manifest files, pass a size of 0. + + If @compress_watermark is None, the compression is left as-is. """ if hashes is None: @@ -427,12 +440,15 @@ class ManifestRecursiveLoader(object): self.load_manifests_for_path('', recursive=True) fixed_manifests = set() + renamed_manifests = {} for mpath, relpath, m in self._iter_manifests_for_path('', recursive=True): for e in m.entries: if e.tag != 'MANIFEST': continue + if e.path in renamed_manifests: + e.path = renamed_manifests[e.path] fullpath = os.path.join(relpath, e.path) if not force and fullpath not in self.updated_manifests: continue @@ -451,7 +467,27 @@ class ManifestRecursiveLoader(object): # we've apparently modified this Manifest, so store it now if force or mpath in self.updated_manifests: - self.save_manifest(mpath, sort=sort) + unc_size = self.save_manifest(mpath, sort=sort) + # let's see if we want to recompress it + if compress_watermark is not None: + compr = (gemato.compression + .get_compressed_suffix_from_filename(mpath)) + is_compr = compr is not None + is_large = unc_size >= compress_watermark + if is_compr != is_large: + if is_large: + # compress it! + new_mpath = mpath + '.' + compress_format + else: + new_mpath = mpath[:-len(compr)-1] + + # do the rename! + self.loaded_manifests[new_mpath] = m + self.save_manifest(new_mpath) + del self.loaded_manifests[mpath] + os.unlink(os.path.join(self.root_directory, + mpath)) + renamed_manifests[mpath] = new_mpath # now, discard all the Manifests whose entries we've updated self.updated_manifests -= fixed_manifests diff --git a/tests/test_recursiveloader.py b/tests/test_recursiveloader.py index 2b8fef9..9521c2e 100644 --- a/tests/test_recursiveloader.py +++ b/tests/test_recursiveloader.py @@ -855,6 +855,30 @@ DATA test 0 MD5 d41d8cd98f00b204e9800998ecf8427e os.path.join(self.dir, 'Manifest')) m.assert_directory_verifies('') + def test_compress_manifests_low_watermark(self): + m = gemato.recursiveloader.ManifestRecursiveLoader( + os.path.join(self.dir, 'Manifest'), + hashes=['SHA256', 'SHA512']) + m.save_manifests(force=True, compress_watermark=0) + self.assertFalse(os.path.exists( + os.path.join(self.dir, 'Manifest'))) + self.assertTrue(os.path.exists( + os.path.join(self.dir, 'Manifest.gz'))) + + def test_compress_manifests_high_watermark(self): + """ + Try compression with watermark high enough to keep this one + uncompressed. + """ + m = gemato.recursiveloader.ManifestRecursiveLoader( + os.path.join(self.dir, 'Manifest'), + hashes=['SHA256', 'SHA512']) + m.save_manifests(force=True, compress_watermark=4096) + self.assertFalse(os.path.exists( + os.path.join(self.dir, 'Manifest.gz'))) + self.assertTrue(os.path.exists( + os.path.join(self.dir, 'Manifest'))) + class DuplicateManifestFileEntryTest(TempDirTestCase): """ @@ -1869,6 +1893,28 @@ DATA test 0 MD5 d41d8cd98f00b204e9800998ecf8427e self.assertNotEqual(f.read(), self.MANIFEST.lstrip()) m.assert_directory_verifies() + def test_decompress_manifests_low_watermark(self): + """ + Try decompression with watermark low enough to keep this one + compressed. + """ + m = gemato.recursiveloader.ManifestRecursiveLoader( + os.path.join(self.dir, 'Manifest.gz'), + hashes=['SHA256', 'SHA512']) + m.save_manifests(force=True, compress_watermark=0) + self.assertTrue(os.path.exists( + os.path.join(self.dir, 'Manifest.gz'))) + + def test_decompress_manifests_high_watermark(self): + m = gemato.recursiveloader.ManifestRecursiveLoader( + os.path.join(self.dir, 'Manifest.gz'), + hashes=['SHA256', 'SHA512']) + m.save_manifests(force=True, compress_watermark=4096) + self.assertFalse(os.path.exists( + os.path.join(self.dir, 'Manifest.gz'))) + self.assertTrue(os.path.exists( + os.path.join(self.dir, 'Manifest'))) + class CompressedSubManifestTest(TempDirTestCase): """ @@ -1926,6 +1972,44 @@ MANIFEST sub/Manifest.gz 78 MD5 9c158f87b2445279d7c8aac439612fba base64.b64decode(self.SUB_MANIFEST)) m.assert_directory_verifies() + def test_recompress_manifests_low_watermark(self): + """ + Try decompression with watermark low enough to keep all + compressed. + """ + m = gemato.recursiveloader.ManifestRecursiveLoader( + os.path.join(self.dir, 'Manifest'), + hashes=['SHA256', 'SHA512']) + m.save_manifests(force=True, compress_watermark=0) + self.assertEqual(m.find_path_entry('sub/Manifest.gz').path, + 'sub/Manifest.gz') + self.assertIsNone(m.find_path_entry('sub/Manifest')) + self.assertTrue(os.path.exists( + os.path.join(self.dir, 'Manifest.gz'))) + self.assertTrue(os.path.exists( + os.path.join(self.dir, 'sub/Manifest.gz'))) + self.assertFalse(os.path.exists( + os.path.join(self.dir, 'Manifest'))) + self.assertFalse(os.path.exists( + os.path.join(self.dir, 'sub/Manifest'))) + + def test_recompress_manifests_high_watermark(self): + m = gemato.recursiveloader.ManifestRecursiveLoader( + os.path.join(self.dir, 'Manifest'), + hashes=['SHA256', 'SHA512']) + m.save_manifests(force=True, compress_watermark=4096) + self.assertEqual(m.find_path_entry('sub/Manifest').path, + 'sub/Manifest') + self.assertIsNone(m.find_path_entry('sub/Manifest.gz')) + self.assertTrue(os.path.exists( + os.path.join(self.dir, 'Manifest'))) + self.assertTrue(os.path.exists( + os.path.join(self.dir, 'sub/Manifest'))) + self.assertFalse(os.path.exists( + os.path.join(self.dir, 'Manifest.gz'))) + self.assertFalse(os.path.exists( + os.path.join(self.dir, 'sub/Manifest.gz'))) + class CompressedManifestOrderingTest(TempDirTestCase): """ @@ -2209,6 +2293,32 @@ class CreateNewManifestTest(TempDirTestCase): self.dir]), 0) + def test_compress_manifests_low_watermark(self): + m = gemato.recursiveloader.ManifestRecursiveLoader( + os.path.join(self.dir, 'Manifest'), + allow_create=True, + hashes=['SHA256', 'SHA512']) + m.save_manifests(force=True, compress_watermark=0) + self.assertFalse(os.path.exists( + os.path.join(self.dir, 'Manifest'))) + self.assertTrue(os.path.exists( + os.path.join(self.dir, 'Manifest.gz'))) + + def test_compress_manifests_high_watermark(self): + """ + Try compression with watermark high enough to keep this one + uncompressed. + """ + m = gemato.recursiveloader.ManifestRecursiveLoader( + os.path.join(self.dir, 'Manifest'), + allow_create=True, + hashes=['SHA256', 'SHA512']) + m.save_manifests(force=True, compress_watermark=4096) + self.assertFalse(os.path.exists( + os.path.join(self.dir, 'Manifest.gz'))) + self.assertTrue(os.path.exists( + os.path.join(self.dir, 'Manifest'))) + class CreateNewCompressedManifestTest(TempDirTestCase): DIRS = ['sub'] @@ -2269,3 +2379,25 @@ class CreateNewCompressedManifestTest(TempDirTestCase): gemato.cli.main(['gemato', 'verify', self.dir]), 0) + + def test_decompress_manifests_low_watermark(self): + m = gemato.recursiveloader.ManifestRecursiveLoader( + os.path.join(self.dir, 'Manifest.gz'), + allow_create=True, + hashes=['SHA256', 'SHA512']) + m.save_manifests(force=True, compress_watermark=0) + self.assertFalse(os.path.exists( + os.path.join(self.dir, 'Manifest'))) + self.assertTrue(os.path.exists( + os.path.join(self.dir, 'Manifest.gz'))) + + def test_decompress_manifests_high_watermark(self): + m = gemato.recursiveloader.ManifestRecursiveLoader( + os.path.join(self.dir, 'Manifest.gz'), + allow_create=True, + hashes=['SHA256', 'SHA512']) + m.save_manifests(force=True, compress_watermark=4096) + self.assertFalse(os.path.exists( + os.path.join(self.dir, 'Manifest.gz'))) + self.assertTrue(os.path.exists( + os.path.join(self.dir, 'Manifest'))) |