summary | refs | log | tree | commit | diff
path: root/utils/gen_fast_metamanifest.py
diff options
context:
space:
mode:
author: Michał Górny <mgorny@gentoo.org> 2017-11-19 16:19:48 +0100
committer: Michał Górny <mgorny@gentoo.org> 2017-11-19 16:19:48 +0100
commit: 7a76c9f734790f1ab0985bc7a2539b8e18d78174 (patch)
tree: 62d38aa27ea6711073696f771ece8b5a039098b4 /utils/gen_fast_metamanifest.py
parent: 3124d8bd330dda6eed5a588439f470b30ae33bb3 (diff)
download: gemato-7a76c9f734790f1ab0985bc7a2539b8e18d78174.tar.gz
gen_fast_metamanifest: Enable multiprocessing
Diffstat (limited to 'utils/gen_fast_metamanifest.py')
-rwxr-xr-x utils/gen_fast_metamanifest.py 65
1 files changed, 39 insertions, 26 deletions
diff --git a/utils/gen_fast_metamanifest.py b/utils/gen_fast_metamanifest.py
index f0e0d90..60fdc76 100755
--- a/utils/gen_fast_metamanifest.py
+++ b/utils/gen_fast_metamanifest.py
@@ -7,6 +7,7 @@
import datetime
import glob
import io
+import multiprocessing
import os
import os.path
import subprocess
@@ -17,37 +18,38 @@ sys.path.insert(0, os.path.dirname(__file__))
import gen_fast_manifest
-def manifest_dir_generator():
+def manifest_dir_generator(iter_n):
with io.open('profiles/categories', 'r') as f:
categories = [x.strip() for x in f]
for c in categories:
- # all package directories
- for d in glob.glob(os.path.join(c, '*/')):
- yield d
- # category directory
- yield c
- # md5-cache for the category
- yield os.path.join('metadata/md5-cache', c)
-
- # few special metadata directories
- yield 'metadata/glsa'
- yield 'metadata/md5-cache'
- yield 'metadata/news'
-
- # top-level dirs
- yield 'metadata'
- yield 'eclass'
- yield 'licenses'
- yield 'profiles'
-
- # finally, the whole repo
- yield '.'
+ if iter_n == 1:
+ # all package directories
+ for d in glob.glob(os.path.join(c, '*/')):
+ yield d
+ # md5-cache for the category
+ yield os.path.join('metadata/md5-cache', c)
+ elif iter_n == 2:
+ # category directory
+ yield c
+
+ if iter_n == 1:
+ # few special metadata subdirectories
+ yield 'metadata/glsa'
+ yield 'metadata/md5-cache'
+ yield 'metadata/news'
+
+ # independent top-level dirs
+ yield 'eclass'
+ yield 'licenses'
+ yield 'profiles'
+ elif iter_n == 2:
+ # top-level dirs
+ yield 'metadata'
def gen_metamanifest(top_dir):
os.chdir(top_dir)
- alldirs = manifest_dir_generator()
# pre-populate IGNORE entries
with io.open('metadata/Manifest', 'wb') as f:
@@ -62,9 +64,20 @@ IGNORE local
IGNORE packages
''')
- # call the fast-gen routine
- for path in alldirs:
- gen_fast_manifest.gen_manifest(path)
+ p = multiprocessing.Pool()
+
+ # generate 1st batch of sub-Manifests
+ # expecting 20000+ items, so use iterator with a reasonably large
+ # chunksize
+ p.map(gen_fast_manifest.gen_manifest, manifest_dir_generator(1), chunksize=64)
+
+ # 2nd batch (files depending on results of 1st batch)
+ # this one is fast to generate, so let's pass a list and let map()
+ # choose optimal chunksize
+ p.map(gen_fast_manifest.gen_manifest, list(manifest_dir_generator(2)))
+
+ # finally, generate the top-level Manifest
+ gen_fast_manifest.gen_manifest('.')
# write timestamp
with io.open('Manifest', 'ab') as f: