diff options
author | Michał Górny <mgorny@gentoo.org> | 2017-11-19 16:19:48 +0100 |
---|---|---|
committer | Michał Górny <mgorny@gentoo.org> | 2017-11-19 16:19:48 +0100 |
commit | 7a76c9f734790f1ab0985bc7a2539b8e18d78174 (patch) | |
tree | 62d38aa27ea6711073696f771ece8b5a039098b4 /utils/gen_fast_metamanifest.py | |
parent | 3124d8bd330dda6eed5a588439f470b30ae33bb3 (diff) | |
download | gemato-7a76c9f734790f1ab0985bc7a2539b8e18d78174.tar.gz |
gen_fast_metamanifest: Enable multiprocessing
Diffstat (limited to 'utils/gen_fast_metamanifest.py')
-rwxr-xr-x | utils/gen_fast_metamanifest.py | 65 |
1 files changed, 39 insertions, 26 deletions
```diff
diff --git a/utils/gen_fast_metamanifest.py b/utils/gen_fast_metamanifest.py
index f0e0d90..60fdc76 100755
--- a/utils/gen_fast_metamanifest.py
+++ b/utils/gen_fast_metamanifest.py
@@ -7,6 +7,7 @@
 import datetime
 import glob
 import io
+import multiprocessing
 import os
 import os.path
 import subprocess
@@ -17,37 +18,38 @@ sys.path.insert(0, os.path.dirname(__file__))
 import gen_fast_manifest
 
 
-def manifest_dir_generator():
+def manifest_dir_generator(iter_n):
     with io.open('profiles/categories', 'r') as f:
         categories = [x.strip() for x in f]
 
     for c in categories:
-        # all package directories
-        for d in glob.glob(os.path.join(c, '*/')):
-            yield d
-        # category directory
-        yield c
-        # md5-cache for the category
-        yield os.path.join('metadata/md5-cache', c)
-
-    # few special metadata directories
-    yield 'metadata/glsa'
-    yield 'metadata/md5-cache'
-    yield 'metadata/news'
-
-    # top-level dirs
-    yield 'metadata'
-    yield 'eclass'
-    yield 'licenses'
-    yield 'profiles'
-
-    # finally, the whole repo
-    yield '.'
+        if iter_n == 1:
+            # all package directories
+            for d in glob.glob(os.path.join(c, '*/')):
+                yield d
+            # md5-cache for the category
+            yield os.path.join('metadata/md5-cache', c)
+        elif iter_n == 2:
+            # category directory
+            yield c
+
+    if iter_n == 1:
+        # few special metadata subdirectories
+        yield 'metadata/glsa'
+        yield 'metadata/md5-cache'
+        yield 'metadata/news'
+
+        # independent top-level dirs
+        yield 'eclass'
+        yield 'licenses'
+        yield 'profiles'
+    elif iter_n == 2:
+        # top-level dirs
+        yield 'metadata'
 
 
 def gen_metamanifest(top_dir):
     os.chdir(top_dir)
-    alldirs = manifest_dir_generator()
 
     # pre-populate IGNORE entries
     with io.open('metadata/Manifest', 'wb') as f:
@@ -62,9 +64,20 @@ IGNORE local
 IGNORE packages
 ''')
 
-    # call the fast-gen routine
-    for path in alldirs:
-        gen_fast_manifest.gen_manifest(path)
+    p = multiprocessing.Pool()
+
+    # generate 1st batch of sub-Manifests
+    # expecting 20000+ items, so use iterator with a reasonably large
+    # chunksize
+    p.map(gen_fast_manifest.gen_manifest, manifest_dir_generator(1), chunksize=64)
+
+    # 2nd batch (files depending on results of 1st batch)
+    # this one is fast to generate, so let's pass a list and let map()
+    # choose optimal chunksize
+    p.map(gen_fast_manifest.gen_manifest, list(manifest_dir_generator(2)))
+
+    # finally, generate the top-level Manifest
+    gen_fast_manifest.gen_manifest('.')
 
     # write timestamp
     with io.open('Manifest', 'ab') as f:
```