diff options
author | Michał Górny <mgorny@gentoo.org> | 2017-10-23 22:21:19 +0200 |
---|---|---|
committer | Michał Górny <mgorny@gentoo.org> | 2017-10-23 22:21:19 +0200 |
commit | c45078cc8ee6f268dc3ec64ef42eb10f1e824623 (patch) | |
tree | 47e7665a6c8a5b30cabaa1528199abd04d827bc0 | |
parent | f5adf65a3b1c81d09aee58a17816c762ccc84a0b (diff) | |
download | gemato-c45078cc8ee6f268dc3ec64ef42eb10f1e824623.tar.gz |
hash: Support using concurrent threads
-rw-r--r-- | gemato/hash.py | 43 |
1 files changed, 37 insertions, 6 deletions
diff --git a/gemato/hash.py b/gemato/hash.py
index c7bd971..6ad0a27 100644
--- a/gemato/hash.py
+++ b/gemato/hash.py
@@ -5,6 +5,11 @@
 
 import hashlib
 import io
+try:
+    import queue
+except ImportError:
+    import Queue as queue
+import threading
 
 HASH_BUFFER_SIZE = 65536
 
@@ -69,18 +74,44 @@ def get_hash_by_name(name):
     raise UnsupportedHash(name)
 
 
+def hash_one(hn, h, q, ret, retlock):
+    while True:
+        data = q.get()
+        if data is not None:
+            h.update(data)
+        if data is None:
+            break
+
+    retlock.acquire()
+    ret[hn] = h.hexdigest()
+    retlock.release()
+
+
 def hash_file(f, hash_names):
     """
     Hash the contents of file object @f using all hashes specified
     as @hash_names. Returns a dict of (hash_name -> hex value) mappings.
     """
-    hashes = {}
-    for h in hash_names:
-        hashes[h] = get_hash_by_name(h)
+    queues = []
+    threads = []
+    ret = {}
+    retlock = threading.Lock()
+    for hn in hash_names:
+        h = get_hash_by_name(hn)
+        q = queue.Queue(8)
+        queues.append(q)
+        threads.append(threading.Thread(target=hash_one,
+            args=(hn, h, q, ret, retlock)))
+    for t in threads:
+        t.start()
     for block in iter(lambda: f.read(HASH_BUFFER_SIZE), b''):
-        for h in hashes.values():
-            h.update(block)
-    return dict((k, h.hexdigest()) for k, h in hashes.items())
+        for q in queues:
+            q.put(block)
+    for q in queues:
+        q.put(None)
+    for t in threads:
+        t.join()
+    return ret
 
 
 def hash_path(path, hash_names):