summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Michał Górny <mgorny@gentoo.org> 2017-10-23 22:21:19 +0200
committer: Michał Górny <mgorny@gentoo.org> 2017-10-23 22:21:19 +0200
commit: c45078cc8ee6f268dc3ec64ef42eb10f1e824623 (patch)
tree: 47e7665a6c8a5b30cabaa1528199abd04d827bc0
parent: f5adf65a3b1c81d09aee58a17816c762ccc84a0b (diff)
download: gemato-c45078cc8ee6f268dc3ec64ef42eb10f1e824623.tar.gz
hash: Support using concurrent threads
-rw-r--r-- gemato/hash.py 43
1 files changed, 37 insertions, 6 deletions
diff --git a/gemato/hash.py b/gemato/hash.py
index c7bd971..6ad0a27 100644
--- a/gemato/hash.py
+++ b/gemato/hash.py
@@ -5,6 +5,11 @@
import hashlib
import io
+try:
+ import queue
+except ImportError:
+ import Queue as queue
+import threading
HASH_BUFFER_SIZE = 65536
@@ -69,18 +74,44 @@ def get_hash_by_name(name):
raise UnsupportedHash(name)
+def hash_one(hn, h, q, ret, retlock):
+ while True:
+ data = q.get()
+ if data is not None:
+ h.update(data)
+ if data is None:
+ break
+
+ retlock.acquire()
+ ret[hn] = h.hexdigest()
+ retlock.release()
+
+
def hash_file(f, hash_names):
"""
Hash the contents of file object @f using all hashes specified
as @hash_names. Returns a dict of (hash_name -> hex value) mappings.
"""
- hashes = {}
- for h in hash_names:
- hashes[h] = get_hash_by_name(h)
+ queues = []
+ threads = []
+ ret = {}
+ retlock = threading.Lock()
+ for hn in hash_names:
+ h = get_hash_by_name(hn)
+ q = queue.Queue(8)
+ queues.append(q)
+ threads.append(threading.Thread(target=hash_one,
+ args=(hn, h, q, ret, retlock)))
+ for t in threads:
+ t.start()
for block in iter(lambda: f.read(HASH_BUFFER_SIZE), b''):
- for h in hashes.values():
- h.update(block)
- return dict((k, h.hexdigest()) for k, h in hashes.items())
+ for q in queues:
+ q.put(block)
+ for q in queues:
+ q.put(None)
+ for t in threads:
+ t.join()
+ return ret
def hash_path(path, hash_names):