-rw-r--r--  gemato/hash.py    19
-rw-r--r--  gemato/verify.py   3
2 files changed, 18 insertions, 4 deletions
diff --git a/gemato/hash.py b/gemato/hash.py
index b5b57cd..d8691e1 100644
--- a/gemato/hash.py
+++ b/gemato/hash.py
@@ -8,8 +8,8 @@ import io
 
 import gemato.exceptions
 
-
 HASH_BUFFER_SIZE = 65536
+MAX_SLURP_SIZE = 1048576
 
 
 class SizeHash(object):
@@ -67,17 +67,30 @@ def get_hash_by_name(name):
     raise gemato.exceptions.UnsupportedHash(name)
 
 
-def hash_file(f, hash_names):
+def hash_file(f, hash_names, _apparent_size=0):
     """
     Hash the contents of file object @f using all hashes specified
     as @hash_names. Returns a dict of (hash_name -> hex value) mappings.
+
+    @_apparent_size can be given as a hint for how large the file is
+    expected to be. This is a private API used to work around a PyPy
+    bug and should not be relied on to stay around long-term.
     """
     hashes = {}
     for h in hash_names:
         hashes[h] = get_hash_by_name(h)
-    for block in iter(lambda: f.read1(HASH_BUFFER_SIZE), b''):
+    if _apparent_size != 0 and _apparent_size < MAX_SLURP_SIZE:
+        # if the file is reasonably small, read it all into one buffer;
+        # we do this since PyPy has a serious bug in passing buffers
+        # to C extensions, and a single big read apparently fails
+        # less often; https://bitbucket.org/pypy/pypy/issues/2752
+        block = f.read()
         for h in hashes.values():
             h.update(block)
+    else:
+        for block in iter(lambda: f.read1(HASH_BUFFER_SIZE), b''):
+            for h in hashes.values():
+                h.update(block)
     return dict((k, h.hexdigest()) for k, h in hashes.items())
 
 
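For context, a minimal usage sketch of the new parameter (not part of the commit; the file name and hash names are illustrative, and the 1 MiB / 64 KiB figures come from MAX_SLURP_SIZE and HASH_BUFFER_SIZE above):

    import os

    import gemato.hash

    # hypothetical file; any buffered binary file object works
    with open('example.bin', 'rb') as f:
        st = os.fstat(f.fileno())
        # files smaller than MAX_SLURP_SIZE (1 MiB) are read in one
        # f.read() call; larger ones fall back to 64 KiB read1() chunks
        checksums = gemato.hash.hash_file(
            f, ['md5', 'sha512'], _apparent_size=st.st_size)
    print(checksums)  # e.g. {'md5': '...', 'sha512': '...'}
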
diff --git a/gemato/verify.py b/gemato/verify.py
index 6193e76..7cef4e5 100644
--- a/gemato/verify.py
+++ b/gemato/verify.py
@@ -112,7 +112,8 @@ def get_file_metadata(path, hashes):
         hashes = list(gemato.manifest.manifest_hashes_to_hashlib(e_hashes))
         e_hashes.append('__size__')
         hashes.append('__size__')
-        checksums = gemato.hash.hash_file(f, hashes)
+        checksums = gemato.hash.hash_file(f, hashes,
+                _apparent_size=st.st_size)
 
     ret = {}
     for ek, k in zip(e_hashes, hashes):
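A side note on the hint (an observation, not part of the commit): st.st_size is only the apparent size, which is why hash_file() treats 0 as "unknown" and keeps the chunked loop as a fallback. Special files illustrate this:

    import os

    # assumes Linux procfs; such files usually report st_size == 0
    # even though reading them returns data, so the != 0 guard makes
    # hash_file() fall back to chunked reads for them
    st = os.stat('/proc/version')
    print(st.st_size)  # typically 0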