structure saas with tools
This commit is contained in:
@@ -0,0 +1,64 @@
|
||||
"""Utilities to compute the SHA-256 hash of a file object and the git-style SHA-1 hash of a bytes payload."""
|
||||
|
||||
from typing import BinaryIO, Optional
|
||||
|
||||
from .insecure_hashlib import sha1, sha256
|
||||
|
||||
|
||||
def sha_fileobj(fileobj: BinaryIO, chunk_size: Optional[int] = None) -> bytes:
    """
    Hash the content of a binary file object with sha256, reading it piece by piece.

    The object is consumed through successive `fileobj.read(chunk_size)` calls until a read
    returns an empty (falsy) result, so the whole content never has to be requested in a single read.

    Args:
        fileobj (file-like object):
            Binary file object to hash, typically obtained with `open(path, "rb")`.
        chunk_size (`int`, *optional*):
            Number of bytes requested from `fileobj` per read. When `None`, `1024 * 1024` bytes are used.

    Returns:
        `bytes`: the sha256 digest of everything read from `fileobj`, as raw bytes.
    """
    if chunk_size is None:
        chunk_size = 1024 * 1024

    hasher = sha256()

    # Every block is fed to the hasher before it is tested for emptiness, so the
    # final empty read is passed to `update` as well.
    block = fileobj.read(chunk_size)
    hasher.update(block)
    while block:
        block = fileobj.read(chunk_size)
        hasher.update(block)

    return hasher.digest()
|
||||
|
||||
|
||||
def git_hash(data: bytes) -> str:
    """
    Computes the git-sha1 hash of a bytes payload, following the algorithm git uses for blobs.

    The result matches what `git hash-object` outputs for the same content. See
    https://git-scm.com/docs/git-hash-object for more details.

    Note: this only applies to regular files. For LFS files, the proper git hash is supposed to be computed on
    the pointer file content rather than on the actual file content. However, for simplicity, LFS files are
    compared directly through the sha256 of their content instead.

    Args:
        data (`bytes`):
            The content to compute the git-hash for.

    Returns:
        `str`: the git-hash of `data`, as a hexadecimal string.

    Example:
    ```python
    >>> from huggingface_hub.utils.sha import git_hash
    >>> git_hash(b"Hello, World!")
    'b45ef6fec89518d314f546fd6c3025367b721684'
    ```
    """
    # Approach taken from https://gist.github.com/msabramo/763200
    # The payload is hashed in a single call: reading by chunks is not needed since
    # huge files are not supposed to be hashed here (5MB maximum).

    # Prefix fed to sha1 before the payload: "blob ", the decimal byte length, then a NUL byte.
    header = b"blob " + str(len(data)).encode() + b"\0"

    hasher = sha1()
    hasher.update(header)
    hasher.update(data)
    return hasher.hexdigest()
|
||||
Reference in New Issue
Block a user