structure saas with tools
This commit is contained in:
@@ -0,0 +1,141 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2022-present, the HuggingFace Inc. team.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Contains utilities to handle paths in Huggingface Hub."""
|
||||
|
||||
from fnmatch import fnmatch
|
||||
from pathlib import Path
|
||||
from typing import Callable, Generator, Iterable, List, Optional, TypeVar, Union
|
||||
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
# Always ignore `.git` and `.cache/huggingface` folders in commits
|
||||
DEFAULT_IGNORE_PATTERNS = [
|
||||
".git",
|
||||
".git/*",
|
||||
"*/.git",
|
||||
"**/.git/**",
|
||||
".cache/huggingface",
|
||||
".cache/huggingface/*",
|
||||
"*/.cache/huggingface",
|
||||
"**/.cache/huggingface/**",
|
||||
]
|
||||
# Forbidden to commit these folders
|
||||
FORBIDDEN_FOLDERS = [".git", ".cache"]
|
||||
|
||||
|
||||
def filter_repo_objects(
|
||||
items: Iterable[T],
|
||||
*,
|
||||
allow_patterns: Optional[Union[List[str], str]] = None,
|
||||
ignore_patterns: Optional[Union[List[str], str]] = None,
|
||||
key: Optional[Callable[[T], str]] = None,
|
||||
) -> Generator[T, None, None]:
|
||||
"""Filter repo objects based on an allowlist and a denylist.
|
||||
|
||||
Input must be a list of paths (`str` or `Path`) or a list of arbitrary objects.
|
||||
In the later case, `key` must be provided and specifies a function of one argument
|
||||
that is used to extract a path from each element in iterable.
|
||||
|
||||
Patterns are Unix shell-style wildcards which are NOT regular expressions. See
|
||||
https://docs.python.org/3/library/fnmatch.html for more details.
|
||||
|
||||
Args:
|
||||
items (`Iterable`):
|
||||
List of items to filter.
|
||||
allow_patterns (`str` or `List[str]`, *optional*):
|
||||
Patterns constituting the allowlist. If provided, item paths must match at
|
||||
least one pattern from the allowlist.
|
||||
ignore_patterns (`str` or `List[str]`, *optional*):
|
||||
Patterns constituting the denylist. If provided, item paths must not match
|
||||
any patterns from the denylist.
|
||||
key (`Callable[[T], str]`, *optional*):
|
||||
Single-argument function to extract a path from each item. If not provided,
|
||||
the `items` must already be `str` or `Path`.
|
||||
|
||||
Returns:
|
||||
Filtered list of objects, as a generator.
|
||||
|
||||
Raises:
|
||||
:class:`ValueError`:
|
||||
If `key` is not provided and items are not `str` or `Path`.
|
||||
|
||||
Example usage with paths:
|
||||
```python
|
||||
>>> # Filter only PDFs that are not hidden.
|
||||
>>> list(filter_repo_objects(
|
||||
... ["aaa.PDF", "bbb.jpg", ".ccc.pdf", ".ddd.png"],
|
||||
... allow_patterns=["*.pdf"],
|
||||
... ignore_patterns=[".*"],
|
||||
... ))
|
||||
["aaa.pdf"]
|
||||
```
|
||||
|
||||
Example usage with objects:
|
||||
```python
|
||||
>>> list(filter_repo_objects(
|
||||
... [
|
||||
... CommitOperationAdd(path_or_fileobj="/tmp/aaa.pdf", path_in_repo="aaa.pdf")
|
||||
... CommitOperationAdd(path_or_fileobj="/tmp/bbb.jpg", path_in_repo="bbb.jpg")
|
||||
... CommitOperationAdd(path_or_fileobj="/tmp/.ccc.pdf", path_in_repo=".ccc.pdf")
|
||||
... CommitOperationAdd(path_or_fileobj="/tmp/.ddd.png", path_in_repo=".ddd.png")
|
||||
... ],
|
||||
... allow_patterns=["*.pdf"],
|
||||
... ignore_patterns=[".*"],
|
||||
... key=lambda x: x.repo_in_path
|
||||
... ))
|
||||
[CommitOperationAdd(path_or_fileobj="/tmp/aaa.pdf", path_in_repo="aaa.pdf")]
|
||||
```
|
||||
"""
|
||||
if isinstance(allow_patterns, str):
|
||||
allow_patterns = [allow_patterns]
|
||||
|
||||
if isinstance(ignore_patterns, str):
|
||||
ignore_patterns = [ignore_patterns]
|
||||
|
||||
if allow_patterns is not None:
|
||||
allow_patterns = [_add_wildcard_to_directories(p) for p in allow_patterns]
|
||||
if ignore_patterns is not None:
|
||||
ignore_patterns = [_add_wildcard_to_directories(p) for p in ignore_patterns]
|
||||
|
||||
if key is None:
|
||||
|
||||
def _identity(item: T) -> str:
|
||||
if isinstance(item, str):
|
||||
return item
|
||||
if isinstance(item, Path):
|
||||
return str(item)
|
||||
raise ValueError(f"Please provide `key` argument in `filter_repo_objects`: `{item}` is not a string.")
|
||||
|
||||
key = _identity # Items must be `str` or `Path`, otherwise raise ValueError
|
||||
|
||||
for item in items:
|
||||
path = key(item)
|
||||
|
||||
# Skip if there's an allowlist and path doesn't match any
|
||||
if allow_patterns is not None and not any(fnmatch(path, r) for r in allow_patterns):
|
||||
continue
|
||||
|
||||
# Skip if there's a denylist and path matches any
|
||||
if ignore_patterns is not None and any(fnmatch(path, r) for r in ignore_patterns):
|
||||
continue
|
||||
|
||||
yield item
|
||||
|
||||
|
||||
def _add_wildcard_to_directories(pattern: str) -> str:
|
||||
if pattern[-1] == "/":
|
||||
return pattern + "*"
|
||||
return pattern
|
||||
Reference in New Issue
Block a user