

chore: add `hf_utils` module
@8fbd6f0b387b1be0d6c4d21cd093a8a70c840ced
+++ src/faster_whisper_server/hf_utils.py
... | ... | @@ -0,0 +1,37 @@ |
1 | +from collections.abc import Generator | |
2 | +from pathlib import Path | |
3 | +import typing | |
4 | + | |
5 | +import huggingface_hub | |
6 | + | |
7 | +from faster_whisper_server.logger import logger | |
8 | + | |
# Model-card `library_name` a cached repo must declare to be listed by `list_local_models`.
LIBRARY_NAME = "ctranslate2"
# Model-card tag a cached repo must carry to be listed by `list_local_models`.
TASK_NAME = "automatic-speech-recognition"
11 | + | |
12 | + | |
def does_local_model_exist(model_id: str) -> bool:
    """Return True if `model_id` is the repo id of a model yielded by `list_local_models`.

    Only models that pass the filtering done by `list_local_models` are
    considered; any other cached repo is reported as not existing.
    """
    for cached_repo, _card_data in list_local_models():
        if cached_repo.repo_id == model_id:
            return True
    return False
15 | + | |
16 | + | |
def list_local_models() -> Generator[tuple[huggingface_hub.CachedRepoInfo, huggingface_hub.ModelCardData], None, None]:
    """Yield cached Hugging Face models whose model card matches `LIBRARY_NAME` and `TASK_NAME`.

    The local Hugging Face cache is scanned and every repo of type "model" is
    inspected. The model card is read from the cached `README.md`; when that
    file is not in the cache it is fetched with `hf_hub_download`, and any
    error raised by that call propagates to the caller.

    Yields:
        `(repo, model_card_data)` pairs for each model whose card declares
        `library_name == LIBRARY_NAME` and lists `TASK_NAME` among its tags.
    """
    hf_cache = huggingface_hub.scan_cache_dir()
    for model in hf_cache.repos:
        if model.repo_type != "model":
            continue

        # `next()` without a default would raise StopIteration for a repo with no
        # cached revisions; inside a generator that surfaces as RuntimeError
        # (PEP 479) and would abort the whole listing, so skip such repos instead.
        revision = next(iter(model.revisions), None)
        if revision is None:
            logger.debug(f"Model {model.repo_id} does not have any cached revisions. Skipping it.")
            continue

        cached_readme_file = next((f for f in revision.files if f.file_name == "README.md"), None)
        if cached_readme_file:
            readme_file_path = Path(cached_readme_file.file_path)
        else:
            # NOTE: the README.md doesn't get downloaded when `WhisperModel` is called
            logger.debug(f"Model {model.repo_id} does not have a README.md file. Downloading it.")
            readme_file_path = Path(huggingface_hub.hf_hub_download(model.repo_id, "README.md"))

        model_card = huggingface_hub.ModelCard.load(readme_file_path)
        model_card_data = typing.cast(huggingface_hub.ModelCardData, model_card.data)
        if (
            model_card_data.library_name == LIBRARY_NAME
            and model_card_data.tags is not None
            and TASK_NAME in model_card_data.tags
        ):
            yield model, model_card_data
Add a comment
Delete comment
Once you delete this comment, you won't be able to recover it. Are you sure you want to delete this comment?