

feat: add more pre-commit hooks
@d0feed84e876122a18a648512f30adc5eddd1f6a
--- .pre-commit-config.yaml
+++ .pre-commit-config.yaml
... | ... | @@ -8,21 +8,41 @@ |
8 | 8 |
- id: end-of-file-fixer |
9 | 9 |
- id: check-yaml |
10 | 10 |
- id: check-added-large-files |
11 |
- # TODO: enable |
|
12 |
- # - repo: https://github.com/pre-commit/mirrors-mypy |
|
13 |
- # rev: v1.10.0 |
|
14 |
- # hooks: |
|
15 |
- # - id: mypy |
|
16 |
- # args: [--strict] |
|
17 |
- # TODO: enable |
|
18 |
- # - repo: https://github.com/RobertCraigie/pyright-python |
|
19 |
- # rev: v1.1.363 |
|
20 |
- # hooks: |
|
21 |
- # - id: pyright |
|
22 |
- # Disabled because it doesn't work on NixOS |
|
23 |
- # - repo: https://github.com/astral-sh/ruff-pre-commit |
|
24 |
- # rev: v0.4.4 |
|
25 |
- # hooks: |
|
26 |
- # - id: ruff # linter |
|
27 |
- # args: [--fix] |
|
28 |
- # - id: ruff-format |
|
11 |
+ - repo: https://github.com/python-jsonschema/check-jsonschema |
|
12 |
+ rev: 0.28.4 |
|
13 |
+ hooks: |
|
14 |
+ - id: check-taskfile |
|
15 |
+ - repo: https://github.com/rhysd/actionlint |
|
16 |
+ rev: v1.7.0 |
|
17 |
+ hooks: |
|
18 |
+ - id: actionlint |
|
19 |
+ - repo: https://github.com/IamTheFij/docker-pre-commit |
|
20 |
+ rev: v3.0.1 |
|
21 |
+ hooks: |
|
22 |
+ - id: docker-compose-check |
|
23 |
+ - repo: https://github.com/hadolint/hadolint |
|
24 |
+ rev: v2.12.0 |
|
25 |
+ hooks: |
|
26 |
+ - id: hadolint |
|
27 |
+ - repo: https://github.com/shellcheck-py/shellcheck-py |
|
28 |
+ rev: v0.10.0.1 |
|
29 |
+ hooks: |
|
30 |
+ - id: shellcheck |
|
31 |
+ # NOTE: not using https://github.com/RobertCraigie/pyright-python because it doesn't work with poetry virtual environments |
|
32 |
+ # NOTE: not using https://github.com/astral-sh/ruff-pre-commit because it doesn't work on NixOS |
|
33 |
+ - repo: local |
|
34 |
+ hooks: |
|
35 |
+ - id: pyright |
|
36 |
+ name: pyright |
|
37 |
+ entry: ./pre-commit-scripts/pyright.sh |
|
38 |
+ language: script |
|
39 |
+ pass_filenames: false |
|
40 |
+ - id: ruff-lint |
|
41 |
+ name: ruff-lint |
|
42 |
+ entry: ./pre-commit-scripts/ruff-lint.sh |
|
43 |
+ pass_filenames: false |
|
44 |
+ language: script |
|
45 |
+ - id: ruff-format |
|
46 |
+ name: ruff-format |
|
47 |
+ entry: ./pre-commit-scripts/ruff-format.sh |
|
48 |
+ language: script |
--- Dockerfile.cpu
+++ Dockerfile.cpu
... | ... | @@ -1,9 +1,12 @@ |
1 | 1 |
FROM ubuntu:22.04 |
2 |
+# hadolint ignore=DL3008,DL4006 |
|
2 | 3 |
RUN apt-get update && \ |
3 |
- apt-get install -y curl software-properties-common && \ |
|
4 |
+ apt-get install -y --no-install-recommends curl software-properties-common && \ |
|
4 | 5 |
add-apt-repository ppa:deadsnakes/ppa && \ |
5 | 6 |
apt-get update && \ |
6 |
- DEBIAN_FRONTEND=noninteractive apt-get -y install python3.11 python3.11-distutils && \ |
|
7 |
+ DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends python3.11 python3.11-distutils && \ |
|
8 |
+ apt-get clean && \ |
|
9 |
+ rm -rf /var/lib/apt/lists/* && \ |
|
7 | 10 |
curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11 |
8 | 11 |
RUN pip install --no-cache-dir poetry==1.8.2 |
9 | 12 |
WORKDIR /root/speaches |
--- Dockerfile.cuda
+++ Dockerfile.cuda
... | ... | @@ -1,9 +1,12 @@ |
1 | 1 |
FROM nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu22.04 |
2 |
+# hadolint ignore=DL3008,DL4006 |
|
2 | 3 |
RUN apt-get update && \ |
3 |
- apt-get install -y curl software-properties-common && \ |
|
4 |
+ apt-get install -y --no-install-recommends curl software-properties-common && \ |
|
4 | 5 |
add-apt-repository ppa:deadsnakes/ppa && \ |
5 | 6 |
apt-get update && \ |
6 |
- DEBIAN_FRONTEND=noninteractive apt-get -y install python3.11 python3.11-distutils && \ |
|
7 |
+ DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends python3.11 python3.11-distutils && \ |
|
8 |
+ apt-get clean && \ |
|
9 |
+ rm -rf /var/lib/apt/lists/* && \ |
|
7 | 10 |
curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11 |
8 | 11 |
RUN pip install --no-cache-dir poetry==1.8.2 |
9 | 12 |
WORKDIR /root/speaches |
+++ pre-commit-scripts/pyright.sh
... | ... | @@ -0,0 +1,4 @@ |
1 | +#!/usr/bin/env bash | |
2 | +# shellcheck disable=SC1091 | |
3 | +source "$(poetry env info --path)"/bin/activate | |
4 | +pyright |
+++ pre-commit-scripts/ruff-format.sh
... | ... | @@ -0,0 +1,2 @@ |
1 | +#!/usr/bin/env bash | |
2 | +ruff format |
+++ pre-commit-scripts/ruff-lint.sh
... | ... | @@ -0,0 +1,2 @@ |
1 | +#!/usr/bin/env bash | |
2 | +ruff check --fix |
--- pyproject.toml
+++ pyproject.toml
... | ... | @@ -22,6 +22,11 @@ |
22 | 22 |
[tool.ruff] |
23 | 23 |
target-version = "py311" |
24 | 24 |
|
25 |
+[tool.pyright] |
|
26 |
+# typeCheckingMode = "strict" |
|
27 |
+pythonVersion = "3.11" |
|
28 |
+pythonPlatform = "Linux" |
|
29 |
+ |
|
25 | 30 |
[build-system] |
26 | 31 |
requires = ["poetry-core"] |
27 | 32 |
build-backend = "poetry.core.masonry.api" |
--- speaches/main.py
+++ speaches/main.py
... | ... | @@ -6,8 +6,15 @@ |
6 | 6 |
from io import BytesIO |
7 | 7 |
from typing import Annotated, Literal, OrderedDict |
8 | 8 |
|
9 |
-from fastapi import (FastAPI, Form, Query, Response, UploadFile, WebSocket, |
|
10 |
- WebSocketDisconnect) |
|
9 |
+from fastapi import ( |
|
10 |
+ FastAPI, |
|
11 |
+ Form, |
|
12 |
+ Query, |
|
13 |
+ Response, |
|
14 |
+ UploadFile, |
|
15 |
+ WebSocket, |
|
16 |
+ WebSocketDisconnect, |
|
17 |
+) |
|
11 | 18 |
from fastapi.responses import StreamingResponse |
12 | 19 |
from fastapi.websockets import WebSocketState |
13 | 20 |
from faster_whisper import WhisperModel |
... | ... | @@ -16,11 +23,12 @@ |
16 | 23 |
from speaches import utils |
17 | 24 |
from speaches.asr import FasterWhisperASR |
18 | 25 |
from speaches.audio import AudioStream, audio_samples_from_file |
19 |
-from speaches.config import (SAMPLES_PER_SECOND, Language, Model, |
|
20 |
- ResponseFormat, config) |
|
26 |
+from speaches.config import SAMPLES_PER_SECOND, Language, Model, ResponseFormat, config |
|
21 | 27 |
from speaches.logger import logger |
22 |
-from speaches.server_models import (TranscriptionJsonResponse, |
|
23 |
- TranscriptionVerboseJsonResponse) |
|
28 |
+from speaches.server_models import ( |
|
29 |
+ TranscriptionJsonResponse, |
|
30 |
+ TranscriptionVerboseJsonResponse, |
|
31 |
+) |
|
24 | 32 |
from speaches.transcriber import audio_transcriber |
25 | 33 |
|
26 | 34 |
models: OrderedDict[Model, WhisperModel] = OrderedDict() |
--- speaches/server_models.py
+++ speaches/server_models.py
... | ... | @@ -85,7 +85,7 @@ |
85 | 85 |
text=segment.text, |
86 | 86 |
words=( |
87 | 87 |
[WordObject.from_word(word) for word in segment.words] |
88 |
- if type(segment.words) == list |
|
88 |
+ if isinstance(segment.words, list) |
|
89 | 89 |
else [] |
90 | 90 |
), |
91 | 91 |
segments=[SegmentObject.from_segment(segment)], |
--- tests/app_test.py
+++ tests/app_test.py
... | ... | @@ -12,7 +12,7 @@ |
12 | 12 |
|
13 | 13 |
from speaches.config import BYTES_PER_SECOND |
14 | 14 |
from speaches.main import app |
15 |
-from speaches.server_models import TranscriptionVerboseResponse |
|
15 |
+from speaches.server_models import TranscriptionVerboseJsonResponse |
|
16 | 16 |
|
17 | 17 |
SIMILARITY_THRESHOLD = 0.97 |
18 | 18 |
AUDIO_FILES_LIMIT = 5 |
... | ... | @@ -54,13 +54,13 @@ |
54 | 54 |
|
55 | 55 |
def transcribe_audio_data( |
56 | 56 |
client: TestClient, data: bytes |
57 |
-) -> TranscriptionVerboseResponse: |
|
57 |
+) -> TranscriptionVerboseJsonResponse: |
|
58 | 58 |
response = client.post( |
59 | 59 |
TRANSCRIBE_ENDPOINT, |
60 | 60 |
files={"file": ("audio.raw", data, "audio/raw")}, |
61 | 61 |
) |
62 | 62 |
data = json.loads(response.json()) # TODO: figure out why the response body is a JSON-encoded string that needs a second decode |
63 |
- return TranscriptionVerboseResponse(**data) # type: ignore |
|
63 |
+ return TranscriptionVerboseJsonResponse(**data) # type: ignore |
|
64 | 64 |
|
65 | 65 |
|
66 | 66 |
@pytest.mark.parametrize("file_path", file_paths) |
... | ... | @@ -70,14 +70,16 @@ |
70 | 70 |
with open(file_path, "rb") as file: |
71 | 71 |
data = file.read() |
72 | 72 |
|
73 |
- streaming_transcription: TranscriptionVerboseResponse = None # type: ignore |
|
73 |
+ streaming_transcription: TranscriptionVerboseJsonResponse = None # type: ignore |
|
74 | 74 |
thread = threading.Thread( |
75 | 75 |
target=stream_audio_data, args=(ws, data), kwargs={"speed": 4.0} |
76 | 76 |
) |
77 | 77 |
thread.start() |
78 | 78 |
while True: |
79 | 79 |
try: |
80 |
- streaming_transcription = TranscriptionVerboseResponse(**ws.receive_json()) |
|
80 |
+ streaming_transcription = TranscriptionVerboseJsonResponse( |
|
81 |
+ **ws.receive_json() |
|
82 |
+ ) |
|
81 | 83 |
except WebSocketDisconnect: |
82 | 84 |
break |
83 | 85 |
file_transcription = transcribe_audio_data(client, data) |
Add a comment
Delete comment
Once you delete this comment, you won't be able to recover it. Are you sure you want to delete this comment?