

feat: return 4xx on invalid files (#164)
@c3b4c8039a1c16eadd11f78afcae8a291819982c
--- src/faster_whisper_server/routers/stt.py
+++ src/faster_whisper_server/routers/stt.py
... | ... | @@ -5,8 +5,10 @@ |
5 | 5 |
import logging |
6 | 6 |
from typing import TYPE_CHECKING, Annotated |
7 | 7 |
|
8 |
+import av.error |
|
8 | 9 |
from fastapi import ( |
9 | 10 |
APIRouter, |
11 |
+ Depends, |
|
10 | 12 |
Form, |
11 | 13 |
Query, |
12 | 14 |
Request, |
... | ... | @@ -15,9 +17,13 @@ |
15 | 17 |
WebSocket, |
16 | 18 |
WebSocketDisconnect, |
17 | 19 |
) |
20 |
+from fastapi.exceptions import HTTPException |
|
18 | 21 |
from fastapi.responses import StreamingResponse |
19 | 22 |
from fastapi.websockets import WebSocketState |
23 |
+from faster_whisper.audio import decode_audio |
|
20 | 24 |
from faster_whisper.vad import VadOptions, get_speech_timestamps |
25 |
+from numpy import float32 |
|
26 |
+from numpy.typing import NDArray |
|
21 | 27 |
from pydantic import AfterValidator, Field |
22 | 28 |
|
23 | 29 |
from faster_whisper_server.api_models import ( |
... | ... | @@ -49,6 +55,35 @@ |
49 | 55 |
logger = logging.getLogger(__name__) |
50 | 56 |
|
51 | 57 |
router = APIRouter() |
58 |
+ |
|
59 |
+ |
|
60 |
+# TODO: test async vs sync performance |
|
61 |
def audio_file_dependency(
    file: Annotated[UploadFile, Form()],
) -> NDArray[float32]:
    """Decode the uploaded form file and translate decode failures into HTTP errors.

    The upload's underlying file object is handed to ``decode_audio``; whatever
    that call returns is passed straight back to the caller.

    Args:
        file: The multipart form upload to decode.

    Returns:
        The value produced by ``decode_audio`` (annotated as a float32 array).

    Raises:
        HTTPException: 415 when ``av.error.InvalidDataError`` is raised,
            400 when ``av.error.ValueError`` is raised, and 500 for any other
            exception (the traceback is logged first).
    """
    try:
        # Returning from inside the try is equivalent to try/else here: the
        # return itself cannot raise once decoding has succeeded.
        return decode_audio(file.file)  # pyright: ignore reportReturnType
    except av.error.InvalidDataError as exc:
        unsupported_media = HTTPException(
            status_code=415,
            detail="Failed to decode audio. The provided file type is not supported.",
        )
        raise unsupported_media from exc
    except av.error.ValueError as exc:
        # TODO: list supported file types
        bad_request = HTTPException(
            status_code=400,
            detail="Failed to decode audio. The provided file is likely empty.",
        )
        raise bad_request from exc
    except Exception as exc:
        # Anything else is unexpected: record the traceback before answering 500.
        logger.exception(
            "Failed to decode audio. This is likely a bug. Please create an issue at https://github.com/fedirz/faster-whisper-server/issues/new."
        )
        internal_error = HTTPException(status_code=500, detail="Failed to decode audio.")
        raise internal_error from exc
|
84 |
+ |
|
85 |
+ |
|
86 |
# Annotated alias used as the type of the route handlers' `audio` parameter:
# the value is supplied through Depends(audio_file_dependency) rather than
# taken directly from the request.
+AudioFileDependency = Annotated[NDArray[float32], Depends(audio_file_dependency)] |
|
52 | 87 |
|
53 | 88 |
|
54 | 89 |
def segments_to_response( |
... | ... | @@ -140,7 +175,7 @@ |
140 | 175 |
def translate_file( |
141 | 176 |
config: ConfigDependency, |
142 | 177 |
model_manager: ModelManagerDependency, |
143 |
- file: Annotated[UploadFile, Form()], |
|
178 |
+ audio: AudioFileDependency, |
|
144 | 179 |
model: Annotated[ModelName | None, Form()] = None, |
145 | 180 |
prompt: Annotated[str | None, Form()] = None, |
146 | 181 |
response_format: Annotated[ResponseFormat | None, Form()] = None, |
... | ... | @@ -154,7 +189,7 @@ |
154 | 189 |
response_format = config.default_response_format |
155 | 190 |
with model_manager.load_model(model) as whisper: |
156 | 191 |
segments, transcription_info = whisper.transcribe( |
157 |
- file.file, |
|
192 |
+ audio, |
|
158 | 193 |
task=Task.TRANSLATE, |
159 | 194 |
initial_prompt=prompt, |
160 | 195 |
temperature=temperature, |
... | ... | @@ -190,7 +225,7 @@ |
190 | 225 |
config: ConfigDependency, |
191 | 226 |
model_manager: ModelManagerDependency, |
192 | 227 |
request: Request, |
193 |
- file: Annotated[UploadFile, Form()], |
|
228 |
+ audio: AudioFileDependency, |
|
194 | 229 |
model: Annotated[ModelName | None, Form()] = None, |
195 | 230 |
language: Annotated[Language | None, Form()] = None, |
196 | 231 |
prompt: Annotated[str | None, Form()] = None, |
... | ... | @@ -218,7 +253,7 @@ |
218 | 253 |
) |
219 | 254 |
with model_manager.load_model(model) as whisper: |
220 | 255 |
segments, transcription_info = whisper.transcribe( |
221 |
- file.file, |
|
256 |
+ audio, |
|
222 | 257 |
task=Task.TRANSCRIBE, |
223 | 258 |
language=language, |
224 | 259 |
initial_prompt=prompt, |
Add a comment
Delete comment
Once you delete this comment, you won't be able to recover it. Are you sure you want to delete this comment?