

fix: gradio app breaks on arm
@e827e03d06343caa1ae9843877e9de7ba912e59c
--- src/faster_whisper_server/gradio_app.py
+++ src/faster_whisper_server/gradio_app.py
... | ... | @@ -1,5 +1,6 @@ |
1 | 1 |
from collections.abc import AsyncGenerator |
2 | 2 |
from pathlib import Path |
3 |
+import platform |
|
3 | 4 |
|
4 | 5 |
import gradio as gr |
5 | 6 |
import httpx |
... | ... | @@ -8,14 +9,6 @@ |
8 | 9 |
|
9 | 10 |
from faster_whisper_server.config import Config, Task |
10 | 11 |
from faster_whisper_server.hf_utils import PiperModel |
11 |
- |
|
12 |
-# FIX: this won't work on ARM |
|
13 |
-from faster_whisper_server.routers.speech import ( |
|
14 |
- DEFAULT_VOICE, |
|
15 |
- MAX_SAMPLE_RATE, |
|
16 |
- MIN_SAMPLE_RATE, |
|
17 |
- SUPPORTED_RESPONSE_FORMATS, |
|
18 |
-) |
|
19 | 12 |
|
20 | 13 |
TRANSCRIPTION_ENDPOINT = "/v1/audio/transcriptions" |
21 | 14 |
TRANSLATION_ENDPOINT = "/v1/audio/translations" |
... | ... | @@ -163,13 +156,20 @@ |
163 | 156 |
) |
164 | 157 |
|
165 | 158 |
with gr.Tab(label="Speech Generation"): |
166 |
- # TODO: add warning about ARM |
|
167 |
- text = gr.Textbox(label="Input Text") |
|
168 |
- voice_dropdown = gr.Dropdown( |
|
169 |
- choices=["en_US-amy-medium"], |
|
170 |
- label="Voice", |
|
171 |
- value="en_US-amy-medium", |
|
172 |
- info=""" |
|
159 |
+ if platform.machine() == "x86_64": |
|
160 |
+ from faster_whisper_server.routers.speech import ( |
|
161 |
+ DEFAULT_VOICE, |
|
162 |
+ MAX_SAMPLE_RATE, |
|
163 |
+ MIN_SAMPLE_RATE, |
|
164 |
+ SUPPORTED_RESPONSE_FORMATS, |
|
165 |
+ ) |
|
166 |
+ |
|
167 |
+ text = gr.Textbox(label="Input Text") |
|
168 |
+ voice_dropdown = gr.Dropdown( |
|
169 |
+ choices=["en_US-amy-medium"], |
|
170 |
+ label="Voice", |
|
171 |
+ value="en_US-amy-medium", |
|
172 |
+ info=""" |
|
173 | 173 |
The last part of the voice name is the quality (x_low, low, medium, high). |
174 | 174 |
Each quality has a different default sample rate: |
175 | 175 |
- x_low: 16000 Hz |
... | ... | @@ -177,32 +177,34 @@ |
177 | 177 |
- medium: 22050 Hz |
178 | 178 |
- high: 22050 Hz |
179 | 179 |
""", |
180 |
- ) |
|
181 |
- response_fromat_dropdown = gr.Dropdown( |
|
182 |
- choices=SUPPORTED_RESPONSE_FORMATS, |
|
183 |
- label="Response Format", |
|
184 |
- value="wav", |
|
185 |
- ) |
|
186 |
- speed_slider = gr.Slider(minimum=0.25, maximum=4.0, step=0.05, label="Speed", value=1.0) |
|
187 |
- sample_rate_slider = gr.Number( |
|
188 |
- minimum=MIN_SAMPLE_RATE, |
|
189 |
- maximum=MAX_SAMPLE_RATE, |
|
190 |
- label="Desired Sample Rate", |
|
191 |
- info=""" |
|
180 |
+ ) |
|
181 |
+ response_fromat_dropdown = gr.Dropdown( |
|
182 |
+ choices=SUPPORTED_RESPONSE_FORMATS, |
|
183 |
+ label="Response Format", |
|
184 |
+ value="wav", |
|
185 |
+ ) |
|
186 |
+ speed_slider = gr.Slider(minimum=0.25, maximum=4.0, step=0.05, label="Speed", value=1.0) |
|
187 |
+ sample_rate_slider = gr.Number( |
|
188 |
+ minimum=MIN_SAMPLE_RATE, |
|
189 |
+ maximum=MAX_SAMPLE_RATE, |
|
190 |
+ label="Desired Sample Rate", |
|
191 |
+ info=""" |
|
192 | 192 |
Setting this will resample the generated audio to the desired sample rate. |
193 | 193 |
You may want to set this if you are going to use voices of different qualities but want to keep the same sample rate. |
194 | 194 |
Default: None (No resampling) |
195 | 195 |
""", |
196 |
- value=lambda: None, |
|
197 |
- ) |
|
198 |
- button = gr.Button("Generate Speech") |
|
199 |
- output = gr.Audio(type="filepath") |
|
200 |
- button.click( |
|
201 |
- handle_audio_speech, |
|
202 |
- [text, voice_dropdown, response_fromat_dropdown, speed_slider, sample_rate_slider], |
|
203 |
- output, |
|
204 |
- ) |
|
196 |
+ value=lambda: None, |
|
197 |
+ ) |
|
198 |
+ button = gr.Button("Generate Speech") |
|
199 |
+ output = gr.Audio(type="filepath") |
|
200 |
+ button.click( |
|
201 |
+ handle_audio_speech, |
|
202 |
+ [text, voice_dropdown, response_fromat_dropdown, speed_slider, sample_rate_slider], |
|
203 |
+ output, |
|
204 |
+ ) |
|
205 |
+ demo.load(update_piper_voices_dropdown, inputs=None, outputs=voice_dropdown) |
|
206 |
+ else: |
|
207 |
+ gr.Textbox("Speech generation is only supported on x86_64 machines.") |
|
205 | 208 |
|
206 | 209 |
demo.load(update_whisper_model_dropdown, inputs=None, outputs=model_dropdown) |
207 |
- demo.load(update_piper_voices_dropdown, inputs=None, outputs=voice_dropdown) |
|
208 | 210 |
return demo |
Add a comment
Delete comment
Once you delete this comment, you won't be able to recover it. Are you sure you want to delete this comment?