yjyoon / whisper_server_speaches
whisper_server_speaches / src / speaches / gradio_app.py
Repository root:

| File name | Commit message | Commit date |
| --- | --- | --- |
| .github/workflows | feat: switch to ghcr.io | 01-10 |
| configuration | feat: add instrumentation | 2024-12-17 |
| docs | rename to `speaches` | 01-12 |
| examples | rename to `speaches` | 01-12 |
| scripts | chore: misc changes | 2024-10-03 |
| src/speaches | rename to `speaches` | 01-12 |
| tests | rename to `speaches` | 01-12 |
| .dockerignore | fix: .dockerignore | 01-12 |
| .envrc | init | 2024-05-20 |
| .gitattributes | chore(deps): update pre-commit hook astral-sh/ruff-pre-commit to v0.7.2 | 2024-11-02 |
| .gitignore | chore: update .gitignore | 2024-07-03 |
| .pre-commit-config.yaml | chore(deps): update pre-commit hook python-jsonschema/check-jsonschema to v0.31.0 | 01-12 |
| Dockerfile | chore(deps): update ghcr.io/astral-sh/uv docker tag to v0.5.18 | 01-12 |
| LICENSE | init | 2024-05-20 |
| README.md | rename to `speaches` | 01-12 |
| Taskfile.yaml | rename to `speaches` | 01-12 |
| audio.wav | chore: update volume names and mount points | 01-10 |
| compose.cpu.yaml | rename to `speaches` | 01-12 |
| compose.cuda-cdi.yaml | rename to `speaches` | 01-12 |
| compose.cuda.yaml | rename to `speaches` | 01-12 |
| compose.observability.yaml | chore(deps): update otel/opentelemetry-collector-contrib docker tag to v0.117.0 | 01-12 |
| compose.yaml | rename to `speaches` | 01-12 |
| flake.lock | deps: update flake | 2024-11-01 |
| flake.nix | chore(deps): add loki and tempo package to flake | 2024-12-17 |
| mkdocs.yml | rename to `speaches` | 01-12 |
| pyproject.toml | rename to `speaches` | 01-12 |
| renovate.json | feat: renovate handle pre-commit | 2024-11-01 |
| uv.lock | rename to `speaches` | 01-12 |
`src/speaches` directory:

| File name | Commit message | Commit date |
| --- | --- | --- |
| routers | rename to `speaches` | 01-12 |
| __init__.py | rename to `speaches` | 01-12 |
| api_models.py | rename to `speaches` | 01-12 |
| asr.py | rename to `speaches` | 01-12 |
| audio.py | rename to `speaches` | 01-12 |
| config.py | rename to `speaches` | 01-12 |
| dependencies.py | rename to `speaches` | 01-12 |
| gradio_app.py | rename to `speaches` | 01-12 |
| hf_utils.py | rename to `speaches` | 01-12 |
| logger.py | rename to `speaches` | 01-12 |
| main.py | rename to `speaches` | 01-12 |
| model_manager.py | rename to `speaches` | 01-12 |
| text_utils.py | rename to `speaches` | 01-12 |
| text_utils_test.py | rename to `speaches` | 01-12 |
| transcriber.py | rename to `speaches` | 01-12 |
Latest commit f3802b7 by Fedir Zadniprovskyi (01-12): rename to `speaches`

Contents of `src/speaches/gradio_app.py`:
```python
from collections.abc import AsyncGenerator
from pathlib import Path
import platform

import gradio as gr
import httpx
from httpx_sse import aconnect_sse
from openai import AsyncOpenAI

from speaches.config import Config, Task
from speaches.hf_utils import PiperModel

TRANSCRIPTION_ENDPOINT = "/v1/audio/transcriptions"
TRANSLATION_ENDPOINT = "/v1/audio/translations"
TIMEOUT_SECONDS = 180
TIMEOUT = httpx.Timeout(timeout=TIMEOUT_SECONDS)


# NOTE: `gr.Request` seems to be passed in as the last positional (not keyword) argument
def base_url_from_gradio_req(request: gr.Request) -> str:
    # NOTE: `request.request.url` seems to always have a path of "/gradio_api/queue/join"
    assert request.request is not None
    return f"{request.request.url.scheme}://{request.request.url.netloc}"


def http_client_from_gradio_req(request: gr.Request, config: Config) -> httpx.AsyncClient:
    base_url = base_url_from_gradio_req(request)
    return httpx.AsyncClient(
        base_url=base_url,
        timeout=TIMEOUT,
        headers={"Authorization": f"Bearer {config.api_key}"} if config.api_key else None,
    )


def openai_client_from_gradio_req(request: gr.Request, config: Config) -> AsyncOpenAI:
    base_url = base_url_from_gradio_req(request)
    return AsyncOpenAI(base_url=f"{base_url}/v1", api_key=config.api_key if config.api_key else "cant-be-empty")


def create_gradio_demo(config: Config) -> gr.Blocks:  # noqa: C901, PLR0915
    async def whisper_handler(
        file_path: str, model: str, task: Task, temperature: float, stream: bool, request: gr.Request
    ) -> AsyncGenerator[str, None]:
        http_client = http_client_from_gradio_req(request, config)
        if task == Task.TRANSCRIBE:
            endpoint = TRANSCRIPTION_ENDPOINT
        elif task == Task.TRANSLATE:
            endpoint = TRANSLATION_ENDPOINT

        if stream:
            previous_transcription = ""
            async for transcription in streaming_audio_task(http_client, file_path, endpoint, temperature, model):
                previous_transcription += transcription
                yield previous_transcription
        else:
            yield await audio_task(http_client, file_path, endpoint, temperature, model)

    async def audio_task(
        http_client: httpx.AsyncClient, file_path: str, endpoint: str, temperature: float, model: str
    ) -> str:
        with Path(file_path).open("rb") as file:  # noqa: ASYNC230
            response = await http_client.post(
                endpoint,
                files={"file": file},
                data={
                    "model": model,
                    "response_format": "text",
                    "temperature": temperature,
                },
            )
        response.raise_for_status()
        return response.text

    async def streaming_audio_task(
        http_client: httpx.AsyncClient, file_path: str, endpoint: str, temperature: float, model: str
    ) -> AsyncGenerator[str, None]:
        with Path(file_path).open("rb") as file:  # noqa: ASYNC230
            kwargs = {
                "files": {"file": file},
                "data": {
                    "response_format": "text",
                    "temperature": temperature,
                    "model": model,
                    "stream": True,
                },
            }
            # The server streams partial transcripts as server-sent events
            async with aconnect_sse(http_client, "POST", endpoint, **kwargs) as event_source:
                async for event in event_source.aiter_sse():
                    yield event.data

    async def update_whisper_model_dropdown(request: gr.Request) -> gr.Dropdown:
        openai_client = openai_client_from_gradio_req(request, config)
        models = (await openai_client.models.list()).data
        model_names: list[str] = [model.id for model in models]
        assert config.whisper.model in model_names
        recommended_models = {model for model in model_names if model.startswith("Systran")}
        other_models = [model for model in model_names if model not in recommended_models]
        model_names = list(recommended_models) + other_models
        return gr.Dropdown(
            choices=model_names,
            label="Model",
            value=config.whisper.model,
        )

    async def update_piper_voices_dropdown(request: gr.Request) -> gr.Dropdown:
        http_client = http_client_from_gradio_req(request, config)
        res = (await http_client.get("/v1/audio/speech/voices")).raise_for_status()
        piper_models = [PiperModel.model_validate(x) for x in res.json()]
        return gr.Dropdown(choices=[model.voice for model in piper_models], label="Voice", value=DEFAULT_VOICE)

    async def handle_audio_speech(
        text: str, voice: str, response_format: str, speed: float, sample_rate: int | None, request: gr.Request
    ) -> Path:
        openai_client = openai_client_from_gradio_req(request, config)
        res = await openai_client.audio.speech.create(
            input=text,
            model="piper",
            voice=voice,  # pyright: ignore[reportArgumentType]
            response_format=response_format,  # pyright: ignore[reportArgumentType]
            speed=speed,
            extra_body={"sample_rate": sample_rate},
        )
        audio_bytes = res.response.read()
        file_path = Path(f"audio.{response_format}")
        with file_path.open("wb") as file:  # noqa: ASYNC230
            file.write(audio_bytes)
        return file_path

    with gr.Blocks(title="Speaches Playground") as demo:
        gr.Markdown(
            "### Consider supporting the project by starring the [repository on GitHub](https://github.com/speaches-ai/speaches)."
        )
        with gr.Tab(label="Transcribe/Translate"):
            audio = gr.Audio(type="filepath")
            model_dropdown = gr.Dropdown(
                choices=[config.whisper.model],
                label="Model",
                value=config.whisper.model,
            )
            task_dropdown = gr.Dropdown(
                choices=[task.value for task in Task],
                label="Task",
                value=Task.TRANSCRIBE,
            )
            temperature_slider = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, label="Temperature", value=0.0)
            stream_checkbox = gr.Checkbox(label="Stream", value=True)
            button = gr.Button("Generate")
            output = gr.Textbox()
            # NOTE: the inputs order must match the `whisper_handler` signature
            button.click(
                whisper_handler, [audio, model_dropdown, task_dropdown, temperature_slider, stream_checkbox], output
            )
        with gr.Tab(label="Speech Generation"):
            if platform.machine() == "x86_64":
                from speaches.routers.speech import (
                    DEFAULT_VOICE,
                    MAX_SAMPLE_RATE,
                    MIN_SAMPLE_RATE,
                    SUPPORTED_RESPONSE_FORMATS,
                )

                text = gr.Textbox(label="Input Text")
                voice_dropdown = gr.Dropdown(
                    choices=["en_US-amy-medium"],
                    label="Voice",
                    value="en_US-amy-medium",
                    info="""
The last part of the voice name is the quality (x_low, low, medium, high).
Each quality has a different default sample rate:
- x_low: 16000 Hz
- low: 16000 Hz
- medium: 22050 Hz
- high: 22050 Hz
""",
                )
                response_format_dropdown = gr.Dropdown(
                    choices=SUPPORTED_RESPONSE_FORMATS,
                    label="Response Format",
                    value="wav",
                )
                speed_slider = gr.Slider(minimum=0.25, maximum=4.0, step=0.05, label="Speed", value=1.0)
                sample_rate_slider = gr.Number(
                    minimum=MIN_SAMPLE_RATE,
                    maximum=MAX_SAMPLE_RATE,
                    label="Desired Sample Rate",
                    info="""
Setting this will resample the generated audio to the desired sample rate.
You may want to set this if you are going to use voices of different qualities but want to keep the same sample rate.
Default: None (No resampling)
""",
                    value=lambda: None,
                )
                button = gr.Button("Generate Speech")
                output = gr.Audio(type="filepath")
                button.click(
                    handle_audio_speech,
                    [text, voice_dropdown, response_format_dropdown, speed_slider, sample_rate_slider],
                    output,
                )
                demo.load(update_piper_voices_dropdown, inputs=None, outputs=voice_dropdown)
            else:
                gr.Textbox("Speech generation is only supported on x86_64 machines.")
        demo.load(update_whisper_model_dropdown, inputs=None, outputs=model_dropdown)
    return demo
```
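For quick local experimentation, the `gr.Blocks` returned by `create_gradio_demo` can also be launched on its own. The following is a minimal sketch, not how the project itself wires things up (there the demo is mounted onto the FastAPI app); it assumes `Config()` can be instantiated with default settings.

```python
# Hypothetical standalone launcher for the playground UI.
# Assumption: `Config()` works with defaults (no api_key, default whisper model);
# the project normally mounts this demo onto its FastAPI app instead.
from speaches.config import Config
from speaches.gradio_app import create_gradio_demo

if __name__ == "__main__":
    demo = create_gradio_demo(Config())
    # Enable the request queue (used by the streaming generator handlers),
    # then serve on all interfaces.
    demo.queue().launch(server_name="0.0.0.0", server_port=7860)
```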