Fedir Zadniprovskyi 2024-08-21
feat: add mvp client which runs in the background
@08beefa9283b2c667636335e945230c9c19dc53b
pyproject.toml
--- pyproject.toml
+++ pyproject.toml
@@ -14,11 +14,21 @@
     "httpx-sse",
     "httpx",
     "gradio",
-    "openai"
+    "openai",
 ]
 
 [project.optional-dependencies]
-dev = ["ruff==0.5.3", "pytest", "webvtt-py", "srt", "basedpyright==1.13.0", "pytest-xdist"]
+client = [
+    "keyboard",
+]
+dev = [
+    "ruff==0.5.3",
+    "pytest",
+    "webvtt-py",
+    "srt",
+    "basedpyright==1.13.0",
+    "pytest-xdist"
+]
 
 other = ["youtube-dl @ git+https://github.com/ytdl-org/youtube-dl.git@37cea84f775129ad715b9bcd617251c831fcc980", "aider-chat==0.39.0"]
 
@@ -58,7 +68,8 @@
     "T201", # print
     "TRY003",
     "W505",
-    "ISC001" # recommended to disable for formatting
+    "ISC001", # recommended to disable for formatting
+    "INP001",
 ]
 
 [tool.ruff.lint.isort]
requirements-all.txt
--- requirements-all.txt
+++ requirements-all.txt
@@ -15,7 +15,6 @@
 altair==5.3.0
     # via
     #   aider-chat
-    #   gradio
     #   streamlit
 annotated-types==0.7.0
     # via
@@ -24,17 +23,17 @@
 anyio==4.4.0
     # via
     #   aider-chat
+    #   gradio
     #   httpx
     #   openai
     #   starlette
-    #   watchfiles
 attrs==23.2.0
     # via
     #   aider-chat
     #   aiohttp
     #   jsonschema
     #   referencing
-av==12.2.0
+av==12.3.0
     # via faster-whisper
 backoff==2.2.1
     # via aider-chat
@@ -91,21 +90,15 @@
     # via
     #   aider-chat
     #   openai
-dnspython==2.6.1
-    # via email-validator
-email-validator==2.2.0
-    # via fastapi
 execnet==2.1.1
     # via pytest-xdist
-fastapi==0.111.1
+fastapi==0.112.2
     # via
     #   faster-whisper-server (pyproject.toml)
     #   gradio
-fastapi-cli==0.0.4
-    # via fastapi
 faster-whisper==1.0.3
     # via faster-whisper-server (pyproject.toml)
-ffmpy==0.3.2
+ffmpy==0.4.0
     # via gradio
 filelock==3.15.1
     # via
@@ -168,9 +161,9 @@
     #   aider-chat
     #   google-api-core
     #   grpcio-status
-gradio==4.38.1
+gradio==4.42.0
     # via faster-whisper-server (pyproject.toml)
-gradio-client==1.1.0
+gradio-client==1.3.0
     # via gradio
 greenlet==3.0.3
     # via
@@ -201,13 +194,10 @@
     #   aider-chat
     #   google-api-python-client
     #   google-auth-httplib2
-httptools==0.6.1
-    # via uvicorn
 httpx==0.27.0
     # via
     #   faster-whisper-server (pyproject.toml)
     #   aider-chat
-    #   fastapi
     #   gradio
     #   gradio-client
     #   openai
@@ -227,7 +217,6 @@
     # via
     #   aider-chat
     #   anyio
-    #   email-validator
     #   httpx
     #   requests
     #   yarl
@@ -235,7 +224,7 @@
     # via
     #   aider-chat
     #   litellm
-importlib-resources==6.4.0
+importlib-resources==6.4.4
     # via gradio
 iniconfig==2.0.0
     # via pytest
@@ -243,7 +232,6 @@
     # via
     #   aider-chat
     #   altair
-    #   fastapi
     #   gradio
     #   litellm
     #   pydeck
@@ -255,6 +243,8 @@
     # via
     #   aider-chat
     #   jsonschema
+keyboard==0.13.5
+    # via faster-whisper-server (pyproject.toml)
 kiwisolver==1.4.5
     # via matplotlib
 litellm==1.40.15
@@ -268,7 +258,7 @@
     #   aider-chat
     #   gradio
     #   jinja2
-matplotlib==3.9.1
+matplotlib==3.9.2
     # via gradio
 mccabe==0.7.0
     # via
@@ -287,7 +277,7 @@
     #   yarl
 networkx==3.2.1
     # via aider-chat
-nodejs-wheel-binaries==20.15.1
+nodejs-wheel-binaries==20.17.0
     # via basedpyright
 numpy==1.26.4
     # via
@@ -304,14 +294,14 @@
     #   pydeck
     #   scipy
     #   streamlit
-onnxruntime==1.18.1
+onnxruntime==1.19.0
     # via faster-whisper
 openai==1.34.0
     # via
     #   faster-whisper-server (pyproject.toml)
     #   aider-chat
     #   litellm
-orjson==3.10.6
+orjson==3.10.7
     # via gradio
 packaging==24.1
     # via
@@ -397,7 +387,7 @@
     # via
     #   aider-chat
     #   pydantic
-pydantic-settings==2.3.4
+pydantic-settings==2.4.0
     # via faster-whisper-server (pyproject.toml)
 pydeck==0.9.1
     # via
@@ -424,7 +414,7 @@
     #   aider-chat
     #   httplib2
     #   matplotlib
-pytest==8.2.2
+pytest==8.3.2
     # via
     #   faster-whisper-server (pyproject.toml)
     #   pytest-xdist
@@ -440,11 +430,9 @@
     #   aider-chat
     #   litellm
     #   pydantic-settings
-    #   uvicorn
 python-multipart==0.0.9
     # via
     #   faster-whisper-server (pyproject.toml)
-    #   fastapi
     #   gradio
 pytz==2024.1
     # via
@@ -456,7 +444,6 @@
     #   ctranslate2
     #   gradio
     #   huggingface-hub
-    #   uvicorn
 referencing==0.35.1
     # via
     #   aider-chat
@@ -496,7 +483,7 @@
     # via aider-chat
 semantic-version==2.10.0
     # via gradio
-setuptools==71.0.4
+setuptools==73.0.1
     # via ctranslate2
 shellingham==1.5.4
     # via typer
@@ -526,11 +513,11 @@
     #   beautifulsoup4
 srt==3.5.3
     # via faster-whisper-server (pyproject.toml)
-starlette==0.37.2
+starlette==0.38.2
     # via fastapi
 streamlit==1.35.0
     # via aider-chat
-sympy==1.13.1
+sympy==1.13.2
     # via onnxruntime
 tenacity==8.3.0
     # via
@@ -573,10 +560,8 @@
     # via
     #   aider-chat
     #   grep-ast
-typer==0.12.3
-    # via
-    #   fastapi-cli
-    #   gradio
+typer==0.12.5
+    # via gradio
 typing-extensions==4.12.2
     # via
     #   aider-chat
@@ -604,27 +589,20 @@
     #   aider-chat
     #   gradio
     #   requests
-uvicorn==0.30.1
+uvicorn==0.30.6
     # via
     #   faster-whisper-server (pyproject.toml)
-    #   fastapi
     #   gradio
-uvloop==0.19.0
-    # via uvicorn
 watchdog==4.0.1
     # via
     #   aider-chat
     #   streamlit
-watchfiles==0.22.0
-    # via uvicorn
 wcwidth==0.2.13
     # via
     #   aider-chat
     #   prompt-toolkit
-websockets==11.0.3
-    # via
-    #   gradio-client
-    #   uvicorn
+websockets==12.0
+    # via gradio-client
 webvtt-py==0.5.1
     # via faster-whisper-server (pyproject.toml)
 yarl==1.9.4
requirements-dev.txt
--- requirements-dev.txt
+++ requirements-dev.txt
@@ -2,21 +2,15 @@
 #    uv pip compile --override overrides.txt --extra dev pyproject.toml
 aiofiles==23.2.1
     # via gradio
-altair==5.3.0
-    # via gradio
 annotated-types==0.7.0
     # via pydantic
 anyio==4.4.0
     # via
+    #   gradio
     #   httpx
     #   openai
     #   starlette
-    #   watchfiles
-attrs==23.2.0
-    # via
-    #   jsonschema
-    #   referencing
-av==12.2.0
+av==12.3.0
     # via faster-whisper
 basedpyright==1.13.0
     # via faster-whisper-server (pyproject.toml)
@@ -25,7 +19,7 @@
     #   httpcore
     #   httpx
     #   requests
-cffi==1.16.0
+cffi==1.17.0
     # via soundfile
 charset-normalizer==3.3.2
     # via requests
@@ -43,21 +37,15 @@
     # via matplotlib
 distro==1.9.0
     # via openai
-dnspython==2.6.1
-    # via email-validator
-email-validator==2.2.0
-    # via fastapi
 execnet==2.1.1
     # via pytest-xdist
-fastapi==0.111.1
+fastapi==0.112.2
     # via
     #   faster-whisper-server (pyproject.toml)
     #   gradio
-fastapi-cli==0.0.4
-    # via fastapi
 faster-whisper==1.0.3
     # via faster-whisper-server (pyproject.toml)
-ffmpy==0.3.2
+ffmpy==0.4.0
     # via gradio
 filelock==3.15.4
     # via huggingface-hub
@@ -69,9 +57,9 @@
     # via
     #   gradio-client
     #   huggingface-hub
-gradio==4.38.1
+gradio==4.42.0
     # via faster-whisper-server (pyproject.toml)
-gradio-client==1.1.0
+gradio-client==1.3.0
     # via gradio
 h11==0.14.0
     # via
@@ -79,18 +67,15 @@
     #   uvicorn
 httpcore==1.0.5
     # via httpx
-httptools==0.6.1
-    # via uvicorn
 httpx==0.27.0
     # via
     #   faster-whisper-server (pyproject.toml)
-    #   fastapi
     #   gradio
     #   gradio-client
     #   openai
 httpx-sse==0.4.0
     # via faster-whisper-server (pyproject.toml)
-huggingface-hub==0.24.0
+huggingface-hub==0.24.6
     # via
     #   faster-whisper-server (pyproject.toml)
     #   faster-whisper
@@ -99,25 +84,19 @@
     #   tokenizers
 humanfriendly==10.0
     # via coloredlogs
-idna==3.7
+idna==3.8
     # via
     #   anyio
-    #   email-validator
     #   httpx
     #   requests
-importlib-resources==6.4.0
+importlib-resources==6.4.4
     # via gradio
 iniconfig==2.0.0
     # via pytest
 jinja2==3.1.4
-    # via
-    #   altair
-    #   fastapi
-    #   gradio
-jsonschema==4.23.0
-    # via altair
-jsonschema-specifications==2023.12.1
-    # via jsonschema
+    # via gradio
+jiter==0.5.0
+    # via openai
 kiwisolver==1.4.5
     # via matplotlib
 markdown-it-py==3.0.0
@@ -126,33 +105,31 @@
     # via
     #   gradio
     #   jinja2
-matplotlib==3.9.1
+matplotlib==3.9.2
     # via gradio
 mdurl==0.1.2
     # via markdown-it-py
 mpmath==1.3.0
     # via sympy
-nodejs-wheel-binaries==20.15.1
+nodejs-wheel-binaries==20.17.0
     # via basedpyright
 numpy==1.26.4
     # via
     #   faster-whisper-server (pyproject.toml)
-    #   altair
     #   contourpy
     #   ctranslate2
     #   gradio
     #   matplotlib
     #   onnxruntime
     #   pandas
-onnxruntime==1.18.1
+onnxruntime==1.19.0
     # via faster-whisper
-openai==1.36.0
+openai==1.42.0
     # via faster-whisper-server (pyproject.toml)
-orjson==3.10.6
+orjson==3.10.7
     # via gradio
 packaging==24.1
     # via
-    #   altair
     #   gradio
     #   gradio-client
     #   huggingface-hub
@@ -160,16 +137,14 @@
     #   onnxruntime
     #   pytest
 pandas==2.2.2
-    # via
-    #   altair
-    #   gradio
+    # via gradio
 pillow==10.4.0
     # via
     #   gradio
     #   matplotlib
 pluggy==1.5.0
     # via pytest
-protobuf==5.27.2
+protobuf==5.27.3
     # via onnxruntime
 pycparser==2.22
     # via cffi
@@ -182,15 +157,15 @@
     #   pydantic-settings
 pydantic-core==2.20.1
     # via pydantic
-pydantic-settings==2.3.4
+pydantic-settings==2.4.0
     # via faster-whisper-server (pyproject.toml)
 pydub==0.25.1
     # via gradio
 pygments==2.18.0
     # via rich
-pyparsing==3.1.2
+pyparsing==3.1.4
     # via matplotlib
-pytest==8.2.2
+pytest==8.3.2
     # via
     #   faster-whisper-server (pyproject.toml)
     #   pytest-xdist
@@ -201,41 +176,29 @@
     #   matplotlib
     #   pandas
 python-dotenv==1.0.1
-    # via
-    #   pydantic-settings
-    #   uvicorn
+    # via pydantic-settings
 python-multipart==0.0.9
     # via
     #   faster-whisper-server (pyproject.toml)
-    #   fastapi
     #   gradio
 pytz==2024.1
     # via pandas
-pyyaml==6.0.1
+pyyaml==6.0.2
     # via
     #   ctranslate2
     #   gradio
     #   huggingface-hub
-    #   uvicorn
-referencing==0.35.1
-    # via
-    #   jsonschema
-    #   jsonschema-specifications
 requests==2.32.3
     # via huggingface-hub
-rich==13.7.1
+rich==13.8.0
     # via typer
-rpds-py==0.19.0
-    # via
-    #   jsonschema
-    #   referencing
 ruff==0.5.3
     # via
     #   faster-whisper-server (pyproject.toml)
     #   gradio
 semantic-version==2.10.0
     # via gradio
-setuptools==71.0.4
+setuptools==73.0.1
     # via ctranslate2
 shellingham==1.5.4
     # via typer
@@ -250,24 +213,20 @@
     # via faster-whisper-server (pyproject.toml)
 srt==3.5.3
     # via faster-whisper-server (pyproject.toml)
-starlette==0.37.2
+starlette==0.38.2
     # via fastapi
-sympy==1.13.1
+sympy==1.13.2
     # via onnxruntime
-tokenizers==0.19.1
+tokenizers==0.20.0
     # via faster-whisper
 tomlkit==0.12.0
     # via gradio
-toolz==0.12.1
-    # via altair
-tqdm==4.66.4
+tqdm==4.66.5
     # via
     #   huggingface-hub
     #   openai
-typer==0.12.3
-    # via
-    #   fastapi-cli
-    #   gradio
+typer==0.12.5
+    # via gradio
 typing-extensions==4.12.2
     # via
     #   fastapi
@@ -284,18 +243,11 @@
     # via
     #   gradio
     #   requests
-uvicorn==0.30.1
+uvicorn==0.30.6
     # via
     #   faster-whisper-server (pyproject.toml)
-    #   fastapi
     #   gradio
-uvloop==0.19.0
-    # via uvicorn
-watchfiles==0.22.0
-    # via uvicorn
-websockets==11.0.3
-    # via
-    #   gradio-client
-    #   uvicorn
+websockets==12.0
+    # via gradio-client
 webvtt-py==0.5.1
     # via faster-whisper-server (pyproject.toml)
requirements.txt
--- requirements.txt
+++ requirements.txt
@@ -2,28 +2,22 @@
 #    uv pip compile --override overrides.txt pyproject.toml
 aiofiles==23.2.1
     # via gradio
-altair==5.3.0
-    # via gradio
 annotated-types==0.7.0
     # via pydantic
 anyio==4.4.0
     # via
+    #   gradio
     #   httpx
     #   openai
     #   starlette
-    #   watchfiles
-attrs==23.2.0
-    # via
-    #   jsonschema
-    #   referencing
-av==12.2.0
+av==12.3.0
     # via faster-whisper
 certifi==2024.7.4
     # via
     #   httpcore
     #   httpx
     #   requests
-cffi==1.16.0
+cffi==1.17.0
     # via soundfile
 charset-normalizer==3.3.2
     # via requests
@@ -41,19 +35,13 @@
     # via matplotlib
 distro==1.9.0
     # via openai
-dnspython==2.6.1
-    # via email-validator
-email-validator==2.2.0
-    # via fastapi
-fastapi==0.111.1
+fastapi==0.112.2
     # via
     #   faster-whisper-server (pyproject.toml)
     #   gradio
-fastapi-cli==0.0.4
-    # via fastapi
 faster-whisper==1.0.3
     # via faster-whisper-server (pyproject.toml)
-ffmpy==0.3.2
+ffmpy==0.4.0
     # via gradio
 filelock==3.15.4
     # via huggingface-hub
@@ -65,9 +53,9 @@
     # via
     #   gradio-client
     #   huggingface-hub
-gradio==4.38.1
+gradio==4.42.0
     # via faster-whisper-server (pyproject.toml)
-gradio-client==1.1.0
+gradio-client==1.3.0
     # via gradio
 h11==0.14.0
     # via
@@ -75,18 +63,15 @@
     #   uvicorn
 httpcore==1.0.5
     # via httpx
-httptools==0.6.1
-    # via uvicorn
 httpx==0.27.0
     # via
     #   faster-whisper-server (pyproject.toml)
-    #   fastapi
     #   gradio
     #   gradio-client
     #   openai
 httpx-sse==0.4.0
     # via faster-whisper-server (pyproject.toml)
-huggingface-hub==0.24.0
+huggingface-hub==0.24.6
     # via
     #   faster-whisper-server (pyproject.toml)
     #   faster-whisper
@@ -95,23 +80,17 @@
     #   tokenizers
 humanfriendly==10.0
     # via coloredlogs
-idna==3.7
+idna==3.8
     # via
     #   anyio
-    #   email-validator
     #   httpx
     #   requests
-importlib-resources==6.4.0
+importlib-resources==6.4.4
     # via gradio
 jinja2==3.1.4
-    # via
-    #   altair
-    #   fastapi
-    #   gradio
-jsonschema==4.23.0
-    # via altair
-jsonschema-specifications==2023.12.1
-    # via jsonschema
+    # via gradio
+jiter==0.5.0
+    # via openai
 kiwisolver==1.4.5
     # via matplotlib
 markdown-it-py==3.0.0
@@ -120,7 +99,7 @@
     # via
     #   gradio
     #   jinja2
-matplotlib==3.9.1
+matplotlib==3.9.2
     # via gradio
 mdurl==0.1.2
     # via markdown-it-py
@@ -129,36 +108,32 @@
 numpy==1.26.4
     # via
     #   faster-whisper-server (pyproject.toml)
-    #   altair
     #   contourpy
     #   ctranslate2
     #   gradio
     #   matplotlib
     #   onnxruntime
     #   pandas
-onnxruntime==1.18.1
+onnxruntime==1.19.0
     # via faster-whisper
-openai==1.36.0
+openai==1.42.0
     # via faster-whisper-server (pyproject.toml)
-orjson==3.10.6
+orjson==3.10.7
     # via gradio
 packaging==24.1
     # via
-    #   altair
     #   gradio
     #   gradio-client
     #   huggingface-hub
     #   matplotlib
     #   onnxruntime
 pandas==2.2.2
-    # via
-    #   altair
-    #   gradio
+    # via gradio
 pillow==10.4.0
     # via
     #   gradio
     #   matplotlib
-protobuf==5.27.2
+protobuf==5.27.3
     # via onnxruntime
 pycparser==2.22
     # via cffi
@@ -171,52 +146,40 @@
     #   pydantic-settings
 pydantic-core==2.20.1
     # via pydantic
-pydantic-settings==2.3.4
+pydantic-settings==2.4.0
     # via faster-whisper-server (pyproject.toml)
 pydub==0.25.1
     # via gradio
 pygments==2.18.0
     # via rich
-pyparsing==3.1.2
+pyparsing==3.1.4
     # via matplotlib
 python-dateutil==2.9.0.post0
     # via
     #   matplotlib
     #   pandas
 python-dotenv==1.0.1
-    # via
-    #   pydantic-settings
-    #   uvicorn
+    # via pydantic-settings
 python-multipart==0.0.9
     # via
     #   faster-whisper-server (pyproject.toml)
-    #   fastapi
     #   gradio
 pytz==2024.1
     # via pandas
-pyyaml==6.0.1
+pyyaml==6.0.2
     # via
     #   ctranslate2
     #   gradio
     #   huggingface-hub
-    #   uvicorn
-referencing==0.35.1
-    # via
-    #   jsonschema
-    #   jsonschema-specifications
 requests==2.32.3
     # via huggingface-hub
-rich==13.7.1
+rich==13.8.0
     # via typer
-rpds-py==0.19.0
-    # via
-    #   jsonschema
-    #   referencing
-ruff==0.5.3
+ruff==0.6.2
     # via gradio
 semantic-version==2.10.0
     # via gradio
-setuptools==71.0.4
+setuptools==73.0.1
     # via ctranslate2
 shellingham==1.5.4
     # via typer
@@ -229,24 +192,20 @@
     #   openai
 soundfile==0.12.1
     # via faster-whisper-server (pyproject.toml)
-starlette==0.37.2
+starlette==0.38.2
     # via fastapi
-sympy==1.13.1
+sympy==1.13.2
     # via onnxruntime
-tokenizers==0.19.1
+tokenizers==0.20.0
     # via faster-whisper
 tomlkit==0.12.0
     # via gradio
-toolz==0.12.1
-    # via altair
-tqdm==4.66.4
+tqdm==4.66.5
     # via
     #   huggingface-hub
     #   openai
-typer==0.12.3
-    # via
-    #   fastapi-cli
-    #   gradio
+typer==0.12.5
+    # via gradio
 typing-extensions==4.12.2
     # via
     #   fastapi
@@ -263,16 +222,9 @@
     # via
     #   gradio
     #   requests
-uvicorn==0.30.1
+uvicorn==0.30.6
     # via
     #   faster-whisper-server (pyproject.toml)
-    #   fastapi
     #   gradio
-uvloop==0.19.0
-    # via uvicorn
-watchfiles==0.22.0
-    # via uvicorn
-websockets==11.0.3
-    # via
-    #   gradio-client
-    #   uvicorn
+websockets==12.0
+    # via gradio-client
 
scripts/client.py (added)
+++ scripts/client.py
@@ -0,0 +1,133 @@
+import os
+from pathlib import Path
+import subprocess
+import threading
+
+import httpx
+import keyboard
+
+# NOTE: this is a very basic implementation. Not really meant for usage by others.
+# Included here in case someone wants to use it as a reference.
+
+# This script runs in the background and listens for a keybind to start recording audio.
+# It then waits until the keybind is pressed again to stop recording.
+# The recorded audio file is sent to the server for transcription.
+# The transcription is copied to the clipboard.
+# For a short recording (a couple of sentences) with inference running on a GPU, the response time is very fast (less than 2 seconds).  # noqa: E501
+
+CHUNK = 2**12  # NOTE: currently unused
+AUDIO_RECORD_CMD = [
+    "ffmpeg",
+    # "-hide_banner",
+    # "-loglevel",
+    # "quiet",
+    "-f",
+    "alsa",
+    "-i",
+    "default",
+    "-f",
+    "wav",
+    # "-ac",
+    # "1",
+    # "-ar",
+    # "16000",
+    # "-f",
+    # "s16le",
+    # "-acodec",
+    # "pcm_s16le",
+    # "-",
+]
+COPY_TO_CLIPBOARD_CMD = "wl-copy"
+OPENAI_BASE_URL = "ws://localhost:8000/v1"
+TRANSCRIBE_PATH = "/audio/transcriptions?language=en"
+USER = "nixos"
+TIMEOUT = httpx.Timeout(None)
+KEYBIND = "ctrl+x"
+LANGUAGE = "en"
+RESPONSE_FORMAT = "text"
+# Seconds to wait for the recorder to exit after SIGTERM before force-killing it.
+RECORDER_STOP_TIMEOUT = 5
+
+client = httpx.Client(base_url=OPENAI_BASE_URL, timeout=TIMEOUT)
+is_running = threading.Event()  # NOTE: currently unused
+file = Path("test.wav")  # TODO: use tempfile
+
+
+def record_until_keybind(output: Path) -> bool:
+    """Record audio into `output` between two presses of KEYBIND.
+
+    Blocks until KEYBIND is pressed, starts the recorder, blocks until KEYBIND is
+    pressed again, then stops the recorder and waits for it to exit.
+
+    Returns:
+        True if the recorder was still running when asked to stop; False if it had
+        already exited on its own, in which case `output` should not be trusted.
+    """
+    keyboard.wait(KEYBIND)
+    print("Action started")
+    process = subprocess.Popen(
+        [*AUDIO_RECORD_CMD, "-y", str(output)],
+        # The recorder's output is never read, so discard it: an unread PIPE
+        # blocks the child once the OS pipe buffer fills up.
+        stdout=subprocess.DEVNULL,
+        stderr=subprocess.DEVNULL,
+        user=USER,
+        env=dict(os.environ),
+    )
+    try:
+        keyboard.wait(KEYBIND)
+        exit_code = process.poll()
+    finally:
+        # SIGTERM instead of SIGKILL: SIGKILL cannot be handled, so the recorder
+        # would get no chance to flush and finalize the output file.
+        process.terminate()
+        try:
+            process.wait(timeout=RECORDER_STOP_TIMEOUT)
+        except subprocess.TimeoutExpired:
+            process.kill()
+            process.wait()
+    print("Action finished")
+    if exit_code is not None:
+        print(f"Recorder exited early with code {exit_code}")
+        return False
+    return True
+
+
+def transcribe(audio: Path) -> str:
+    """Upload `audio` to the transcription endpoint and return the response body as text.
+
+    Raises:
+        OSError: if `audio` cannot be opened.
+        httpx.HTTPError: if the request fails or the server returns an error status.
+    """
+    with audio.open("rb") as f:
+        res = client.post(
+            OPENAI_BASE_URL + TRANSCRIBE_PATH,
+            files={"file": f},
+            data={
+                "response_format": RESPONSE_FORMAT,
+                "language": LANGUAGE,
+            },
+        )
+    # Without this check an error body would be copied to the clipboard as if it
+    # were a transcription.
+    res.raise_for_status()
+    return res.text
+
+
+def main() -> None:
+    """Record, transcribe and copy to the clipboard, once per pair of KEYBIND presses, forever."""
+    while True:
+        if not record_until_keybind(file):
+            continue
+        try:
+            transcription = transcribe(file)
+        except (OSError, httpx.HTTPError) as e:
+            print(f"Transcription failed: {e}")
+            continue
+        print(transcription)
+        subprocess.run([COPY_TO_CLIPBOARD_CMD], input=transcription.encode(), check=True)
+
+
+if __name__ == "__main__":
+    main()
Add a comment
List