Commit @b0036294c6d84df14d64d10904339c4cc51cb668 - yjyoon/whisper_server

Fedir Zadniprovskyi 2024-09-03

chore: minor changes to scripts/client.py

@b0036294c6d84df14d64d10904339c4cc51cb668

1debc1c

b003629

pyproject.toml

--- pyproject.toml

+++ pyproject.toml


     "W505",
     "ISC001", # recommended to disable for formatting
     "INP001",
+    "PT018",
 ]
 
 [tool.ruff.lint.isort]

1debc1c

b003629

scripts/client.py

--- scripts/client.py

+++ scripts/client.py


 from pathlib import Path
 import subprocess
 import threading
+import time
 
 import httpx
 import keyboard

 # The audio file will be sent to the server for transcription.
 # The transcription will be copied to the clipboard.
 # When having a short audio of a couple of sentences and running inference on a GPU the response time is very fast (less than 2 seconds).  # noqa: E501
+# Run this with `sudo -E python scripts/client.py`
 
 CHUNK = 2**12
 AUDIO_RECORD_CMD = [
     "ffmpeg",
-    # "-hide_banner",
+    "-hide_banner",
     # "-loglevel",
     # "quiet",
     "-f",

     "default",
     "-f",
     "wav",
-    # "-ac",
-    # "1",
-    # "-ar",
-    # "16000",
-    # "-f",
-    # "s16le",
-    # "-acodec",
-    # "pcm_s16le",
-    # "-",
 ]
 COPY_TO_CLIPBOARD_CMD = "wl-copy"
 OPENAI_BASE_URL = "ws://localhost:8000/v1"

 
 client = httpx.Client(base_url=OPENAI_BASE_URL, timeout=TIMEOUT)
 is_running = threading.Event()
-file = Path("test.wav")  # TODO: use tempfile
+
+file = Path("test.wav")  # HACK: I had a hard time trying to use a temporary file due to permissions issues
 
 
 while True:
     keyboard.wait(KEYBIND)
-    print("Action started")
+    print("Recording started")
     process = subprocess.Popen(
         [*AUDIO_RECORD_CMD, "-y", str(file.name)],
         stdout=subprocess.PIPE,

     )
     keyboard.wait(KEYBIND)
     process.kill()
-    print("Action finished")
+    stdout, stderr = process.communicate()
+    if stdout or stderr:
+        print(f"stdout: {stdout}")
+        print(f"stderr: {stderr}")
+    print(f"Recording finished. File size: {file.stat().st_size} bytes")
 
-    with open(file, "rb") as f:
-        res = client.post(
-            OPENAI_BASE_URL + TRANSCRIBE_PATH,
-            files={"file": f},
-            data={
-                "response_format": RESPONSE_FORMAT,
-                "language": LANGUAGE,
-            },
-        )
+    try:
+        with open(file, "rb") as fd:
+            start = time.perf_counter()
+            res = client.post(
+                OPENAI_BASE_URL + TRANSCRIBE_PATH,
+                files={"file": fd},
+                data={
+                    "response_format": RESPONSE_FORMAT,
+                    "language": LANGUAGE,
+                },
+            )
+        end = time.perf_counter()
+        print(f"Transcription took {end - start} seconds")
         transcription = res.text
         print(transcription)
         subprocess.run([COPY_TO_CLIPBOARD_CMD], input=transcription.encode(), check=True)
+    except httpx.ConnectError as e:
+        print(f"Couldn't connect to server: {e}")

Add a comment

Open 0
Closed 0

List

...	...	@@ -70,6 +70,7 @@
70	70	"W505",
71	71	"ISC001", # recommended to disable for formatting
72	72	"INP001",
	73	+ "PT018",
73	74	]
74	75
75	76	[tool.ruff.lint.isort]

...	...	@@ -2,6 +2,7 @@
2	2	from pathlib import Path
3	3	import subprocess
4	4	import threading
	5	+import time
5	6
6	7	import httpx
7	8	import keyboard
...	...	@@ -14,11 +15,12 @@
14	15	# The audio file will be sent to the server for transcription.
15	16	# The transcription will be copied to the clipboard.
16	17	# When having a short audio of a couple of sentences and running inference on a GPU the response time is very fast (less than 2 seconds). # noqa: E501
	18	+# Run this with `sudo -E python scripts/client.py`
17	19
18	20	CHUNK = 2**12
19	21	AUDIO_RECORD_CMD = [
20	22	"ffmpeg",
21		- # "-hide_banner",
	23	+ "-hide_banner",
22	24	# "-loglevel",
23	25	# "quiet",
24	26	"-f",
...	...	@@ -27,15 +29,6 @@
27	29	"default",
28	30	"-f",
29	31	"wav",
30		- # "-ac",
31		- # "1",
32		- # "-ar",
33		- # "16000",
34		- # "-f",
35		- # "s16le",
36		- # "-acodec",
37		- # "pcm_s16le",
38		- # "-",
39	32	]
40	33	COPY_TO_CLIPBOARD_CMD = "wl-copy"
41	34	OPENAI_BASE_URL = "ws://localhost:8000/v1"
...	...	@@ -48,12 +41,13 @@
48	41
49	42	client = httpx.Client(base_url=OPENAI_BASE_URL, timeout=TIMEOUT)
50	43	is_running = threading.Event()
51		-file = Path("test.wav") # TODO: use tempfile
	44	+
	45	+file = Path("test.wav") # HACK: I had a hard time trying to use a temporary file due to permissions issues
52	46
53	47
54	48	while True:
55	49	keyboard.wait(KEYBIND)
56		- print("Action started")
	50	+ print("Recording started")
57	51	process = subprocess.Popen(
58	52	[*AUDIO_RECORD_CMD, "-y", str(file.name)],
59	53	stdout=subprocess.PIPE,
...	...	@@ -63,17 +57,27 @@
63	57	)
64	58	keyboard.wait(KEYBIND)
65	59	process.kill()
66		- print("Action finished")
	60	+ stdout, stderr = process.communicate()
	61	+ if stdout or stderr:
	62	+ print(f"stdout: {stdout}")
	63	+ print(f"stderr: {stderr}")
	64	+ print(f"Recording finished. File size: {file.stat().st_size} bytes")
67	65
68		- with open(file, "rb") as f:
69		- res = client.post(
70		- OPENAI_BASE_URL + TRANSCRIBE_PATH,
71		- files={"file": f},
72		- data={
73		- "response_format": RESPONSE_FORMAT,
74		- "language": LANGUAGE,
75		- },
76		- )
	66	+ try:
	67	+ with open(file, "rb") as fd:
	68	+ start = time.perf_counter()
	69	+ res = client.post(
	70	+ OPENAI_BASE_URL + TRANSCRIBE_PATH,
	71	+ files={"file": fd},
	72	+ data={
	73	+ "response_format": RESPONSE_FORMAT,
	74	+ "language": LANGUAGE,
	75	+ },
	76	+ )
	77	+ end = time.perf_counter()
	78	+ print(f"Transcription took {end - start} seconds")
77	79	transcription = res.text
78	80	print(transcription)
79	81	subprocess.run([COPY_TO_CLIPBOARD_CMD], input=transcription.encode(), check=True)
	82	+ except httpx.ConnectError as e:
	83	+ print(f"Couldn't connect to server: {e}")

Delete comment