Commit @a9ee91b071c59d4d2ba3e8ead55f032b0c0e0e1e - yjyoon/whisper_server

Fedir Zadniprovskyi 2024-05-23

refactor: simplify tests

@a9ee91b071c59d4d2ba3e8ead55f032b0c0e0e1e

e1a6910

a9ee91b

tests/app_test.py

--- tests/app_test.py

+++ tests/app_test.py


 from speaches.server_models import TranscriptionVerboseResponse
 
 SIMILARITY_THRESHOLD = 0.97
+AUDIO_FILES_LIMIT = 5
+AUDIO_FILE_DIR = "tests/data"
+TRANSCRIBE_ENDPOINT = "/v1/audio/transcriptions?response_format=verbose_json"
 
 
 @pytest.fixture()

         yield client
 
 
+@pytest.fixture()
+def ws(client: TestClient) -> Generator[WebSocketTestSession, None, None]:
+    with client.websocket_connect(TRANSCRIBE_ENDPOINT) as ws:
+        yield ws
+
+
 def get_audio_file_paths():
     file_paths = []
     directory = "tests/data"
-    for filename in reversed(os.listdir(directory)[5:6]):
-        if filename.endswith(".raw"):
-            file_paths.append(os.path.join(directory, filename))
+    for filename in sorted(os.listdir(directory)[:AUDIO_FILES_LIMIT]):
+        file_paths.append(os.path.join(directory, filename))
     return file_paths
 
 

     client: TestClient, data: bytes
 ) -> TranscriptionVerboseResponse:
     response = client.post(
-        "/v1/audio/transcriptions?response_format=verbose_json",
+        TRANSCRIBE_ENDPOINT,
         files={"file": ("audio.raw", data, "audio/raw")},
     )
     data = json.loads(response.json())  # TODO: figure this out

 
 
 @pytest.mark.parametrize("file_path", file_paths)
-def test_ws_audio_transcriptions(client: TestClient, file_path: str):
+def test_ws_audio_transcriptions(
+    client: TestClient, ws: WebSocketTestSession, file_path: str
+):
     with open(file_path, "rb") as file:
         data = file.read()
-        streaming_transcription: TranscriptionVerboseResponse = None  # type: ignore
-        with client.websocket_connect(
-            "/v1/audio/transcriptions?response_format=verbose_json"
-        ) as ws:
-            thread = threading.Thread(
-                target=stream_audio_data, args=(ws, data), kwargs={"speed": 4.0}
-            )
-            thread.start()
-            while True:
-                try:
-                    streaming_transcription = TranscriptionVerboseResponse(
-                        **ws.receive_json()
-                    )
-                except WebSocketDisconnect:
-                    break
-            ws.close()
-        file_transcription = transcribe_audio_data(client, data)
-        s = SequenceMatcher(
-            lambda x: x == " ", file_transcription.text, streaming_transcription.text
-        )
-        assert (
-            s.ratio() > SIMILARITY_THRESHOLD
-        ), f"\nExpected: {file_transcription.text}\nReceived: {streaming_transcription.text}"
+
+    streaming_transcription: TranscriptionVerboseResponse = None  # type: ignore
+    thread = threading.Thread(
+        target=stream_audio_data, args=(ws, data), kwargs={"speed": 4.0}
+    )
+    thread.start()
+    while True:
+        try:
+            streaming_transcription = TranscriptionVerboseResponse(**ws.receive_json())
+        except WebSocketDisconnect:
+            break
+    file_transcription = transcribe_audio_data(client, data)
+    s = SequenceMatcher(
+        lambda x: x == " ", file_transcription.text, streaming_transcription.text
+    )
+    assert (
+        s.ratio() > SIMILARITY_THRESHOLD
+    ), f"\nExpected: {file_transcription.text}\nReceived: {streaming_transcription.text}"

Add a comment

Open 0
Closed 0

List

...	...	@@ -15,6 +15,9 @@
15	15	from speaches.server_models import TranscriptionVerboseResponse
16	16
17	17	SIMILARITY_THRESHOLD = 0.97
	18	+AUDIO_FILES_LIMIT = 5
	19	+AUDIO_FILE_DIR = "tests/data"
	20	+TRANSCRIBE_ENDPOINT = "/v1/audio/transcriptions?response_format=verbose_json"
18	21
19	22
20	23	@pytest.fixture()
...	...	@@ -23,12 +26,17 @@
23	26	yield client
24	27
25	28
	29	+@pytest.fixture()
	30	+def ws(client: TestClient) -> Generator[WebSocketTestSession, None, None]:
	31	+ with client.websocket_connect(TRANSCRIBE_ENDPOINT) as ws:
	32	+ yield ws
	33	+
	34	+
26	35	def get_audio_file_paths():
27	36	file_paths = []
28	37	directory = "tests/data"
29		- for filename in reversed(os.listdir(directory)[5:6]):
30		- if filename.endswith(".raw"):
31		- file_paths.append(os.path.join(directory, filename))
	38	+ for filename in sorted(os.listdir(directory)[:AUDIO_FILES_LIMIT]):
	39	+ file_paths.append(os.path.join(directory, filename))
32	40	return file_paths
33	41
34	42
...	...	@@ -48,7 +56,7 @@
48	56	client: TestClient, data: bytes
49	57	) -> TranscriptionVerboseResponse:
50	58	response = client.post(
51		- "/v1/audio/transcriptions?response_format=verbose_json",
	59	+ TRANSCRIBE_ENDPOINT,
52	60	files={"file": ("audio.raw", data, "audio/raw")},
53	61	)
54	62	data = json.loads(response.json()) # TODO: figure this out
...	...	@@ -56,29 +64,26 @@
56	64
57	65
58	66	@pytest.mark.parametrize("file_path", file_paths)
59		-def test_ws_audio_transcriptions(client: TestClient, file_path: str):
	67	+def test_ws_audio_transcriptions(
	68	+ client: TestClient, ws: WebSocketTestSession, file_path: str
	69	+):
60	70	with open(file_path, "rb") as file:
61	71	data = file.read()
62		- streaming_transcription: TranscriptionVerboseResponse = None # type: ignore
63		- with client.websocket_connect(
64		- "/v1/audio/transcriptions?response_format=verbose_json"
65		- ) as ws:
66		- thread = threading.Thread(
67		- target=stream_audio_data, args=(ws, data), kwargs={"speed": 4.0}
68		- )
69		- thread.start()
70		- while True:
71		- try:
72		- streaming_transcription = TranscriptionVerboseResponse(
73		- **ws.receive_json()
74		- )
75		- except WebSocketDisconnect:
76		- break
77		- ws.close()
78		- file_transcription = transcribe_audio_data(client, data)
79		- s = SequenceMatcher(
80		- lambda x: x == " ", file_transcription.text, streaming_transcription.text
81		- )
82		- assert (
83		- s.ratio() > SIMILARITY_THRESHOLD
84		- ), f"\nExpected: {file_transcription.text}\nReceived: {streaming_transcription.text}"
	72	+
	73	+ streaming_transcription: TranscriptionVerboseResponse = None # type: ignore
	74	+ thread = threading.Thread(
	75	+ target=stream_audio_data, args=(ws, data), kwargs={"speed": 4.0}
	76	+ )
	77	+ thread.start()
	78	+ while True:
	79	+ try:
	80	+ streaming_transcription = TranscriptionVerboseResponse(**ws.receive_json())
	81	+ except WebSocketDisconnect:
	82	+ break
	83	+ file_transcription = transcribe_audio_data(client, data)
	84	+ s = SequenceMatcher(
	85	+ lambda x: x == " ", file_transcription.text, streaming_transcription.text
	86	+ )
	87	+ assert (
	88	+ s.ratio() > SIMILARITY_THRESHOLD
	89	+ ), f"\nExpected: {file_transcription.text}\nReceived: {streaming_transcription.text}"

Delete comment