Fedir Zadniprovskyi 2024-09-05
feat: ollama-like ps endpoints
@3b49f1432e7febdd1fbce214df61147011142104
faster_whisper_server/main.py
--- faster_whisper_server/main.py
+++ faster_whisper_server/main.py
@@ -3,6 +3,7 @@
 import asyncio
 from collections import OrderedDict
 from contextlib import asynccontextmanager
+import gc
 from io import BytesIO
 import time
 from typing import TYPE_CHECKING, Annotated, Literal
@@ -107,6 +108,29 @@
     return Response(status_code=200, content="OK")
 
 
+@app.get("/api/ps", tags=["experimental"], summary="Get a list of loaded models.")
+def get_running_models() -> dict[str, list[str]]:
+    return {"models": list(loaded_models.keys())}
+
+
+@app.post("/api/ps/{model_name:path}", tags=["experimental"], summary="Load a model into memory.")
+def load_model_route(model_name: str) -> Response:
+    if model_name in loaded_models:
+        return Response(status_code=409, content="Model already loaded")
+    load_model(model_name)
+    return Response(status_code=201)
+
+
+@app.delete("/api/ps/{model_name:path}", tags=["experimental"], summary="Unload a model from memory.")
+def stop_running_model(model_name: str) -> Response:
+    model = loaded_models.get(model_name)
+    if model is not None:
+        del loaded_models[model_name]
+        gc.collect()
+        return Response(status_code=204)
+    return Response(status_code=404)
+
+
 @app.get("/v1/models")
 def get_models() -> ModelListResponse:
     models = huggingface_hub.list_models(library="ctranslate2", tags="automatic-speech-recognition", cardData=True)
Add a comment
List