

feat: ollama-like ps endpoints
@3b49f1432e7febdd1fbce214df61147011142104
--- faster_whisper_server/main.py
+++ faster_whisper_server/main.py
... | ... | @@ -3,6 +3,7 @@ |
3 | 3 |
import asyncio |
4 | 4 |
from collections import OrderedDict |
5 | 5 |
from contextlib import asynccontextmanager |
6 |
+import gc |
|
6 | 7 |
from io import BytesIO |
7 | 8 |
import time |
8 | 9 |
from typing import TYPE_CHECKING, Annotated, Literal |
... | ... | @@ -107,6 +108,29 @@ |
107 | 108 |
return Response(status_code=200, content="OK") |
108 | 109 |
|
109 | 110 |
|
111 |
@app.get("/api/ps", tags=["experimental"], summary="Get a list of loaded models.")
def get_running_models() -> dict[str, list[str]]:
    """Report the names of the models currently present in ``loaded_models``.

    Returns:
        A mapping with a single ``"models"`` key whose value is a list of the
        keys of ``loaded_models``, in that mapping's own iteration order.
    """
    loaded_names = [*loaded_models.keys()]
    return {"models": loaded_names}
|
114 |
+ |
|
115 |
+ |
|
116 |
@app.post("/api/ps/{model_name:path}", tags=["experimental"], summary="Load a model into memory.")
def load_model_route(model_name: str) -> Response:
    """Load ``model_name`` unless it is already a key of ``loaded_models``.

    Args:
        model_name: Model identifier taken from the URL. The route uses the
            ``:path`` converter, so the name may contain slashes.

    Returns:
        A 201 response once ``load_model(model_name)`` has returned, or a 409
        response with the body "Model already loaded" when the name is
        already present in ``loaded_models``.
    """
    if model_name not in loaded_models:
        load_model(model_name)
        return Response(status_code=201)
    return Response(status_code=409, content="Model already loaded")
|
122 |
+ |
|
123 |
+ |
|
124 |
@app.delete("/api/ps/{model_name:path}", tags=["experimental"], summary="Unload a model from memory.")
def stop_running_model(model_name: str) -> Response:
    """Remove ``model_name`` from ``loaded_models`` and run a garbage collection.

    Args:
        model_name: Model identifier taken from the URL. The route uses the
            ``:path`` converter, so the name may contain slashes.

    Returns:
        A 204 response when the entry existed and was removed, or a 404
        response when ``model_name`` is not a key of ``loaded_models``.
    """
    if model_name not in loaded_models:
        return Response(status_code=404)
    # Delete by key without first binding the model to a local variable: a
    # local reference would keep the object reachable for the duration of
    # gc.collect() below, so that collection pass could not reclaim it.
    del loaded_models[model_name]
    gc.collect()
    return Response(status_code=204)
|
132 |
+ |
|
133 |
+ |
|
110 | 134 |
@app.get("/v1/models") |
111 | 135 |
def get_models() -> ModelListResponse: |
112 | 136 |
models = huggingface_hub.list_models(library="ctranslate2", tags="automatic-speech-recognition", cardData=True) |
Add a comment
Delete comment
Once you delete this comment, you won't be able to recover it. Are you sure you want to delete this comment?