We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent cc706fb — commit 3cab3ef (full SHA: 3cab3ef)
llama_cpp/server/__main__.py
@@ -28,7 +28,7 @@
 class Settings(BaseSettings):
     model: str
     n_ctx: int = 2048
-    n_batch: int = 8
+    n_batch: int = 512
     n_threads: int = max((os.cpu_count() or 2) // 2, 1)
     f16_kv: bool = True
     use_mlock: bool = False  # This causes a silent failure on platforms that don't support mlock (e.g. Windows) took forever to figure out...
0 commit comments