#!/usr/bin/env bash
#
# Launch llama-server (Vulkan build) with a Qwen3.6-27B GGUF model.
# optimized for coding
# max context:	81K headless
#
# Usage: execute this file; do not source it (it enables strict mode and ends
#   in `exec`, which would replace the calling shell). Any extra command-line
#   arguments are appended to the llama-server invocation, e.g. `--port 8081`.
#
# Environment overrides (both optional):
#   LLAMA_BIN_DIR  directory containing llama-server and its shared libraries
#   LLAMA_MODEL    path to the GGUF model file
set -euo pipefail

die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# 1. Set Environment Variables
readonly LLAMA_BIN_DIR="${LLAMA_BIN_DIR:-/home/eaman/llama/bin_vulkan}"
readonly LLAMA_MODEL="${LLAMA_MODEL:-/home/eaman/lm/models/mradermacher/Qwen3.6-27B/Qwen3.6-27B.i1-IQ4_XS-attn_qkv-IQ4_XS.gguf}"
readonly LLAMA_SERVER="${LLAMA_BIN_DIR}/llama-server"

# Fail early with a clear message instead of an obscure loader/server error.
[[ -x "${LLAMA_SERVER}" ]] || die "llama-server not found or not executable: ${LLAMA_SERVER}"
[[ -r "${LLAMA_MODEL}" ]] || die "model file not readable: ${LLAMA_MODEL}"

# Put the build's own directory first on the library search path, but keep
# whatever the caller already had instead of clobbering it.
export LD_LIBRARY_PATH="${LLAMA_BIN_DIR}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"

# 2. Run the Server
# The flags live in an array so each one sits on its own line without
# backslash continuations (a trailing `\` on the last flag would silently
# swallow whatever line follows it).
server_args=(
  -m "${LLAMA_MODEL}"

  # NOTE: 0.0.0.0 listens on every network interface; make sure the host is
  # firewalled or switch to 127.0.0.1 if remote access is not wanted.
  --host 0.0.0.0
  -np 1
  -fa on
  --no-mmap
  --fit-target 50

  # KV cache types (K and V respectively).
  -ctk q8_0
  -ctv q5_0

  # Sampling.
  --temp 0.5
  --min-p 0.0
  --repeat-penalty 1.0
  --presence-penalty 0.0

  -b 512

  # Chat template / reasoning.
  # NOTE(review): thinking is disabled through the template kwargs while the
  # reasoning budget is set to 1 — confirm this combination is intentional.
  --jinja
  --reasoning-budget 1
  --chat-template-kwargs '{"enable_thinking":false}'

  # Speculative decoding (ngram-mod).
  --spec-type ngram-mod
  --spec-ngram-mod-n-match 8
  --spec-ngram-mod-n-min 3
  --spec-ngram-mod-n-max 24

  # Log verbosity.
  -lv 4
)

# exec replaces this shell with the server so signals (Ctrl-C, SIGTERM from a
# service manager) reach llama-server directly and its exit status is ours.
exec "${LLAMA_SERVER}" "${server_args[@]}" "$@"