#!/bin/sh
# Launch llama-server (Vulkan build) with a Qwen3.6-27B MTP model.
# Profile: optimized for coding.
#
# Context size: --ctx-size is 30000 below. The original notes record 32k
# (--ctx-size 32768) as the maximum when the machine runs headless.
#
# Optional environment overrides (the defaults reproduce the original setup):
#   LLAMA_BIN_DIR  directory holding llama-server and its shared libraries
#   LLAMA_MODEL    path to the GGUF model file to serve
#
# NOTE(review): the per-flag descriptions below are best-effort; confirm them
# against `llama-server --help` of the build found in LLAMA_BIN_DIR.

# 1. Set Environment Variables
bin_dir="${LLAMA_BIN_DIR:-/home/eaman/llama/bin_vulkan}"

# Alternative model kept for reference:
#   /home/eaman/lm/models/mradermacher/Qwen3.6-27B/Qwen3.6-27B.i1-IQ4_XS-attn_qkv-IQ4_XS.gguf
model="${LLAMA_MODEL:-/home/eaman/lm/models/localweights/Qwen3.6-27B-MTP-IQ4_XS-Q8nextn-GGUF/Qwen3.6-27B-MTP-IQ4_XS-Q8nextn.gguf}"

# Point the dynamic loader at the directory that ships the binary.
# This replaces (does not extend) any inherited LD_LIBRARY_PATH.
export LD_LIBRARY_PATH="$bin_dir"

# 2. Build the argument list
# The positional parameters are used as a POSIX stand-in for an array so each
# group of flags can carry its own comment. Arguments passed to this script
# are discarded here, exactly as the original script ignored them.

# Model and network. NOTE(review): 0.0.0.0 listens on every interface; use
# 127.0.0.1 if the server should not be reachable from the network.
# -np 1: a single parallel slot.
set -- \
  -m "$model" \
  --host 0.0.0.0 -np 1

# Memory fitting. Presumably the margin the auto-fit logic leaves free per
# device -- TODO confirm unit and meaning for this build.
set -- "$@" --fit-target 50

# KV cache quantization (K: q5_1, V: q4_0) with flash attention enabled.
set -- "$@" \
  -ctk q5_1 \
  -ctv q4_0 \
  -fa on

# Sampling defaults. Repeat and presence penalties are at their neutral
# values (1.0 and 0.0).
set -- "$@" \
  --temp 0.7 --top-k 30 --top-p 0.95 --min-p 0.0 \
  --repeat-penalty 1.0 --presence-penalty 0.0

# Batch size, Jinja chat templating, and loading the model without mmap.
set -- "$@" \
  -b 128 \
  --jinja \
  --no-mmap

# Speculative decoding of type "draft-mtp". Presumably p-min is the minimum
# draft probability and n-max the maximum number of drafted tokens; -ctkd and
# -ctvd presumably set the draft KV cache types -- TODO confirm.
# Previously tried alternative: --spec-draft-n-max 2
set -- "$@" \
  --spec-type draft-mtp --spec-draft-p-min 0.75 --spec-draft-n-max 3 \
  -ctkd q8_0 -ctvd q8_0

# Context window. Headless alternative: --ctx-size 32768
set -- "$@" --ctx-size 30000

# GPU layer count (passed once; the original repeated "-ngl 99"), prompt cache
# RAM limit, log verbosity level 3, and no warmup run at startup.
set -- "$@" \
  -ngl 99 \
  --cache-ram 6000 \
  -lv 3 \
  --no-warmup

# 3. Run the Server
"$bin_dir/llama-server" "$@"


