Qwen3.6-35B-A3B Base

Base configuration for the Qwen3.6-35B-A3B model using the Unsloth UD-Q4_K_S GGUF quantization. A good reference config with an 80K-token context window (`--ctx-size 80000`).

qwen3.6.sh

Base — UD-Q4_K_S quantization with n-gram (`ngram-mod`) speculative decoding

#!/usr/bin/env bash
# qwen3.6.sh — start llama-server (Vulkan build) with the
# Qwen3.6-35B-A3B UD-Q4_K_S model.
#
# optimized for coding
#
# Bash is required: the arguments live in an array so every flag can carry
# its own comment and no trailing line-continuation can be left dangling.

# 1. Set Environment Variables
llama_bin_dir="/home/eaman/llama/bin_vulkan"
model_path="/home/eaman/lm/models/unsloth/Qwen3.6-35B-A3B-GGUF/Qwen3.6-35B-A3B-UD-Q4_K_S.gguf"

# Put the build directory first on the loader search path, but keep whatever
# the caller already exported instead of overwriting it.
export LD_LIBRARY_PATH="${llama_bin_dir}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"

# 2. Run the Server
server_args=(
  -m "${model_path}"

  # Networking / slots.
  # NOTE(review): 0.0.0.0 listens on every interface and no API key is
  # passed here — confirm the host is only reachable from a trusted network.
  --host 0.0.0.0
  -np 1

  # KV cache types (K: q5_0, V: q4_0) with flash attention switched on.
  -ctk q5_0
  -ctv q4_0
  -fa on

  # Sampling.
  --temp 0.55
  --top-p 0.9
  --top-k 30
  --min-p 0.0
  --repeat-penalty 1.0

  # Batch size, memory fitting and context length.
  # NOTE(review): --fit-target presumably sets the free-memory margin used
  # when auto-fitting — confirm units against `llama-server --help`.
  -b 512
  --fit-target 50
  --ctx-size 80000

  # Chat template handling; thinking is turned off through the template
  # kwargs.
  # NOTE(review): confirm that this build accepts a reasoning budget of 1.
  --jinja
  --chat-template-kwargs '{"enable_thinking":false}'
  --reasoning-budget 1

  --no-mmap

  # Speculative decoding using the ngram-mod drafter.
  --spec-type ngram-mod
  --spec-ngram-mod-n-match 8
  --spec-ngram-mod-n-min 3
  --spec-ngram-mod-n-max 24

  # Log verbosity.
  -lv 4
)

"${llama_bin_dir}/llama-server" "${server_args[@]}"