davide221 committed on
Commit
d0be991
·
verified ·
1 Parent(s): 3bfaf33

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +16 -7
README.md CHANGED
@@ -60,22 +60,31 @@ Measured with `bench_laguna_generate` from lucebox-hub (dflash autoregressive fo
60
  ### lucebox-hub (dflash + PFlash, recommended for 128K)
61
 
62
  ```bash
 
63
  git clone https://github.com/Luce-Org/lucebox-hub
64
  cd lucebox-hub/dflash
65
- cmake -B build -DCMAKE_CUDA_ARCHITECTURES=86 # 86 for 3090, 89 for 4090, 120 for 5090
 
 
66
  cmake --build build -j
67
 
 
68
  hf download Lucebox/Laguna-XS.2-GGUF laguna-xs2-Q4_K_M.gguf --local-dir models/
69
- hf download poolside/Laguna-XS.2 chat_template.jinja tokenizer.json tokenizer_config.json special_tokens_map.json config.json --local-dir models/Laguna-XS-2
 
70
 
 
 
71
  python3 scripts/server.py \
72
- --target models/laguna-xs2-Q4_K_M.gguf \
73
- --tokenizer models/Laguna-XS-2 \
74
- --port 8000 --max-ctx 131072
 
75
 
 
76
  curl http://localhost:8000/v1/chat/completions \
77
- -H 'Content-Type: application/json' \
78
- -d '{"model":"luce-dflash","messages":[{"role":"user","content":"hello"}],"stream":true}'
79
  ```
80
 
81
  ## License
 
60
  ### lucebox-hub (dflash + PFlash, recommended for 128K)
61
 
62
  ```bash
63
+ # clone
64
  git clone https://github.com/Luce-Org/lucebox-hub
65
  cd lucebox-hub/dflash
66
+
67
+ # build with sm_86 (3090 / A6000); use 89 for 4090, 120 for 5090
68
+ cmake -B build -DCMAKE_CUDA_ARCHITECTURES=86
69
  cmake --build build -j
70
 
71
+ # fetch the Q4_K_M GGUF + Poolside tokenizer
72
  hf download Lucebox/Laguna-XS.2-GGUF laguna-xs2-Q4_K_M.gguf --local-dir models/
73
+ hf download poolside/Laguna-XS.2 chat_template.jinja tokenizer.json tokenizer_config.json \
74
+ special_tokens_map.json config.json --local-dir models/Laguna-XS-2
75
 
76
+ # run the OpenAI-compatible server (same server.py as qwen35, arch auto-detected from GGUF).
77
+ # -ctk/-ctv q4_0 keeps the 131K KV cache under ~6 GB so weights + KV fit on 24 GB.
78
  python3 scripts/server.py \
79
+ --target models/laguna-xs2-Q4_K_M.gguf \
80
+ --tokenizer models/Laguna-XS-2 \
81
+ --port 8000 --max-ctx 131072 \
82
+ -ctk q4_0 -ctv q4_0
83
 
84
+ # chat
85
  curl http://localhost:8000/v1/chat/completions \
86
+ -H "Content-Type: application/json" \
87
+ -d '{"model":"luce-dflash","messages":[{"role":"user","content":"hello"}],"stream":true}'
88
  ```
89
 
90
  ## License