SeaWolf-AI committed on
Commit
d75309f
·
verified ·
1 Parent(s): cfad124

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -3
app.py CHANGED
@@ -87,9 +87,25 @@ N_CTX = int(os.getenv("N_CTX", "32768"))
87
  print(f"[MODEL] Loading {REPO_ID} ...", flush=True)
88
  print(f"[MODEL] n_gpu_layers={N_GPU_LAYERS}, n_ctx={N_CTX}", flush=True)
89
 
90
- llm = Llama.from_pretrained(
91
- repo_id=REPO_ID,
92
- filename=GGUF_FILE,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  n_gpu_layers=N_GPU_LAYERS,
94
  n_ctx=N_CTX,
95
  verbose=True,
 
87
  print(f"[MODEL] Loading {REPO_ID} ...", flush=True)
88
  print(f"[MODEL] n_gpu_layers={N_GPU_LAYERS}, n_ctx={N_CTX}", flush=True)
89
 
90
+ # ── Split GGUF: all 3 shards must be downloaded ──
91
+ from huggingface_hub import hf_hub_download
92
+
93
+ GGUF_SHARDS = [
94
+ "merged_109838c2-q8_0-00001-of-00003.gguf",
95
+ "merged_109838c2-q8_0-00002-of-00003.gguf",
96
+ "merged_109838c2-q8_0-00003-of-00003.gguf",
97
+ ]
98
+
99
+ shard_paths = []
100
+ for shard in GGUF_SHARDS:
101
+ print(f"[MODEL] Downloading {shard} ...", flush=True)
102
+ p = hf_hub_download(repo_id=REPO_ID, filename=shard)
103
+ shard_paths.append(p)
104
+ print(f"[MODEL] → {p}", flush=True)
105
+
106
+ # Load via the first shard's path (llama.cpp auto-detects the remaining shards in the same folder)
107
+ llm = Llama(
108
+ model_path=shard_paths[0],
109
  n_gpu_layers=N_GPU_LAYERS,
110
  n_ctx=N_CTX,
111
  verbose=True,