Spaces:
Running on A10G
Running on A10G
Update app.py
Browse files
app.py
CHANGED
|
@@ -87,9 +87,25 @@ N_CTX = int(os.getenv("N_CTX", "32768"))
|
|
| 87 |
print(f"[MODEL] Loading {REPO_ID} ...", flush=True)
|
| 88 |
print(f"[MODEL] n_gpu_layers={N_GPU_LAYERS}, n_ctx={N_CTX}", flush=True)
|
| 89 |
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
n_gpu_layers=N_GPU_LAYERS,
|
| 94 |
n_ctx=N_CTX,
|
| 95 |
verbose=True,
|
|
|
|
| 87 |
print(f"[MODEL] Loading {REPO_ID} ...", flush=True)
|
| 88 |
print(f"[MODEL] n_gpu_layers={N_GPU_LAYERS}, n_ctx={N_CTX}", flush=True)
|
| 89 |
|
| 90 |
+
# -- Split GGUF: all 3 shards must be downloaded --
|
| 91 |
+
from huggingface_hub import hf_hub_download
|
| 92 |
+
|
| 93 |
+
GGUF_SHARDS = [
|
| 94 |
+
"merged_109838c2-q8_0-00001-of-00003.gguf",
|
| 95 |
+
"merged_109838c2-q8_0-00002-of-00003.gguf",
|
| 96 |
+
"merged_109838c2-q8_0-00003-of-00003.gguf",
|
| 97 |
+
]
|
| 98 |
+
|
| 99 |
+
shard_paths = []
|
| 100 |
+
for shard in GGUF_SHARDS:
|
| 101 |
+
print(f"[MODEL] Downloading {shard} ...", flush=True)
|
| 102 |
+
p = hf_hub_download(repo_id=REPO_ID, filename=shard)
|
| 103 |
+
shard_paths.append(p)
|
| 104 |
+
print(f"[MODEL] β {p}", flush=True)
|
| 105 |
+
|
| 106 |
+
# Load via the first shard's path (llama.cpp auto-detects the remaining shards in the same folder)
|
| 107 |
+
llm = Llama(
|
| 108 |
+
model_path=shard_paths[0],
|
| 109 |
n_gpu_layers=N_GPU_LAYERS,
|
| 110 |
n_ctx=N_CTX,
|
| 111 |
verbose=True,
|