Spaces:
Sleeping
Sleeping
Upload 3 files
Browse files
- README.md +2 -2
- app.py +13 -1
- requirements.txt +2 -2
README.md
CHANGED
@@ -22,6 +22,6 @@ A Gradio Space that applies the Appendix-style prompt: the model must prioritize
 `HF_TOKEN` — required if the model is gated.

 **Files**
-- `app.py` — Gradio app
-- `requirements.txt` — dependencies
+- `app.py` — Gradio app (cache disabled to avoid DynamicCache issues)
+- `requirements.txt` — dependencies (pins transformers 4.43.3, accelerate 0.32.1)
 - `examples/` — (optional) assets/presets
|
app.py
CHANGED
@@ -68,6 +68,17 @@ def load_model(model_id: str = DEFAULT_MODEL):
         use_auth_token=auth,
         trust_remote_code=TRUST_REMOTE_CODE,
     )
+
+    # Safety: ensure pad_token_id is set
+    if _tokenizer.pad_token_id is None and _tokenizer.eos_token_id is not None:
+        _tokenizer.pad_token_id = _tokenizer.eos_token_id
+
+    # Prefer static cache if available to avoid DynamicCache issues in some remote code
+    try:
+        _model.generation_config.cache_implementation = "static"
+    except Exception:
+        pass
+
     return _tokenizer, _model

 def generate_text(question: str, context: str, temperature: float, top_p: float, max_new_tokens: int, model_id: str):
@@ -81,7 +92,8 @@ def generate_text(question: str, context: str, temperature: float, top_p: float,
         temperature=temperature,
         top_p=top_p,
         max_new_tokens=max_new_tokens,
-        pad_token_id=tokenizer.eos_token_id,
+        pad_token_id=tokenizer.pad_token_id or tokenizer.eos_token_id,
+        use_cache=False,  # <-- avoid DynamicCache path in custom modeling code
     )
     text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
requirements.txt
CHANGED
@@ -1,5 +1,5 @@
 gradio>=4.44.0
-transformers
-accelerate
+transformers==4.43.3
+accelerate==0.32.1
 torch
 sentencepiece