Spaces:
Sleeping
Sleeping
Upload 3 files
Browse files
- README.md +2 -2
- app.py +13 -1
- requirements.txt +2 -2
README.md
CHANGED
@@ -22,6 +22,6 @@ A Gradio Space that applies the Appendix-style prompt: the model must prioritize
 `HF_TOKEN` — required if the model is gated.

 **Files**
-- `app.py` — Gradio app
-- `requirements.txt` — dependencies
+- `app.py` — Gradio app (cache disabled to avoid DynamicCache issues)
+- `requirements.txt` — dependencies (pins transformers 4.43.3, accelerate 0.32.1)
 - `examples/` — (optional) assets/presets
|
app.py
CHANGED
@@ -68,6 +68,17 @@ def load_model(model_id: str = DEFAULT_MODEL):
         use_auth_token=auth,
         trust_remote_code=TRUST_REMOTE_CODE,
     )
+
+    # Safety: ensure pad_token_id is set
+    if _tokenizer.pad_token_id is None and _tokenizer.eos_token_id is not None:
+        _tokenizer.pad_token_id = _tokenizer.eos_token_id
+
+    # Prefer static cache if available to avoid DynamicCache issues in some remote code
+    try:
+        _model.generation_config.cache_implementation = "static"
+    except Exception:
+        pass
+
     return _tokenizer, _model

 def generate_text(question: str, context: str, temperature: float, top_p: float, max_new_tokens: int, model_id: str):
@@ -81,7 +92,8 @@ def generate_text(question: str, context: str, temperature: float, top_p: float,
         temperature=temperature,
         top_p=top_p,
         max_new_tokens=max_new_tokens,
-        pad_token_id=tokenizer.eos_token_id,
+        pad_token_id=tokenizer.pad_token_id or tokenizer.eos_token_id,
+        use_cache=False,  # <-- avoid DynamicCache path in custom modeling code
     )
     text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
requirements.txt
CHANGED
@@ -1,5 +1,5 @@
 gradio>=4.44.0
-transformers
-accelerate
+transformers==4.43.3
+accelerate==0.32.1
 torch
 sentencepiece