NS-Y committed on
Commit
1a26838
·
verified ·
1 Parent(s): 0de4069

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +2 -2
  2. app.py +13 -1
  3. requirements.txt +2 -2
README.md CHANGED
@@ -22,6 +22,6 @@ A Gradio Space that applies the Appendix-style prompt: the model must prioritize
22
  - `HF_TOKEN` — required if the model is gated.
23
 
24
  **Files**
25
- - `app.py` — Gradio app
26
- - `requirements.txt` — dependencies
27
  - `examples/` — (optional) assets/presets
 
22
  - `HF_TOKEN` — required if the model is gated.
23
 
24
  **Files**
25
+ - `app.py` — Gradio app (cache disabled to avoid DynamicCache issues)
26
+ - `requirements.txt` — dependencies (pins transformers 4.43.3, accelerate 0.32.1)
27
  - `examples/` — (optional) assets/presets
app.py CHANGED
@@ -68,6 +68,17 @@ def load_model(model_id: str = DEFAULT_MODEL):
68
  use_auth_token=auth,
69
  trust_remote_code=TRUST_REMOTE_CODE,
70
  )
 
 
 
 
 
 
 
 
 
 
 
71
  return _tokenizer, _model
72
 
73
  def generate_text(question: str, context: str, temperature: float, top_p: float, max_new_tokens: int, model_id: str):
@@ -81,7 +92,8 @@ def generate_text(question: str, context: str, temperature: float, top_p: float,
81
  temperature=temperature,
82
  top_p=top_p,
83
  max_new_tokens=max_new_tokens,
84
- pad_token_id=tokenizer.eos_token_id,
 
85
  )
86
  text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
87
 
 
68
  use_auth_token=auth,
69
  trust_remote_code=TRUST_REMOTE_CODE,
70
  )
71
+
72
+ # Safety: ensure pad_token_id is set
73
+ if _tokenizer.pad_token_id is None and _tokenizer.eos_token_id is not None:
74
+ _tokenizer.pad_token_id = _tokenizer.eos_token_id
75
+
76
+ # Prefer static cache if available to avoid DynamicCache issues in some remote code
77
+ try:
78
+ _model.generation_config.cache_implementation = "static"
79
+ except Exception:
80
+ pass
81
+
82
  return _tokenizer, _model
83
 
84
  def generate_text(question: str, context: str, temperature: float, top_p: float, max_new_tokens: int, model_id: str):
 
92
  temperature=temperature,
93
  top_p=top_p,
94
  max_new_tokens=max_new_tokens,
95
+ pad_token_id=tokenizer.pad_token_id or tokenizer.eos_token_id,
96
+ use_cache=False, # <-- avoid DynamicCache path in custom modeling code
97
  )
98
  text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
99
 
requirements.txt CHANGED
@@ -1,5 +1,5 @@
1
  gradio>=4.44.0
2
- transformers>=4.44.0
3
- accelerate>=0.33.0
4
  torch
5
  sentencepiece
 
1
  gradio>=4.44.0
2
+ transformers==4.43.3
3
+ accelerate==0.32.1
4
  torch
5
  sentencepiece