surrogate1 committed on
Commit
548096f
·
verified ·
1 Parent(s): 9fe5ee4

fix: 14B + bnb int4

Browse files
Files changed (1) hide show
  1. app.py +9 -2
app.py CHANGED
@@ -8,7 +8,7 @@ import gradio as gr
8
  import spaces
9
  import torch
10
 
11
- BASE_MODEL = os.environ.get("BASE_MODEL", "Qwen/Qwen2.5-Coder-32B-Instruct-AWQ")
12
  HF_TOKEN = os.environ.get("HF_TOKEN", "")
13
 
14
  DOMAIN_SEEDS = {
@@ -50,9 +50,16 @@ def _load_lazy():
50
  BASE_MODEL, token=HF_TOKEN or None, trust_remote_code=True)
51
  if _tok.pad_token_id is None:
52
  _tok.pad_token_id = _tok.eos_token_id
 
 
 
 
 
 
 
53
  _model = AutoModelForCausalLM.from_pretrained(
54
  BASE_MODEL, token=HF_TOKEN or None, trust_remote_code=True,
55
- device_map="cuda", torch_dtype=torch.bfloat16)
56
  return _model, _tok
57
 
58
 
 
8
  import spaces
9
  import torch
10
 
11
+ BASE_MODEL = os.environ.get("BASE_MODEL", "Qwen/Qwen2.5-Coder-14B-Instruct")
12
  HF_TOKEN = os.environ.get("HF_TOKEN", "")
13
 
14
  DOMAIN_SEEDS = {
 
50
  BASE_MODEL, token=HF_TOKEN or None, trust_remote_code=True)
51
  if _tok.pad_token_id is None:
52
  _tok.pad_token_id = _tok.eos_token_id
53
+ from transformers import BitsAndBytesConfig
54
+ bnb = BitsAndBytesConfig(
55
+ load_in_4bit=True,
56
+ bnb_4bit_compute_dtype=torch.bfloat16,
57
+ bnb_4bit_quant_type="nf4",
58
+ bnb_4bit_use_double_quant=True,
59
+ )
60
  _model = AutoModelForCausalLM.from_pretrained(
61
  BASE_MODEL, token=HF_TOKEN or None, trust_remote_code=True,
62
+ device_map="cuda", quantization_config=bnb)
63
  return _model, _tok
64
 
65