HowieHwong committed (verified)
Commit 3bf42c1 · Parent(s): 8b6e796

Upload README.md with huggingface_hub

Files changed (1): README.md (+50 -1)
README.md CHANGED
@@ -55,7 +55,56 @@ model = AutoModelForCausalLM.from_pretrained(
tokenizer = AutoTokenizer.from_pretrained(base_model_id)

# Load LoRA adapter
- model = PeftModel.from_pretrained(model, "YOUR_USERNAME/ppopt-llama-3.1-8b-lora")
+ model = PeftModel.from_pretrained(model, "HowieHwong/ppopt")
+ ```
+
+ ### Inference Example
+
+ ```python
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ from peft import PeftModel
+
+ # Load model
+ base_model_id = "meta-llama/Llama-3.1-8B-Instruct"
+ model = AutoModelForCausalLM.from_pretrained(
+     base_model_id,
+     torch_dtype="auto",
+     device_map="auto"
+ )
+ tokenizer = AutoTokenizer.from_pretrained(base_model_id)
+ model = PeftModel.from_pretrained(model, "HowieHwong/ppopt")
+
+ # Prepare input
+ conversation_history = """User: How do I center a div?
+ Assistant: You can use flexbox: display: flex; justify-content: center; align-items: center;
+ User: What about grid?
+ Assistant: With grid: display: grid; place-items: center;"""
+
+ current_query = "how to make it responsive"
+
+ prompt = f"""Based on the conversation history and user preferences, optimize the following query into a clearer, more specific prompt.
+
+ Conversation History:
+ {conversation_history}
+
+ Current Query: {current_query}
+
+ Optimized Prompt:"""
+
+ # Generate
+ messages = [{"role": "user", "content": prompt}]
+ input_ids = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
+
+ outputs = model.generate(
+     input_ids,
+     max_new_tokens=256,
+     temperature=0.7,
+     do_sample=True,
+     pad_token_id=tokenizer.eos_token_id
+ )
+
+ response = tokenizer.decode(outputs[0][input_ids.shape[1]:], skip_special_tokens=True)
+ print(response)
 ```

 ### Merge LoRA (Optional)
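The hunk cuts off at the `### Merge LoRA (Optional)` heading, so the body of that section is not visible in this diff. For reference, a minimal sketch of the merge step using PEFT's standard `merge_and_unload()` API; the output directory name is illustrative, not taken from this commit:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Load the base model and attach the adapter, as in the README above
base_model_id = "meta-llama/Llama-3.1-8B-Instruct"
model = AutoModelForCausalLM.from_pretrained(base_model_id, torch_dtype="auto")
model = PeftModel.from_pretrained(model, "HowieHwong/ppopt")

# Fold the LoRA weights into the base model so inference no longer
# requires the peft dependency or a separate adapter download
merged_model = model.merge_and_unload()

# Save the standalone merged model (hypothetical output path)
merged_model.save_pretrained("ppopt-llama-3.1-8b-merged")
tokenizer = AutoTokenizer.from_pretrained(base_model_id)
tokenizer.save_pretrained("ppopt-llama-3.1-8b-merged")
```

Merging trades adapter flexibility for simpler deployment: the saved checkpoint loads with plain `AutoModelForCausalLM.from_pretrained`, at the cost of storing full model weights.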