madDegen committed on
Commit
ce752c2
·
verified ·
1 Parent(s): 66c8911

consolidate: Evo DPO/RLHF feedback collector

Browse files
Files changed (1) hide show
  1. evo/feedback_collector.py +45 -0
evo/feedback_collector.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Agent Q3 [Evo] — Feedback Collector
3
+ Captures DPO / RLHF signal pairs from inference runs.
4
+ Writes to HF dataset madDegen/agent-q3-trainingevo.
5
+ """
6
+ import json, os, datetime
7
+ from pathlib import Path
8
+
9
+ FEEDBACK_DIR = Path(os.getenv("FEEDBACK_DIR", "./feedback"))
10
+
11
def record(prompt: str, chosen: str, rejected: str, domain: str = "general"):
    """Record a DPO preference pair as one JSON line on disk.

    The pair is appended to ``FEEDBACK_DIR / "feedback_<YYYY-MM-DD>.jsonl"``,
    where the date is the UTC date of the entry's own timestamp. The
    directory, including any missing parents, is created on demand.

    Args:
        prompt: Prompt text stored under the ``"prompt"`` key.
        chosen: Response stored under the ``"chosen"`` key.
        rejected: Response stored under the ``"rejected"`` key.
        domain: Tag stored under the ``"domain"`` key; defaults to
            ``"general"``.

    Returns:
        The dict that was written, with keys ``timestamp``, ``domain``,
        ``prompt``, ``chosen`` and ``rejected``.
    """
    # FEEDBACK_DIR comes from an environment variable and may be nested,
    # so create intermediate directories as well.
    FEEDBACK_DIR.mkdir(parents=True, exist_ok=True)

    # Take a single clock reading so the timestamp and the file name can
    # never disagree about the date. utcnow() is deprecated; dropping tzinfo
    # keeps the stored value in the same naive-UTC ISO format as before.
    now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)

    entry = {
        "timestamp": now.isoformat(),
        "domain": domain,
        "prompt": prompt,
        "chosen": chosen,
        "rejected": rejected,
    }

    out_file = FEEDBACK_DIR / f"feedback_{now.date().isoformat()}.jsonl"
    # Explicit encoding: do not depend on the platform default.
    with open(out_file, "a", encoding="utf-8") as f:
        f.write(json.dumps(entry) + "\n")
    return entry
26
+
27
def push_to_hf(token: "str | None" = None):
    """Push accumulated feedback to madDegen/agent-q3-trainingevo.

    Reads every ``*.jsonl`` file in ``FEEDBACK_DIR`` in sorted order, parses
    each non-blank line as JSON, and uploads all rows as the ``train`` split
    of the dataset. When no rows are found, a message is printed and nothing
    is uploaded.

    Args:
        token: Hugging Face access token. When falsy, the ``HF_TOKEN``
            environment variable is used instead.

    Returns:
        None.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    rows = []
    for path in sorted(FEEDBACK_DIR.glob("*.jsonl")):
        # Explicit encoding: do not depend on the platform default.
        with open(path, encoding="utf-8") as f:
            rows.extend(json.loads(line) for line in f if line.strip())

    if not rows:
        print("No feedback to push.")
        return

    # Imported only once there is something to upload, so the empty case
    # does not pay for (or require) the third-party dependency.
    from datasets import Dataset

    token = token or os.getenv("HF_TOKEN")
    # NOTE(review): local files are not removed after a push, so each call
    # uploads everything on disk again — confirm this is intended.
    ds = Dataset.from_list(rows)
    ds.push_to_hub("madDegen/agent-q3-trainingevo", token=token, split="train")
    print(f"Pushed {len(rows)} feedback pairs to HF.")


# Running the module as a script pushes whatever feedback is on disk.
if __name__ == "__main__":
    push_to_hf()