| """ |
| Agent Q3 [Evo] — Feedback Collector |
| Captures DPO / RLHF signal pairs from inference runs. |
| Writes to HF dataset madDegen/agent-q3-trainingevo. |
| """ |
| import json, os, datetime |
| from pathlib import Path |
|
|
| FEEDBACK_DIR = Path(os.getenv("FEEDBACK_DIR", "./feedback")) |
|
|
def record(prompt: str, chosen: str, rejected: str, domain: str = "general") -> dict:
    """Record a DPO preference pair.

    Appends one JSON object (a single line) to
    ``FEEDBACK_DIR / "feedback_<YYYY-MM-DD>.jsonl"``, creating the directory
    and any missing parents first.

    Args:
        prompt: Stored under the ``"prompt"`` key.
        chosen: Stored under the ``"chosen"`` key.
        rejected: Stored under the ``"rejected"`` key.
        domain: Stored under the ``"domain"`` key; defaults to ``"general"``.

    Returns:
        The entry dict that was written, including its ``"timestamp"``.
    """
    # Read the clock once so the entry's timestamp and the file's date can
    # never disagree (previously: UTC timestamp, local-time file date).
    now = datetime.datetime.now(datetime.timezone.utc)
    entry = {
        # Drop tzinfo so the stored string keeps the same naive-UTC format
        # that utcnow().isoformat() produced for earlier rows.
        "timestamp": now.replace(tzinfo=None).isoformat(),
        "domain": domain,
        "prompt": prompt,
        "chosen": chosen,
        "rejected": rejected,
    }
    # parents=True: FEEDBACK_DIR comes from the environment and may be nested.
    FEEDBACK_DIR.mkdir(parents=True, exist_ok=True)
    out_file = FEEDBACK_DIR / f"feedback_{now.date().isoformat()}.jsonl"
    with open(out_file, "a", encoding="utf-8") as f:
        f.write(json.dumps(entry) + "\n")
    return entry
|
|
def push_to_hf(token: "str | None" = None):
    """Push accumulated feedback to madDegen/agent-q3-trainingevo.

    Reads every ``*.jsonl`` file directly inside ``FEEDBACK_DIR`` (in sorted
    path order), parses each non-blank line as JSON, and uploads all rows as
    the ``train`` split. Prints a message and returns without uploading when
    no rows are found.

    Args:
        token: Hugging Face access token. When falsy, the ``HF_TOKEN``
            environment variable is used instead.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    import glob

    rows = []
    for fp in sorted(glob.glob(str(FEEDBACK_DIR / "*.jsonl"))):
        with open(fp, encoding="utf-8") as f:
            rows.extend(json.loads(line) for line in f if line.strip())
    if not rows:
        print("No feedback to push.")
        return

    # Imported only once there is something to upload, so the empty case
    # does not require the `datasets` package to be installed.
    from datasets import Dataset

    token = token or os.getenv("HF_TOKEN")
    ds = Dataset.from_list(rows)
    # NOTE(review): only locally collected rows are sent; presumably this
    # replaces the remote train split rather than appending — confirm.
    ds.push_to_hub("madDegen/agent-q3-trainingevo", token=token, split="train")
    print(f"Pushed {len(rows)} feedback pairs to HF.")
|
|
# Script entry point: running this file directly uploads whatever feedback
# has been collected so far; importing it has no such side effect.
if __name__ == "__main__":
    push_to_hf()
|
|