Spaces:
Sleeping
Sleeping
File size: 1,942 Bytes
1bacd77 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 | """
ECHO ULTIMATE — HuggingFace Space GPU Training Entrypoint.
Runs full GRPO training then pushes adapter to HF Hub.
Hardware: T4 medium or A10G small (set in Space settings).
"""
import os
import sys
import logging
# Put this file's own directory first on sys.path so the project-local
# imports further down resolve no matter where the script is launched from.
_entry_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, _entry_dir)

# Root logger: INFO and above, emitted on stdout with a timestamped format.
_stdout_handler = logging.StreamHandler(sys.stdout)
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s β %(message)s",
    handlers=[_stdout_handler],
)
import threading
from fastapi import FastAPI
from fastapi.responses import PlainTextResponse
import uvicorn
# ── Tiny status server on :7860 so HF Space health checks pass ────────────────
# In-memory buffer of log lines (strings); read by the /health and /log routes.
training_log = []
# FastAPI application that hosts the status routes defined below.
status_app = FastAPI()
@status_app.get("/health")
def health():
    """Answer the health-check route with a small JSON status payload.

    Returns:
        A dict with ``"status"`` fixed to ``"training"`` and ``"log_lines"``
        set to the current number of entries in ``training_log``.
    """
    # NOTE(review): the status value is constant, so it still reads
    # "training" after the run has finished — confirm this is intended.
    captured_lines = len(training_log)
    return {"status": "training", "log_lines": captured_lines}
@status_app.get("/log", response_class=PlainTextResponse)
def log():
    """Return the tail of the training log as plain text.

    Returns:
        The last 100 entries of ``training_log`` joined with newlines; an
        empty string when the buffer holds nothing.
    """
    tail = training_log[-100:]
    return "\n".join(tail)
def run_status_server():
    """Serve ``status_app`` with uvicorn on all interfaces, port 7860.

    ``uvicorn.run`` does not return while the server is up, so this is meant
    to be used as the target of a dedicated thread.
    """
    uvicorn.run(
        status_app,
        host="0.0.0.0",
        port=7860,
        log_level="warning",
    )
# Run the status server in a daemon thread so it never blocks interpreter
# exit and the main thread stays free for training.
_status_thread = threading.Thread(target=run_status_server, daemon=True)
_status_thread.start()
# ── Training ──────────────────────────────────────────────────────────────────
class _TrainingLogCapture(logging.Handler):
    """Logging handler that appends each formatted record to ``training_log``."""

    def emit(self, record):
        try:
            training_log.append(self.format(record))
        except Exception:
            # Follow the logging.Handler convention: a failure while logging
            # is reported through handleError, never raised into the caller.
            self.handleError(record)


# Nothing else in this file writes to training_log, so without this handler
# the /log route would always return an empty body and /health would always
# report zero lines. Only logging records are captured; print() output is not.
# NOTE(review): the buffer is unbounded; /log only ever serves the last 100
# entries — consider trimming if very long runs become a memory concern.
_root_logger = logging.getLogger()
_log_capture = _TrainingLogCapture()
if _root_logger.handlers:
    # Reuse the formatter of the already-configured handler so the lines
    # served by /log look the same as the ones written to stdout.
    _log_capture.setFormatter(_root_logger.handlers[0].formatter)
_root_logger.addHandler(_log_capture)

print("=" * 60)
print("π ECHO ULTIMATE β GRPO Training on HF GPU Space")
print("=" * 60)

# Project imports stay below the status-server start, as in the original
# ordering, so the server thread is already launched while they load.
from config import cfg
from env.task_bank import TaskBank
from training.train import train

bank = TaskBank()
bank.download_all()

# NOTE(review): hf_token is read here but never passed to train() in this
# file — confirm train() picks HF_TOKEN up from the environment itself.
hf_token = os.environ.get("HF_TOKEN", "")
# Weights & Biases reporting is enabled only when an API key is present.
use_wandb = bool(os.environ.get("WANDB_API_KEY", ""))

train(
    model_name=cfg.MODEL_NAME,
    output_dir=cfg.MODEL_SAVE_DIR,
    task_bank=bank,
    use_wandb=use_wandb,
)

print("\nπ Training complete! Space will stay running β check /log for details.")

# Keep the status server alive after training: the server runs in a daemon
# thread, so the process would exit as soon as the main thread returned.
import time

while True:
    time.sleep(60)
|