Spaces:

Asad-ullah008
/

asad-ai

Running

File size: 8,290 Bytes

# ================================================================
#  ASAD AI — GRADIO APP  v4.0 (HF Space compatible)
#  • Auto‑trains every 24h in background
#  • Persistent storage /data/
#  • No localhost launch errors
# ================================================================

import os, json, random, time, datetime, threading, logging
import numpy as np
import torch
import gradio as gr
from train import run_training, bow, clean, AsadAIModel, BASE_DATA
from sklearn.preprocessing import LabelEncoder

# Console logging: short timestamp plus an [APP] tag on every line.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [APP] %(message)s",
    datefmt="%H:%M:%S",
)
log = logging.getLogger(__name__)

# Storage locations; the root directory can be overridden via $STORAGE_DIR.
STORAGE_DIR = os.getenv("STORAGE_DIR", "/data")
MODEL_PATH = os.path.join(STORAGE_DIR, "asad_ai_best.pth")
INFO_PATH = os.path.join(STORAGE_DIR, "model_info.json")
DATA_PATH = os.path.join(STORAGE_DIR, "training_data.json")
RETRAIN_EVERY = 86_400  # seconds between scheduled training runs (24 h)

# Mutable module state shared between the scheduler thread and UI callbacks.
_model = _le = _last_trained = None  # nothing loaded / trained yet
_vocab = []
_all_data = BASE_DATA
_is_training = False
_lock = threading.Lock()  # mutex the functions below take around this state

def load_from_disk():
    """Restore the saved model, vocabulary, label encoder and data from disk.

    Reads the JSON metadata at INFO_PATH and the state dict at MODEL_PATH;
    the intents file at DATA_PATH is optional. Everything is loaded and
    validated first, and only then published to the module globals in a
    single critical section, so a failure part-way through (e.g. a missing
    ``vocab`` key) can no longer leave a half-updated model/vocab pair.

    Returns:
        bool: True when a model was loaded and activated; False when the
        files are missing or any step failed (the failure is logged).
    """
    global _model, _vocab, _le, _all_data, _last_trained
    try:
        if not os.path.exists(INFO_PATH) or not os.path.exists(MODEL_PATH):
            return False
        # Explicit encoding, matching how DATA_PATH is read below.
        with open(INFO_PATH, 'r', encoding='utf-8') as f:
            info = json.load(f)

        # Resolve every required key before touching shared state.
        vocab = info['vocab']
        le = LabelEncoder()
        le.classes_ = np.array(info['tags'])
        m = AsadAIModel(info['input_size'], info['hidden_size'], info['output_size'])
        m.load_state_dict(torch.load(MODEL_PATH, map_location='cpu', weights_only=True))
        m.eval()

        data = None
        if os.path.exists(DATA_PATH):
            with open(DATA_PATH, 'r', encoding='utf-8') as f:
                data = json.load(f)

        trained_at = None
        if 'trained_at' in info:
            try:
                trained_at = datetime.datetime.strptime(info['trained_at'], "%Y-%m-%d %H:%M:%S")
            except (TypeError, ValueError):
                # Unparseable timestamp: fall back to "now", as before.
                trained_at = datetime.datetime.now()

        # Publish atomically so readers never see a mix of old and new objects.
        with _lock:
            _model = m
            _vocab = vocab
            _le = le
            if data is not None:
                _all_data = data
            if trained_at is not None:
                _last_trained = trained_at
        log.info("✅ Loaded saved model")
        return True
    except Exception as e:
        # Best-effort loader: any failure means "no usable saved model".
        log.warning(f"Load failed: {e}")
        return False

def do_train():
    """Run one training cycle and activate its result.

    Sets the ``_is_training`` flag for the duration of the run. When
    ``run_training()`` returns a truthy ``(model, vocab, label_encoder, data)``
    tuple, those objects replace the active ones and the training timestamp
    is set to now; otherwise the saved model is reloaded from disk. The flag
    is always cleared on the way out, even if training raises.
    """
    global _model, _vocab, _le, _all_data, _last_trained, _is_training
    with _lock:
        _is_training = True
    try:
        outcome = run_training()
        if not outcome:
            # Training produced nothing usable: fall back to the saved model.
            load_from_disk()
            return
        new_model, new_vocab, new_encoder, new_data = outcome
        with _lock:
            _model, _vocab, _le, _all_data = new_model, new_vocab, new_encoder, new_data
            _last_trained = datetime.datetime.now()
        log.info("✅ New model active")
    finally:
        with _lock:
            _is_training = False

def scheduler_loop():
    """Train immediately, then again every RETRAIN_EVERY seconds, forever.

    Meant to be the target of a background thread; it never returns.
    A failed training run is logged and the loop keeps going, so a single
    exception from ``do_train()`` no longer terminates the thread and
    silently stops all future retraining.
    """
    log.info("🕐 Scheduler started – training now, then every 24h")
    while True:
        try:
            do_train()
        except Exception:
            # Thread boundary: record the traceback and wait for the next slot.
            log.exception("Training run failed; retrying at the next scheduled run")
        time.sleep(RETRAIN_EVERY)

def get_response(text, threshold=0.40):
    """Classify *text* and pick a reply for the predicted intent.

    Args:
        text: The user's message.
        threshold: Minimum softmax confidence; below it the tag becomes
            ``"unknown"``.

    Returns:
        tuple: ``(reply, tag, confidence)``. While no model is loaded the tag
        is ``"loading"``; when no intent with responses matches the tag, a
        fixed apology is returned with tag ``"unknown"`` and confidence 0.0.
    """
    # Snapshot the shared state so inference runs without holding the lock.
    with _lock:
        model, vocab, encoder, corpus = _model, _vocab, _le, _all_data
    if model is None:
        return "⏳ Model abhi train ho raha hai – thodi der mein aao!", "loading", 0.0

    features = torch.FloatTensor(bow(text, vocab)).unsqueeze(0)
    with torch.no_grad():
        probs = torch.softmax(model(features), dim=1)
        conf, cls = torch.max(probs, 1)
    conf_val = conf.item()
    predicted = encoder.inverse_transform(cls.numpy())[0]
    tag = "unknown" if conf_val < threshold else predicted

    # First intent carrying this tag that actually has responses.
    match = next(
        (entry for entry in corpus.get("intents", [])
         if entry["tag"] == tag and entry.get("responses")),
        None,
    )
    if match is not None:
        return random.choice(match["responses"]), tag, conf_val
    return "Maafi chahta hoon!", "unknown", 0.0

def chat_fn(message, history):
    """Chat callback: return the bot's reply text for *message*.

    *history* is accepted for the chat-callback signature but not used.
    A blank (empty or whitespace-only) message yields an empty reply.
    """
    if message.strip():
        reply, _tag, _conf = get_response(message)
        return reply
    return ""

def get_status():
    """Return the Markdown text for the training-status panel.

    Shows one of three states: training in progress, model ready (with the
    last-trained time and the time left until last-trained + RETRAIN_EVERY,
    floored at zero), or waiting for the first training. Uptime since
    ``_start_time`` is always included.
    """
    with _lock:
        busy, trained_at = _is_training, _last_trained
    # Drop the fractional seconds from the timedelta's string form.
    uptime = str(datetime.datetime.now() - _start_time).partition('.')[0]

    if busy:
        return f"### 🔄 Training in progress...\n⏳ Please wait.\n🕐 Uptime: `{uptime}`"
    if not trained_at:
        return f"### ⏳ Waiting for first training...\n🕐 Uptime: `{uptime}`"

    due = trained_at + datetime.timedelta(seconds=RETRAIN_EVERY)
    remaining = max(due - datetime.datetime.now(), datetime.timedelta(0))
    hours, leftover = divmod(remaining.total_seconds(), 3600)
    stamp = trained_at.strftime('%Y-%m-%d %H:%M:%S')
    return "\n".join([
        "### ✅ Model ready",
        f"📅 Last trained: `{stamp}`",
        f"⏰ Next: `{int(hours)}h {int(leftover // 60)}m`",
        f"🕐 Uptime: `{uptime}`",
    ])

def get_info():
    """Return a Markdown summary of the saved model's metadata.

    Reads INFO_PATH and reports accuracy, vocabulary size, intent count,
    pattern count and training time, with ``?`` for absent fields.
    Returns ``"No model yet."`` when the file does not exist and
    ``"Info not available."`` on any error.
    """
    try:
        if not os.path.exists(INFO_PATH):
            return "No model yet."
        with open(INFO_PATH, 'r') as fh:
            meta = json.load(fh)
        # Intent count falls back to the number of tags when not recorded.
        intents_n = meta.get('intents_n', len(meta.get('tags', [])))
        rows = (
            ("🎯 Best accuracy", f"{meta.get('best_acc', '?')}%"),
            ("📚 Vocab size", f"{len(meta.get('vocab', []))}"),
            ("🗂️ Intents", f"{intents_n}"),
            ("📝 Patterns", f"{meta.get('patterns_n', '?')}"),
            ("⏱️ Training time", f"{meta.get('elapsed_s', '?')}s"),
        )
        return "\n\n".join([f"{label}: `{value}`" for label, value in rows])
    except Exception:
        return "Info not available."

def get_logs():
    """Return the last 8 lines of the training log, formatted for display.

    Reads ``train_log.jsonl`` under STORAGE_DIR. Each line that parses as a
    JSON object is rendered as ``[ts] event | loss=… acc=…%``; any other line
    is shown stripped, as-is.

    Returns:
        str: The formatted lines joined by newlines, ``"No logs yet."`` when
        the file does not exist, or ``"Log read error."`` if reading fails
        (the failure is logged).
    """
    from collections import deque  # only needed here

    log_file = os.path.join(STORAGE_DIR, "train_log.jsonl")
    try:
        if not os.path.exists(log_file):
            return "No logs yet."
        # Stream the file and keep only the tail instead of loading every
        # line; undecodable bytes are replaced rather than failing the read.
        with open(log_file, 'r', encoding='utf-8', errors='replace') as f:
            tail = deque(f, maxlen=8)
        out = []
        for line in tail:
            try:
                d = json.loads(line)
            except ValueError:
                d = None  # not JSON: fall through to the raw line
            if isinstance(d, dict):
                out.append(f"[{d.get('ts','')}] {d.get('event','')} | loss={d.get('loss','?')} acc={d.get('acc','?')}%")
            else:
                out.append(line.strip())
        return '\n'.join(out)
    except Exception as e:
        # UI callback boundary: stay best-effort, but leave a trace and let
        # KeyboardInterrupt/SystemExit propagate.
        log.warning("Log read failed: %s", e)
        return "Log read error."

# ── Start background scheduler ──
# Process start time; get_status() subtracts it from "now" to show uptime.
_start_time = datetime.datetime.now()
# Try to restore a previously saved model before the scheduler's first
# training run; the return value is not needed here.
load_from_disk()
# Daemon thread: runs scheduler_loop() in the background and does not keep
# the interpreter alive on shutdown.
_thread = threading.Thread(target=scheduler_loop, daemon=True)
_thread.start()

# ── Gradio UI (HF Space safe) ──
# Custom CSS passed to gr.Blocks: caps the container width at 980px, centers
# it, and hides the page footer.
CSS = """
.gradio-container { max-width: 980px !important; margin: auto !important; }
footer { display: none !important; }
"""

# HTML banner rendered at the top of the page via gr.HTML.
HEADER = """
<div style="background: linear-gradient(135deg, #064e3b, #047857); border-radius: 18px; padding: 30px 28px; text-align: center; color: white;">
  <h1>🤖 Asad AI</h1>
  <p>Pakistan ka Bilingual AI Chatbot — Urdu & English</p>
  <p style="margin-top: 10px;"><span style="background: rgba(255,255,255,0.15); border-radius: 20px; padding: 4px 14px;">🇵🇰 Made in Pakistan</span> <span style="background: rgba(255,255,255,0.15); border-radius: 20px; padding: 4px 14px;">🔄 Auto-trains every 24h</span></p>
</div>
"""

# Example prompts offered in the chat tab (passed to gr.ChatInterface).
EXAMPLES = [
    "Assalamualaikum! Kya haal hai?",
    "Tumhara naam kya hai?",
    "Ek mazedaar joke sunao!",
    "Python programming kaise seekhein?",
    "Pakistan ke baare mein batao",
    "Mujhe motivation chahiye 💪",
    "2 + 2 kya hota hai?",
]

def _refresh_panels():
    """Return fresh (status, info, logs) text for the Training Status tab."""
    return get_status(), get_info(), get_logs()


# Page layout: banner on top, then Chat / Training Status / About tabs.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="emerald"), css=CSS) as demo:
    gr.HTML(HEADER)
    with gr.Tabs():
        with gr.Tab("💬 Chat"):
            gr.ChatInterface(fn=chat_fn, examples=EXAMPLES)
        with gr.Tab("📊 Training Status"):
            status_md = gr.Markdown(get_status())
            info_md   = gr.Markdown(get_info())
            log_box   = gr.Textbox(label="Recent logs", lines=6, interactive=False)
            panels = [status_md, info_md, log_box]
            # The same refresh runs on button click and on every page load.
            refresh_btn = gr.Button("🔄 Refresh")
            refresh_btn.click(fn=_refresh_panels, outputs=panels)
            demo.load(fn=_refresh_panels, outputs=panels)
        with gr.Tab("ℹ️ About"):
            gr.Markdown("""
## 🧠 Asad AI – Technical Details
- **Neural network:** 4 layers (256→256→128→output)
- **Training:** 400 epochs, AdamW, Cosine annealing
- **Datasets:** Claude Opus (38k) + DeepSeek traces (4k) + base intents
- **Auto‑retrain every 24h** – persists in `/data`
- **Bilingual:** Urdu, English, Hinglish
            """)

# Launch for HF Spaces: no share=True; bind explicitly to all interfaces
# (0.0.0.0) on port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)