DeepImagix committed on
Commit
1971de2
·
verified Β·
1 Parent(s): e155766

Upload neurones_vision.py

Browse files
Files changed (1) hide show
  1. models/neurones_vision.py +108 -31
models/neurones_vision.py CHANGED
@@ -1,66 +1,143 @@
1
  """
2
  Neurones Vision 1.0
3
  ===================
4
- NeuraPrompt's multimodal model. Powered by Llama 4 Scout (17B) on Groq.
5
- Can see and understand images, read text in images (OCR), describe scenes,
6
- and answer questions about visual content.
7
 
8
- To upgrade: change `groq_model` and bump `version`. That's it.
9
  """
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  MODEL = {
12
- # ── Identity ────────────────────────────────────────────────
13
  "id": "neurones-vision-1.0",
14
  "display_name": "Neurones Vision 1.0",
15
- "version": "1.0",
16
  "release_date": "2026-03-29",
17
- "tagline": "NeuraPrompt's eyes. Sees, reads, and understands images.",
18
 
19
- # ── Speed ───────────────────────────────────────────────────
20
  "speed": "balanced",
21
  "speed_label": "πŸ‘οΈ Vision",
22
 
23
- # ── Backend ─────────────────────────────────────────────────
24
- # llama-4-scout supports image input via Groq's vision API
25
- "groq_model": "openai/gpt-oss-120b",
26
- "groq_vision_model": "openai/gpt-oss-120b", # same model handles vision
27
- "max_tokens": 4096,
28
- "temperature": 0.6,
29
 
30
- # ── Capabilities ────────────────────────────────────────────
31
  "can_stream": False,
32
- "can_reason": False,
33
- "can_vision": True, # Image upload enabled for this model
34
- "can_generate_image": True, # Analysis only, not generation
 
35
  "can_search": False,
36
  "can_code": False,
37
- "can_translate": True,
38
  "can_summarise": True,
39
  "is_local": False,
 
40
 
41
- # ── Limits ──────────────────────────────────────────────────
42
  "context_window": 16384,
43
  "rate_limit_rpm": 10,
44
 
45
- # ── System Prompt ───────────────────────────────────────────
46
  "system_prompt": (
47
- "You are Neurones Vision 1.0, NeuraPrompt's multimodal AI model, "
48
  "created by Andile Mtolo (Toxic Dee Modder). "
49
- "When given an image, analyse it thoroughly: describe what you see, "
50
- "read and transcribe any text visible in the image, identify objects, "
51
- "people, locations, colours, and context. "
52
- "Answer questions about images with precision and detail. "
53
- "If text is present in the image, always extract and include it in your response."
 
 
 
 
 
54
  ),
55
 
56
- # ── UI Hints ─────────────────────────────────────────────────
57
  "badge_color": "#ff6d00",
58
  "icon": "πŸ‘οΈ",
59
  "recommended_for": [
60
- "image analysis", "OCR / text extraction",
61
- "visual Q&A", "document scanning", "photo description",
 
62
  ],
63
  "not_recommended_for": [
64
- "deep mathematical reasoning", "generating images",
65
  ],
66
  }
 
"""
Neurones Vision 1.0
===================
NeuraPrompt's multimodal model. Powered by meta-llama/llama-4-scout-17b on Groq.
Handles: images, documents (PDF/text/code), OCR, file analysis, visual Q&A.
NOT for: general chat, math, coding tasks — redirects user to another model.

Datasets: scans models/datasets/ for image-related datasets automatically.
"""
10
 
11
+ import pathlib
12
+ import json
13
+ import logging
14
+
15
# ── Dataset scanner ──────────────────────────────────────────────
# Folder holding dataset files, expected alongside this module
# (models/datasets/). Resolved relative to this file so the scan
# works regardless of the process's working directory.
DATASETS_DIR = pathlib.Path(__file__).parent / "datasets"
18
+ # Keywords that indicate a dataset is image/vision related
19
+ _IMAGE_KEYWORDS = {
20
+ "image", "vision", "visual", "photo", "picture", "img",
21
+ "caption", "scene", "object", "detection", "classify",
22
+ "ocr", "document", "diagram", "chart", "screenshot",
23
+ }
24
+
25
+ def _is_vision_dataset(filepath: pathlib.Path) -> bool:
26
+ """Heuristic: check filename for vision-related keywords."""
27
+ name = filepath.stem.lower()
28
+ return any(kw in name for kw in _IMAGE_KEYWORDS)
29
+
30
+ def load_vision_datasets(max_per_file: int = 2000) -> list[dict]:
31
+ """
32
+ Load vision/image-related datasets from models/datasets/.
33
+ Skips text-only datasets based on filename heuristics.
34
+ Returns list of {prompt, response} pairs for few-shot context.
35
+ """
36
+ if not DATASETS_DIR.exists():
37
+ return []
38
+
39
+ pairs = []
40
+ for fp in sorted(DATASETS_DIR.iterdir()):
41
+ if not fp.is_file():
42
+ continue
43
+ suffix = "".join(fp.suffixes).lower()
44
+ if suffix not in (".jsonl", ".jsonl.txt", ".json", ".txt"):
45
+ continue
46
+ if not _is_vision_dataset(fp):
47
+ logging.debug(f"[Vision] Skipping non-vision dataset: {fp.name}")
48
+ continue
49
+
50
+ count = 0
51
+ try:
52
+ with open(fp, "r", encoding="utf-8", errors="replace") as f:
53
+ for line in f:
54
+ line = line.strip()
55
+ if not line:
56
+ continue
57
+ try:
58
+ entry = json.loads(line)
59
+ except json.JSONDecodeError:
60
+ continue
61
+
62
+ # Extract prompt/response pair
63
+ prompt = (entry.get("question") or entry.get("prompt") or
64
+ entry.get("instruction") or entry.get("input") or "")
65
+ response = (entry.get("answer") or entry.get("response") or
66
+ entry.get("output") or entry.get("caption") or "")
67
+ if prompt and response and len(response) > 10:
68
+ pairs.append({"prompt": str(prompt)[:300], "response": str(response)[:500]})
69
+ count += 1
70
+ if count >= max_per_file:
71
+ break
72
+
73
+ if count:
74
+ logging.info(f"[Vision] Loaded {count} pairs from {fp.name}")
75
+ except Exception as e:
76
+ logging.warning(f"[Vision] Failed to read {fp.name}: {e}")
77
+
78
+ return pairs
79
+
80
+
81
# Registry entry for this model. Consumed elsewhere (e.g. main.py reads
# "vision_only" to block plain chat); keys/values must stay stable.
MODEL = {
    # ── Identity ─────────────────────────────────────────────────
    "id": "neurones-vision-1.0",
    "display_name": "Neurones Vision 1.0",
    "version": "1.1",
    "release_date": "2026-03-29",
    "tagline": "NeuraPrompt's eyes. Sees, reads, and understands images and files.",

    # ── Speed ────────────────────────────────────────────────────
    "speed": "balanced",
    "speed_label": "👁️ Vision",

    # ── Backend ──────────────────────────────────────────────────
    "groq_model": "meta-llama/llama-4-scout-17b-16e-instruct",
    "groq_vision_model": "meta-llama/llama-4-scout-17b-16e-instruct",
    "max_tokens": 4096,
    "temperature": 0.3,

    # ── Capabilities ─────────────────────────────────────────────
    "can_stream": False,
    "can_reason": True,
    "can_vision": True,
    "can_files": True,  # NEW: file analysis
    "can_generate_image": False,
    "can_search": False,
    "can_code": False,
    "can_translate": False,
    "can_summarise": True,
    "is_local": False,
    "vision_only": True,  # main.py uses this to block plain chat

    # ── Limits ───────────────────────────────────────────────────
    "context_window": 16384,
    "rate_limit_rpm": 10,

    # ── System Prompt ─────────────────────────────────────────────
    "system_prompt": (
        "You are Neurones Vision 1.0, NeuraPrompt's visual analysis model, "
        "created by Andile Mtolo (Toxic Dee Modder). "
        "Your specialty is images and files ONLY.\n\n"
        "For IMAGES: describe thoroughly, extract all visible text (OCR), "
        "identify objects, people, colours, scene type, and context. "
        "Answer any question about the visual content with precision.\n\n"
        "For FILES/DOCUMENTS: extract text content, summarise key points, "
        "identify structure (headings, tables, code), and answer questions.\n\n"
        "If a user sends a plain text message with NO image or file, respond:\n"
        "'I am Neurones Vision — I specialise in images and files. "
        "For general chat please switch to Neurones Pro or Flash using the model selector.'\n\n"
        "Never guess when you cannot see something clearly — say so."
    ),

    # ── UI Hints ──────────────────────────────────────────────────
    "badge_color": "#ff6d00",
    "icon": "👁️",
    "recommended_for": [
        "image analysis", "OCR / text extraction", "file reading",
        "document scanning", "photo description", "visual Q&A",
        "PDF summary", "screenshot analysis",
    ],
    "not_recommended_for": [
        "general chat", "math", "coding", "real-time search",
    ],
}