Spaces:
Running
Running
Upload neurones_vision.py
Browse files- models/neurones_vision.py +108 -31
models/neurones_vision.py
CHANGED
|
@@ -1,66 +1,143 @@
|
|
| 1 |
"""
|
| 2 |
Neurones Vision 1.0
|
| 3 |
===================
|
| 4 |
-
NeuraPrompt's multimodal model. Powered by
|
| 5 |
-
|
| 6 |
-
|
| 7 |
|
| 8 |
-
|
| 9 |
"""
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
MODEL = {
|
| 12 |
-
# ββ Identity ββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 13 |
"id": "neurones-vision-1.0",
|
| 14 |
"display_name": "Neurones Vision 1.0",
|
| 15 |
-
"version": "1.
|
| 16 |
"release_date": "2026-03-29",
|
| 17 |
-
"tagline": "NeuraPrompt's eyes. Sees, reads, and understands images.",
|
| 18 |
|
| 19 |
-
# ββ Speed βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 20 |
"speed": "balanced",
|
| 21 |
"speed_label": "ποΈ Vision",
|
| 22 |
|
| 23 |
-
# ββ Backend βββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 24 |
-
|
| 25 |
-
"
|
| 26 |
-
"
|
| 27 |
-
"
|
| 28 |
-
"temperature": 0.6,
|
| 29 |
|
| 30 |
-
# ββ Capabilities ββββββββββββββββββββββββββββββββββββββββββββ
|
| 31 |
"can_stream": False,
|
| 32 |
-
"can_reason":
|
| 33 |
-
"can_vision": True,
|
| 34 |
-
"
|
|
|
|
| 35 |
"can_search": False,
|
| 36 |
"can_code": False,
|
| 37 |
-
"can_translate":
|
| 38 |
"can_summarise": True,
|
| 39 |
"is_local": False,
|
|
|
|
| 40 |
|
| 41 |
-
# ββ Limits ββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 42 |
"context_window": 16384,
|
| 43 |
"rate_limit_rpm": 10,
|
| 44 |
|
| 45 |
-
# ββ System Prompt βββββββββββββββββββββββββββββββββββββββββββ
|
| 46 |
"system_prompt": (
|
| 47 |
-
"You are Neurones Vision 1.0, NeuraPrompt's
|
| 48 |
"created by Andile Mtolo (Toxic Dee Modder). "
|
| 49 |
-
"
|
| 50 |
-
"
|
| 51 |
-
"
|
| 52 |
-
"Answer
|
| 53 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
),
|
| 55 |
|
| 56 |
-
# ββ UI Hints βββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 57 |
"badge_color": "#ff6d00",
|
| 58 |
"icon": "ποΈ",
|
| 59 |
"recommended_for": [
|
| 60 |
-
"image analysis", "OCR / text extraction",
|
| 61 |
-
"
|
|
|
|
| 62 |
],
|
| 63 |
"not_recommended_for": [
|
| 64 |
-
"
|
| 65 |
],
|
| 66 |
}
|
|
|
|
| 1 |
"""
|
| 2 |
Neurones Vision 1.0
|
| 3 |
===================
|
| 4 |
+
NeuraPrompt's multimodal model. Powered by meta-llama/llama-4-scout-17b on Groq.
|
| 5 |
+
Handles: images, documents (PDF/text/code), OCR, file analysis, visual Q&A.
|
| 6 |
+
NOT for: general chat, math, coding tasks β redirects user to another model.
|
| 7 |
|
| 8 |
+
Datasets: scans models/datasets/ for image-related datasets automatically.
|
| 9 |
"""
|
| 10 |
|
| 11 |
+
import pathlib
|
| 12 |
+
import json
|
| 13 |
+
import logging
|
| 14 |
+
|
| 15 |
+
# ββ Dataset scanner ββββββββββββββββββββββββββββββββββββββββββββββ
|
| 16 |
+
DATASETS_DIR = pathlib.Path(__file__).parent / "datasets"
|
| 17 |
+
|
| 18 |
+
# Keywords that indicate a dataset is image/vision related
|
| 19 |
+
_IMAGE_KEYWORDS = {
|
| 20 |
+
"image", "vision", "visual", "photo", "picture", "img",
|
| 21 |
+
"caption", "scene", "object", "detection", "classify",
|
| 22 |
+
"ocr", "document", "diagram", "chart", "screenshot",
|
| 23 |
+
}
|
| 24 |
+
|
| 25 |
+
def _is_vision_dataset(filepath: pathlib.Path) -> bool:
|
| 26 |
+
"""Heuristic: check filename for vision-related keywords."""
|
| 27 |
+
name = filepath.stem.lower()
|
| 28 |
+
return any(kw in name for kw in _IMAGE_KEYWORDS)
|
| 29 |
+
|
| 30 |
+
def load_vision_datasets(max_per_file: int = 2000) -> list[dict]:
    """
    Load vision/image-related datasets from models/datasets/.

    Text-only datasets are skipped via the filename heuristic in
    ``_is_vision_dataset``.  Each accepted JSONL line contributes at
    most one pair, truncated (prompt 300 chars, response 500 chars) to
    keep the few-shot context small.

    Parameters
    ----------
    max_per_file : int
        Cap on the number of pairs taken from any single dataset file.

    Returns
    -------
    list[dict]
        ``{"prompt": ..., "response": ...}`` pairs for few-shot context;
        empty when the datasets directory does not exist.
    """
    if not DATASETS_DIR.exists():
        return []

    allowed = (".jsonl", ".jsonl.txt", ".json", ".txt")
    examples: list[dict] = []

    for path in sorted(DATASETS_DIR.iterdir()):
        if not path.is_file():
            continue
        # Compound extensions (e.g. ".jsonl.txt") are joined so every part counts.
        if "".join(path.suffixes).lower() not in allowed:
            continue
        if not _is_vision_dataset(path):
            logging.debug(f"[Vision] Skipping non-vision dataset: {path.name}")
            continue

        loaded = 0
        try:
            with path.open("r", encoding="utf-8", errors="replace") as handle:
                for raw in handle:
                    raw = raw.strip()
                    if not raw:
                        continue
                    try:
                        entry = json.loads(raw)
                    except json.JSONDecodeError:
                        continue

                    # First truthy field wins, mirroring common dataset schemas.
                    prompt = next(
                        (entry.get(key)
                         for key in ("question", "prompt", "instruction", "input")
                         if entry.get(key)),
                        "",
                    )
                    response = next(
                        (entry.get(key)
                         for key in ("answer", "response", "output", "caption")
                         if entry.get(key)),
                        "",
                    )
                    # Very short responses carry no signal for few-shot use.
                    if prompt and response and len(response) > 10:
                        examples.append(
                            {"prompt": str(prompt)[:300],
                             "response": str(response)[:500]}
                        )
                        loaded += 1
                        if loaded >= max_per_file:
                            break

            if loaded:
                logging.info(f"[Vision] Loaded {loaded} pairs from {path.name}")
        except Exception as e:
            # Best-effort loader: a bad file is logged and skipped, never fatal.
            logging.warning(f"[Vision] Failed to read {path.name}: {e}")

    return examples
|
| 79 |
+
|
| 80 |
+
|
| 81 |
# Model card consumed by main.py's model registry.  Fix: the source had
# encoding corruption (U+FFFD in the Backend rule, mojibake for the eye
# emoji and em dashes) — restored here; all other values unchanged.
MODEL = {
    # ── Identity ─────────────────────────────────────────────────
    "id": "neurones-vision-1.0",
    "display_name": "Neurones Vision 1.0",
    "version": "1.1",
    "release_date": "2026-03-29",
    "tagline": "NeuraPrompt's eyes. Sees, reads, and understands images and files.",

    # ── Speed ────────────────────────────────────────────────────
    "speed": "balanced",
    "speed_label": "👁️ Vision",

    # ── Backend ──────────────────────────────────────────────────
    # Same Groq model serves both text and vision requests.
    "groq_model": "meta-llama/llama-4-scout-17b-16e-instruct",
    "groq_vision_model": "meta-llama/llama-4-scout-17b-16e-instruct",
    "max_tokens": 4096,
    # Low temperature: OCR / description tasks favour precision over variety.
    "temperature": 0.3,

    # ── Capabilities ─────────────────────────────────────────────
    "can_stream": False,
    "can_reason": True,
    "can_vision": True,
    "can_files": True,          # NEW: file analysis
    "can_generate_image": False,
    "can_search": False,
    "can_code": False,
    "can_translate": False,
    "can_summarise": True,
    "is_local": False,
    "vision_only": True,        # main.py uses this to block plain chat

    # ── Limits ───────────────────────────────────────────────────
    "context_window": 16384,
    "rate_limit_rpm": 10,

    # ── System Prompt ────────────────────────────────────────────
    "system_prompt": (
        "You are Neurones Vision 1.0, NeuraPrompt's visual analysis model, "
        "created by Andile Mtolo (Toxic Dee Modder). "
        "Your specialty is images and files ONLY.\n\n"
        "For IMAGES: describe thoroughly, extract all visible text (OCR), "
        "identify objects, people, colours, scene type, and context. "
        "Answer any question about the visual content with precision.\n\n"
        "For FILES/DOCUMENTS: extract text content, summarise key points, "
        "identify structure (headings, tables, code), and answer questions.\n\n"
        "If a user sends a plain text message with NO image or file, respond:\n"
        "'I am Neurones Vision — I specialise in images and files. "
        "For general chat please switch to Neurones Pro or Flash using the model selector.'\n\n"
        "Never guess when you cannot see something clearly — say so."
    ),

    # ── UI Hints ─────────────────────────────────────────────────
    "badge_color": "#ff6d00",
    "icon": "👁️",
    "recommended_for": [
        "image analysis", "OCR / text extraction", "file reading",
        "document scanning", "photo description", "visual Q&A",
        "PDF summary", "screenshot analysis",
    ],
    "not_recommended_for": [
        "general chat", "math", "coding", "real-time search",
    ],
}
|