Charlie81 committed
Commit 768ca02 · verified · 1 Parent(s): 2a1ca13

Upload ViT base model classifier

Files changed (6)
  1. README.md +8 -51
  2. config.json +71 -26
  3. label_mapping.json +101 -0
  4. model.safetensors +2 -2
  5. modeling.py +39 -0
  6. preprocessor_config.json +4 -7
README.md CHANGED
@@ -1,56 +1,14 @@
- ---
- license: apache-2.0
- tags:
- - image-classification
- - vision-transformer
- - civitai
- - vit5
- - runpod
- pipeline_tag: image-classification
- ---
-
- # GenGlasses CivitAI Model Reverse Engineer
-
- Predicts the **SD checkpoint**, **model family**, and **tags** from a generated image.
- Backbone: fine-tuned [ViT-5](https://arxiv.org/abs/2602.08071).
- RunPod handler: [cjyu81/genglassrunpod](https://github.com/cjyu81/genglassrunpod)
-
- ## Repo contents
-
- | File | Description |
- |------|-------------|
- | `model.safetensors` | Model weights (HF-standard format) |
- | `config.json` | Architecture config + training metadata |
- | `preprocessor_config.json` | Image preprocessing parameters |
- | `label_maps.json` | Index ↔ label maps for all three heads |
- | `training_log.json` | Full run history + seen image IDs |
- | `best.pth` | Legacy PyTorch checkpoint (backward compat) |
-
- ## Label Space
- - **Models** : 50
- - **Families**: 8 — `Flux, Illustrious, NoobAI, Other, Pony, SD 1.5, SDXL, ZImageTurbo`
- - **Tags** : 108
-
- ## Training History
- | Run | Date | Skip Scrape | Fresh Train | New Images | Best Val Loss |
- |-----|------|------------|-------------|-----------|---------------|
- | 1 | 2026-04-04 | True | True | 831 | 2.3535 |
-
- ## RunPod Endpoint
- > **Important:** Use RunPod **Serverless → New Endpoint → Custom** (Docker image), NOT "Custom Deployment" (HF model link).
-
- ```json
- {"input": {"image": "<base64-jpeg>"}}
- ```
-
- ## Local usage
+ # Base Model Classifier (ViT)
+
+ This model predicts the **base model used to generate an AI image**.
+
+ - Backbone: vit_base_patch16_224
+ - Classes: 31
+ - Task: image classification
+
+ ## Usage
+
  ```python
- from safetensors.torch import load_file
- from huggingface_hub import hf_hub_download
- import json, torch
-
- state = load_file(hf_hub_download('Charlie81/genglasses', 'model.safetensors'))
- cfg = json.load(open(hf_hub_download('Charlie81/genglasses', 'config.json')))
- maps = json.load(open(hf_hub_download('Charlie81/genglasses', 'label_maps.json')))
- # rebuild CivitClassifier, load_state_dict(state), then predict
- ```
+ from modeling import predict
+ print(predict("example.jpg"))
+ ```
config.json CHANGED
@@ -1,28 +1,73 @@
  {
- "model_type": "civit_classifier",
- "architectures": [
- "CivitClassifier"
- ],
- "torch_dtype": "float32",
- "backbone_name": "vit5_base",
- "embed_dim": 768,
- "img_size": 224,
- "num_model_classes": 50,
- "num_family_classes": 8,
- "num_tags": 108,
- "last_epoch": 9,
- "best_val_loss": 2.353485876514066,
- "label_maps_sha": "bf09b7611f3734afff21bc3cdacdfe71",
- "image_mean": [
- 0.485,
- 0.456,
- 0.406
- ],
- "image_std": [
- 0.229,
- 0.224,
- 0.225
- ],
- "tag_threshold": 0.35,
- "top_k_tags": 10
+ "model_type": "custom_timm_vit_image_classifier",
+ "backbone_name": "vit_base_patch16_224",
+ "num_labels": 31,
+ "id2label": {
+ "0": "Chroma",
+ "1": "Flux.1 D",
+ "2": "Flux.1 DFlux.1 D",
+ "3": "Flux.1 DIllustrious",
+ "4": "Flux.1 Kontext",
+ "5": "Flux.1 S",
+ "6": "HiDream",
+ "7": "Illustrious",
+ "8": "Imagen4",
+ "9": "Nano Banana",
+ "10": "NoobAI",
+ "11": "OpenAI",
+ "12": "Other",
+ "13": "Pony",
+ "14": "Pony V7",
+ "15": "Qwen",
+ "16": "SD 1.5",
+ "17": "SD 3",
+ "18": "SD 3.5",
+ "19": "SD 3.5 Large",
+ "20": "SD 3.5 Medium",
+ "21": "SDXL 1.0",
+ "22": "SDXL 1.0SDXL 1.0",
+ "23": "SDXL Hyper",
+ "24": "SDXL Lightning",
+ "25": "SDXL Turbo",
+ "26": "Seedream",
+ "27": "Veo 3",
+ "28": "Wan Video 14B i2v 720p",
+ "29": "Wan Video 14B t2v",
+ "30": "Wan Video 2.2 I2V-A14B"
+ },
+ "label2id": {
+ "Chroma": 0,
+ "Flux.1 D": 1,
+ "Flux.1 DFlux.1 D": 2,
+ "Flux.1 DIllustrious": 3,
+ "Flux.1 Kontext": 4,
+ "Flux.1 S": 5,
+ "HiDream": 6,
+ "Illustrious": 7,
+ "Imagen4": 8,
+ "Nano Banana": 9,
+ "NoobAI": 10,
+ "OpenAI": 11,
+ "Other": 12,
+ "Pony": 13,
+ "Pony V7": 14,
+ "Qwen": 15,
+ "SD 1.5": 16,
+ "SD 3": 17,
+ "SD 3.5": 18,
+ "SD 3.5 Large": 19,
+ "SD 3.5 Medium": 20,
+ "SDXL 1.0": 21,
+ "SDXL 1.0SDXL 1.0": 22,
+ "SDXL Hyper": 23,
+ "SDXL Lightning": 24,
+ "SDXL Turbo": 25,
+ "Seedream": 26,
+ "Veo 3": 27,
+ "Wan Video 14B i2v 720p": 28,
+ "Wan Video 14B t2v": 29,
+ "Wan Video 2.2 I2V-A14B": 30
+ },
+ "image_size": 224,
+ "library_name": "timm"
  }
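The new config stores the 31-class label space twice: `id2label` (string keys, because JSON object keys are always strings) and `label2id` (integer values). The two views must be mutual inverses for `modeling.py`'s `cfg["id2label"][str(pred)]` lookup to work. A minimal consistency-check sketch, using a hypothetical three-entry subset of the full maps:

```python
# Hypothetical subset of config.json's maps; id2label keys are strings,
# label2id values are ints, mirroring the JSON added above.
id2label = {"0": "Chroma", "1": "Flux.1 D", "2": "Flux.1 DFlux.1 D"}
label2id = {"Chroma": 0, "Flux.1 D": 1, "Flux.1 DFlux.1 D": 2}

def maps_are_inverse(id2label, label2id):
    """True when every id -> label -> id round trip returns to the same id."""
    return (len(id2label) == len(label2id)
            and all(str(label2id[name]) == i for i, name in id2label.items()))

print(maps_are_inverse(id2label, label2id))  # True for a consistent config
```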
label_mapping.json ADDED
@@ -0,0 +1,101 @@
+ {
+ "model2idx": {
+ "Chroma": 0,
+ "Flux.1 D": 1,
+ "Flux.1 DFlux.1 D": 2,
+ "Flux.1 DIllustrious": 3,
+ "Flux.1 Kontext": 4,
+ "Flux.1 S": 5,
+ "HiDream": 6,
+ "Illustrious": 7,
+ "Imagen4": 8,
+ "Nano Banana": 9,
+ "NoobAI": 10,
+ "OpenAI": 11,
+ "Other": 12,
+ "Pony": 13,
+ "Pony V7": 14,
+ "Qwen": 15,
+ "SD 1.5": 16,
+ "SD 3": 17,
+ "SD 3.5": 18,
+ "SD 3.5 Large": 19,
+ "SD 3.5 Medium": 20,
+ "SDXL 1.0": 21,
+ "SDXL 1.0SDXL 1.0": 22,
+ "SDXL Hyper": 23,
+ "SDXL Lightning": 24,
+ "SDXL Turbo": 25,
+ "Seedream": 26,
+ "Veo 3": 27,
+ "Wan Video 14B i2v 720p": 28,
+ "Wan Video 14B t2v": 29,
+ "Wan Video 2.2 I2V-A14B": 30
+ },
+ "idx2model": {
+ "0": "Chroma",
+ "1": "Flux.1 D",
+ "2": "Flux.1 DFlux.1 D",
+ "3": "Flux.1 DIllustrious",
+ "4": "Flux.1 Kontext",
+ "5": "Flux.1 S",
+ "6": "HiDream",
+ "7": "Illustrious",
+ "8": "Imagen4",
+ "9": "Nano Banana",
+ "10": "NoobAI",
+ "11": "OpenAI",
+ "12": "Other",
+ "13": "Pony",
+ "14": "Pony V7",
+ "15": "Qwen",
+ "16": "SD 1.5",
+ "17": "SD 3",
+ "18": "SD 3.5",
+ "19": "SD 3.5 Large",
+ "20": "SD 3.5 Medium",
+ "21": "SDXL 1.0",
+ "22": "SDXL 1.0SDXL 1.0",
+ "23": "SDXL Hyper",
+ "24": "SDXL Lightning",
+ "25": "SDXL Turbo",
+ "26": "Seedream",
+ "27": "Veo 3",
+ "28": "Wan Video 14B i2v 720p",
+ "29": "Wan Video 14B t2v",
+ "30": "Wan Video 2.2 I2V-A14B"
+ },
+ "labels": [
+ "Chroma",
+ "Flux.1 D",
+ "Flux.1 DFlux.1 D",
+ "Flux.1 DIllustrious",
+ "Flux.1 Kontext",
+ "Flux.1 S",
+ "HiDream",
+ "Illustrious",
+ "Imagen4",
+ "Nano Banana",
+ "NoobAI",
+ "OpenAI",
+ "Other",
+ "Pony",
+ "Pony V7",
+ "Qwen",
+ "SD 1.5",
+ "SD 3",
+ "SD 3.5",
+ "SD 3.5 Large",
+ "SD 3.5 Medium",
+ "SDXL 1.0",
+ "SDXL 1.0SDXL 1.0",
+ "SDXL Hyper",
+ "SDXL Lightning",
+ "SDXL Turbo",
+ "Seedream",
+ "Veo 3",
+ "Wan Video 14B i2v 720p",
+ "Wan Video 14B t2v",
+ "Wan Video 2.2 I2V-A14B"
+ ]
+ }
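`label_mapping.json` repeats the same label space as three redundant views (`model2idx`, `idx2model`, `labels`), and the indices appear to follow sorted label order. A sketch of deriving all three views from a bare label list, shown on a hypothetical three-label subset of the 31 classes:

```python
# Hypothetical subset of the 31 labels; sorting assigns the indices,
# matching the apparent ordering in the file added above.
labels = sorted(["Flux.1 D", "Chroma", "Flux.1 DFlux.1 D"])

mapping = {
    "model2idx": {name: i for i, name in enumerate(labels)},
    "idx2model": {str(i): name for i, name in enumerate(labels)},
    "labels": labels,
}

# Consistency: every view agrees on each (index, label) pairing.
for name, i in mapping["model2idx"].items():
    assert mapping["idx2model"][str(i)] == name == mapping["labels"][i]

print(mapping["idx2model"]["0"])  # "Chroma"
```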
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b25f4ced64dbb9cb7860b8f411f49ba22c19b28f18d8c2707fbc90ecc9ff1d1e
- size 343721040
+ oid sha256:95db8bf14f3cb609c3398ee29130d9b7a500bf087ab1796c031f34af39265b5e
+ size 343304068
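What the diff shows for `model.safetensors` is the Git LFS pointer file, not the weights themselves: `oid sha256:…` and `size` describe the real object. A sketch of verifying a downloaded file against its pointer (the helper name is mine):

```python
import hashlib
import os

def verify_lfs_pointer(path, expected_sha256, expected_size):
    """Check a downloaded LFS object against the pointer's oid and size."""
    if os.path.getsize(path) != expected_size:
        return False
    h = hashlib.sha256()
    with open(path, "rb") as f:
        # Hash in 1 MiB chunks so large weight files don't need to fit in RAM.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest() == expected_sha256
```

After downloading, the values from the new pointer would be passed in, e.g. `verify_lfs_pointer("model.safetensors", "95db8bf14f3cb609c3398ee29130d9b7a500bf087ab1796c031f34af39265b5e", 343304068)`.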
modeling.py ADDED
@@ -0,0 +1,39 @@
+
+ import json, torch, timm
+ from PIL import Image
+ from safetensors.torch import load_file
+ from torchvision import transforms
+
+ MODEL_NAME = "vit_base_patch16_224"
+ IMG_SIZE = 224
+ MEAN = [0.485, 0.456, 0.406]
+ STD = [0.229, 0.224, 0.225]
+
+ def load_model(repo_dir="."):
+     with open(f"{repo_dir}/config.json") as f:
+         cfg = json.load(f)
+
+     model = timm.create_model(MODEL_NAME, pretrained=False, num_classes=cfg["num_labels"])
+     state = load_file(f"{repo_dir}/model.safetensors")
+     model.load_state_dict(state)
+     model.eval()
+     return model, cfg
+
+ def predict(image_path, repo_dir="."):
+     model, cfg = load_model(repo_dir)
+
+     tfm = transforms.Compose([
+         transforms.Resize(256),
+         transforms.CenterCrop(IMG_SIZE),
+         transforms.ToTensor(),
+         transforms.Normalize(MEAN, STD),
+     ])
+
+     img = Image.open(image_path).convert("RGB")
+     x = tfm(img).unsqueeze(0)
+
+     with torch.no_grad():
+         logits = model(x)
+         pred = logits.argmax(-1).item()
+
+     return cfg["id2label"][str(pred)]
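`predict` returns only the argmax label. A hypothetical top-k variant, written as a pure-Python helper over raw logit lists so it runs without torch; with the real model you would feed it `logits[0].tolist()` and `cfg["id2label"]`:

```python
import math

def topk_labels(logits, id2label, k=3):
    """Softmax over raw logits; return the k (label, probability) pairs with
    the highest probability. id2label uses string keys, as in config.json."""
    m = max(logits)  # subtract the max for numerical stability
    exps = [math.exp(v - m) for v in logits]
    z = sum(exps)
    order = sorted(range(len(logits)), key=lambda i: -exps[i])[:k]
    return [(id2label[str(i)], exps[i] / z) for i in order]

# Demo with a hypothetical 3-class slice of the label space.
demo_id2label = {"0": "Chroma", "1": "Flux.1 D", "2": "SD 1.5"}
print(topk_labels([0.2, 1.9, -0.3], demo_id2label, k=2)[0][0])  # Flux.1 D
```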
preprocessor_config.json CHANGED
@@ -1,5 +1,4 @@
  {
- "feature_extractor_type": "ImageFeatureExtractor",
  "image_mean": [
  0.485,
  0.456,
@@ -10,10 +9,8 @@
  0.224,
  0.225
  ],
- "size": 224,
- "crop_size": 224,
- "resample": 3,
- "do_resize": true,
- "do_center_crop": true,
- "do_normalize": true
+ "size": {
+ "height": 224,
+ "width": 224
+ }
  }
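The preprocessor config keeps the standard ImageNet statistics. Per channel, a pixel is scaled to [0, 1] and then normalized as (x − mean) / std, which is exactly what `transforms.Normalize(MEAN, STD)` does in `modeling.py`. A quick sketch of that arithmetic on a single pixel:

```python
# ImageNet mean/std, as listed in preprocessor_config.json.
MEAN = [0.485, 0.456, 0.406]
STD = [0.229, 0.224, 0.225]

def normalize_pixel(rgb):
    """Map one 8-bit RGB pixel to the normalized values the model sees."""
    return [((c / 255.0) - m) / s for c, m, s in zip(rgb, MEAN, STD)]

# A pixel near the dataset mean (124, 116, 104) normalizes to values near zero.
print([round(v, 3) for v in normalize_pixel((124, 116, 104))])
```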