Upload ViT base model classifier

Files changed:
- README.md  +8 -51
- config.json  +71 -26
- label_mapping.json  +101 -0
- model.safetensors  +2 -2
- modeling.py  +39 -0
- preprocessor_config.json  +4 -7
README.md
CHANGED
@@ -1,56 +1,13 @@
----
-license: apache-2.0
-tags:
-- image-classification
-- vision-transformer
-- civitai
-- vit5
-- runpod
-pipeline_tag: image-classification
----
-
-## Files
-
-| File | Description |
-|------|-------------|
-| `model.safetensors` | Model weights (HF-standard format) |
-| `config.json` | Architecture config + training metadata |
-| `preprocessor_config.json` | Image preprocessing parameters |
-| `label_maps.json` | Index ↔ label maps for all three heads |
-| `training_log.json` | Full run history + seen image IDs |
-| `best.pth` | Legacy PyTorch checkpoint (backward compat) |
-
-## Label Space
-- **Models**: 50
-- **Families**: 8 — `Flux, Illustrious, NoobAI, Other, Pony, SD 1.5, SDXL, ZImageTurbo`
-- **Tags**: 108
-
-## Training History
-| Run | Date | Skip Scrape | Fresh Train | New Images | Best Val Loss |
-|-----|------|-------------|-------------|------------|---------------|
-| 1 | 2026-04-04 | True | True | 831 | 2.3535 |
-
-## RunPod Endpoint
-> **Important:** Use RunPod **Serverless → New Endpoint → Custom** (Docker image), NOT "Custom Deployment" (HF model link).
-
-```json
-{"input": {"image": "<base64-jpeg>"}}
-```
-
-## Local usage
-```python
-from huggingface_hub import hf_hub_download
-from safetensors.torch import load_file
-import json, torch
-
-state = load_file(hf_hub_download('Charlie81/genglasses', 'model.safetensors'))
-cfg = json.load(open(hf_hub_download('Charlie81/genglasses', 'config.json')))
-maps = json.load(open(hf_hub_download('Charlie81/genglasses', 'label_maps.json')))
-# rebuild CivitClassifier, load_state_dict(state), then predict
-```
+# Base Model Classifier (ViT)
+
+This model predicts the **base model used to generate an AI image**.
+
+- Backbone: vit_base_patch16_224
+- Classes: 31
+- Task: image classification
+
+## Usage
+
+```python
+from modeling import predict
+print(predict("example.jpg"))
+```
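The new `modeling.predict` helper expects the repo files (`config.json`, `model.safetensors`) to sit in a local directory. A minimal sketch of pulling the files from the Hub and calling it; the repo id `Charlie81/genglasses` is taken from the previous README and may not match this upload:

```python
import sys
from huggingface_hub import snapshot_download

# Download config.json, model.safetensors, modeling.py, etc. into a local cache dir.
repo_dir = snapshot_download("Charlie81/genglasses")

# Make the bundled modeling.py importable, then run a prediction.
sys.path.insert(0, repo_dir)
from modeling import predict

print(predict("example.jpg", repo_dir=repo_dir))
```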
config.json
CHANGED
@@ -1,28 +1,73 @@
 {
-  "model_type": "
+  "model_type": "custom_timm_vit_image_classifier",
+  "backbone_name": "vit_base_patch16_224",
+  "num_labels": 31,
+  "id2label": {
+    "0": "Chroma",
+    "1": "Flux.1 D",
+    "2": "Flux.1 DFlux.1 D",
+    "3": "Flux.1 DIllustrious",
+    "4": "Flux.1 Kontext",
+    "5": "Flux.1 S",
+    "6": "HiDream",
+    "7": "Illustrious",
+    "8": "Imagen4",
+    "9": "Nano Banana",
+    "10": "NoobAI",
+    "11": "OpenAI",
+    "12": "Other",
+    "13": "Pony",
+    "14": "Pony V7",
+    "15": "Qwen",
+    "16": "SD 1.5",
+    "17": "SD 3",
+    "18": "SD 3.5",
+    "19": "SD 3.5 Large",
+    "20": "SD 3.5 Medium",
+    "21": "SDXL 1.0",
+    "22": "SDXL 1.0SDXL 1.0",
+    "23": "SDXL Hyper",
+    "24": "SDXL Lightning",
+    "25": "SDXL Turbo",
+    "26": "Seedream",
+    "27": "Veo 3",
+    "28": "Wan Video 14B i2v 720p",
+    "29": "Wan Video 14B t2v",
+    "30": "Wan Video 2.2 I2V-A14B"
+  },
+  "label2id": {
+    "Chroma": 0,
+    "Flux.1 D": 1,
+    "Flux.1 DFlux.1 D": 2,
+    "Flux.1 DIllustrious": 3,
+    "Flux.1 Kontext": 4,
+    "Flux.1 S": 5,
+    "HiDream": 6,
+    "Illustrious": 7,
+    "Imagen4": 8,
+    "Nano Banana": 9,
+    "NoobAI": 10,
+    "OpenAI": 11,
+    "Other": 12,
+    "Pony": 13,
+    "Pony V7": 14,
+    "Qwen": 15,
+    "SD 1.5": 16,
+    "SD 3": 17,
+    "SD 3.5": 18,
+    "SD 3.5 Large": 19,
+    "SD 3.5 Medium": 20,
+    "SDXL 1.0": 21,
+    "SDXL 1.0SDXL 1.0": 22,
+    "SDXL Hyper": 23,
+    "SDXL Lightning": 24,
+    "SDXL Turbo": 25,
+    "Seedream": 26,
+    "Veo 3": 27,
+    "Wan Video 14B i2v 720p": 28,
+    "Wan Video 14B t2v": 29,
+    "Wan Video 2.2 I2V-A14B": 30
+  },
+  "image_size": 224,
+  "library_name": "timm"
 }
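The `id2label` keys are strings (JSON object keys), which is why `modeling.py` indexes the map with `str(pred)`. A small illustrative snippet mapping a predicted class index to its label straight from `config.json`:

```python
import json

with open("config.json") as f:
    cfg = json.load(f)

pred_idx = 21  # example index, e.g. from the model's argmax
print(cfg["id2label"][str(pred_idx)])  # -> "SDXL 1.0"
```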
label_mapping.json
ADDED
@@ -0,0 +1,101 @@
+{
+  "model2idx": {
+    "Chroma": 0,
+    "Flux.1 D": 1,
+    "Flux.1 DFlux.1 D": 2,
+    "Flux.1 DIllustrious": 3,
+    "Flux.1 Kontext": 4,
+    "Flux.1 S": 5,
+    "HiDream": 6,
+    "Illustrious": 7,
+    "Imagen4": 8,
+    "Nano Banana": 9,
+    "NoobAI": 10,
+    "OpenAI": 11,
+    "Other": 12,
+    "Pony": 13,
+    "Pony V7": 14,
+    "Qwen": 15,
+    "SD 1.5": 16,
+    "SD 3": 17,
+    "SD 3.5": 18,
+    "SD 3.5 Large": 19,
+    "SD 3.5 Medium": 20,
+    "SDXL 1.0": 21,
+    "SDXL 1.0SDXL 1.0": 22,
+    "SDXL Hyper": 23,
+    "SDXL Lightning": 24,
+    "SDXL Turbo": 25,
+    "Seedream": 26,
+    "Veo 3": 27,
+    "Wan Video 14B i2v 720p": 28,
+    "Wan Video 14B t2v": 29,
+    "Wan Video 2.2 I2V-A14B": 30
+  },
+  "idx2model": {
+    "0": "Chroma",
+    "1": "Flux.1 D",
+    "2": "Flux.1 DFlux.1 D",
+    "3": "Flux.1 DIllustrious",
+    "4": "Flux.1 Kontext",
+    "5": "Flux.1 S",
+    "6": "HiDream",
+    "7": "Illustrious",
+    "8": "Imagen4",
+    "9": "Nano Banana",
+    "10": "NoobAI",
+    "11": "OpenAI",
+    "12": "Other",
+    "13": "Pony",
+    "14": "Pony V7",
+    "15": "Qwen",
+    "16": "SD 1.5",
+    "17": "SD 3",
+    "18": "SD 3.5",
+    "19": "SD 3.5 Large",
+    "20": "SD 3.5 Medium",
+    "21": "SDXL 1.0",
+    "22": "SDXL 1.0SDXL 1.0",
+    "23": "SDXL Hyper",
+    "24": "SDXL Lightning",
+    "25": "SDXL Turbo",
+    "26": "Seedream",
+    "27": "Veo 3",
+    "28": "Wan Video 14B i2v 720p",
+    "29": "Wan Video 14B t2v",
+    "30": "Wan Video 2.2 I2V-A14B"
+  },
+  "labels": [
+    "Chroma",
+    "Flux.1 D",
+    "Flux.1 DFlux.1 D",
+    "Flux.1 DIllustrious",
+    "Flux.1 Kontext",
+    "Flux.1 S",
+    "HiDream",
+    "Illustrious",
+    "Imagen4",
+    "Nano Banana",
+    "NoobAI",
+    "OpenAI",
+    "Other",
+    "Pony",
+    "Pony V7",
+    "Qwen",
+    "SD 1.5",
+    "SD 3",
+    "SD 3.5",
+    "SD 3.5 Large",
+    "SD 3.5 Medium",
+    "SDXL 1.0",
+    "SDXL 1.0SDXL 1.0",
+    "SDXL Hyper",
+    "SDXL Lightning",
+    "SDXL Turbo",
+    "Seedream",
+    "Veo 3",
+    "Wan Video 14B i2v 720p",
+    "Wan Video 14B t2v",
+    "Wan Video 2.2 I2V-A14B"
+  ]
+}
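`label_mapping.json` duplicates the label space of `config.json` in three views (`model2idx`, `idx2model`, and an ordered `labels` list). A quick sanity check that the two files agree; this snippet is illustrative and not part of the repo:

```python
import json

cfg = json.load(open("config.json"))
mapping = json.load(open("label_mapping.json"))

# All three views must describe the same 31-way label space as config.json.
assert len(mapping["labels"]) == cfg["num_labels"] == 31
for idx, name in enumerate(mapping["labels"]):
    assert mapping["model2idx"][name] == idx
    assert mapping["idx2model"][str(idx)] == name
    assert cfg["id2label"][str(idx)] == name
print("label spaces are consistent")
```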
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:95db8bf14f3cb609c3398ee29130d9b7a500bf087ab1796c031f34af39265b5e
+size 343304068
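`model.safetensors` holds the raw timm ViT state dict (~343 MB). To inspect it without materializing the weights, safetensors can list tensor names and shapes lazily; the key names in the comment are an assumption based on timm's `vit_base_patch16_224` layout, not something stated in this repo:

```python
from safetensors import safe_open

with safe_open("model.safetensors", framework="pt") as f:
    for name in list(f.keys())[:5]:
        # e.g. keys like "patch_embed.proj.weight" or "head.weight" for a timm ViT
        print(name, f.get_slice(name).get_shape())
```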
modeling.py
ADDED
@@ -0,0 +1,39 @@
+import json, torch, timm
+from PIL import Image
+from safetensors.torch import load_file
+from torchvision import transforms
+
+MODEL_NAME = "vit_base_patch16_224"
+IMG_SIZE = 224
+MEAN = [0.485, 0.456, 0.406]
+STD = [0.229, 0.224, 0.225]
+
+def load_model(repo_dir="."):
+    with open(f"{repo_dir}/config.json") as f:
+        cfg = json.load(f)
+
+    model = timm.create_model(MODEL_NAME, pretrained=False, num_classes=cfg["num_labels"])
+    state = load_file(f"{repo_dir}/model.safetensors")
+    model.load_state_dict(state)
+    model.eval()
+    return model, cfg
+
+def predict(image_path, repo_dir="."):
+    model, cfg = load_model(repo_dir)
+
+    tfm = transforms.Compose([
+        transforms.Resize(256),
+        transforms.CenterCrop(IMG_SIZE),
+        transforms.ToTensor(),
+        transforms.Normalize(MEAN, STD),
+    ])
+
+    img = Image.open(image_path).convert("RGB")
+    x = tfm(img).unsqueeze(0)
+
+    with torch.no_grad():
+        logits = model(x)
+        pred = logits.argmax(-1).item()
+
+    return cfg["id2label"][str(pred)]
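Note that `predict()` rebuilds the model on every call. For more than one image it is cheaper to load once and reuse it; a small sketch along those lines, reusing the module-level constants and the same Resize/CenterCrop pipeline (the image paths are placeholders):

```python
import torch
from PIL import Image
from torchvision import transforms
from modeling import load_model, IMG_SIZE, MEAN, STD

model, cfg = load_model(".")
tfm = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(MEAN, STD),
])

for path in ["a.jpg", "b.jpg"]:  # hypothetical local image paths
    x = tfm(Image.open(path).convert("RGB")).unsqueeze(0)
    with torch.no_grad():
        probs = model(x).softmax(-1)[0]
    conf, idx = probs.max(0)
    print(path, cfg["id2label"][str(idx.item())], round(conf.item(), 3))
```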
preprocessor_config.json
CHANGED
@@ -1,5 +1,4 @@
 {
-  "feature_extractor_type": "ImageFeatureExtractor",
   "image_mean": [
     0.485,
     0.456,
@@ -10,10 +9,8 @@
     0.224,
     0.225
   ],
-  "size":
-  "do_center_crop": true,
-  "do_normalize": true
+  "size": {
+    "height": 224,
+    "width": 224
+  }
 }