dcorcoran commited on
Commit
6a18d52
·
1 Parent(s): 28fbb7e

Initial commit

Browse files
.dockerignore ADDED
File without changes
Dockerfile ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11-slim

WORKDIR /app

# System deps: Tesseract for OCR; libgl1/libglib2.0-0 are runtime libs needed
# by opencv-python. --no-install-recommends keeps the image slim, and the apt
# lists are removed in the same layer so they never land in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    tesseract-ocr \
    libgl1 \
    libglib2.0-0 \
    && rm -rf /var/lib/apt/lists/*

# app/config.py defaults TESSERACT_PATH to a Windows path; BaseSettings reads
# env vars, so point it at the Debian tesseract binary installed above.
ENV TESSERACT_PATH=/usr/bin/tesseract

# Install Python deps before copying sources so code edits don't bust the
# pip layer cache.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY app ./app

EXPOSE 8000

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
app/__pycache__/config.cpython-311.pyc ADDED
Binary file (1.47 kB). View file
 
app/__pycache__/main.cpython-311.pyc ADDED
Binary file (3.65 kB). View file
 
app/__pycache__/schemas.cpython-311.pyc ADDED
Binary file (1.86 kB). View file
 
app/config.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from pydantic_settings import BaseSettings
from pathlib import Path

# --------------------------------
# ----- PATHS --------------------
# --------------------------------

# Directory containing this file (app/); index artifacts live under app/index/.
BASE_DIR = Path(__file__).resolve().parent

class Settings(BaseSettings):
    """Application settings; every field can be overridden via env vars / .env."""

    # Index paths
    FAISS_INDEX_PATH: str = str(BASE_DIR / "index" / "faiss_index.bin")
    METADATA_PATH: str = str(BASE_DIR / "index" / "metadata.json")

    # Model settings
    EMBEDDING_DIM: int = 2048  # expected embedding size (ResNet50 features)
    TOP_K: int = 5  # default number of similar cards returned by search

    # Tesseract path (Windows only)
    # NOTE(review): this default breaks inside a Linux container unless
    # TESSERACT_PATH is overridden via the environment — confirm deploy config.
    TESSERACT_PATH: str = "C:/Program Files/Tesseract-OCR/tesseract.exe"

    class Config:
        env_file = ".env"

# Singleton settings instance imported throughout the app.
settings = Settings()
app/index/faiss_index.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67c8f11faee0a2a2d30eba64f64aa0f924b413983320ad3dd532e5f174b4d35f
3
+ size 4046893
app/index/metadata.json ADDED
The diff for this file is too large to render. See raw diff
 
app/main.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, File, UploadFile
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ from contextlib import asynccontextmanager
4
+ from PIL import Image
5
+ import io
6
+
7
+ from app.services.embedding_service import EmbeddingService
8
+ from app.services.similarity_service import SimilarityService
9
+ from app.services.ocr_service import OCRService
10
+ from app.schemas import CardResponse
11
+ from app.config import settings
12
+
13
+ # --------------------
14
+ # ----- LIFESPAN -----
15
+ # --------------------
16
+
17
+ @asynccontextmanager
18
+ async def lifespan(app: FastAPI):
19
+ # Load all models and indexes once at startup
20
+ app.state.embedding_service = EmbeddingService()
21
+ app.state.similarity_service = SimilarityService()
22
+ app.state.ocr_service = OCRService()
23
+ print("Models and index loaded.")
24
+ yield
25
+ print("Shutting down.")
26
+
27
+
28
+
29
# ---------------
# ----- APP -----
# ---------------

app = FastAPI(title="Pokemon Card Image Processor", version="1.0.0", lifespan=lifespan)

# Open CORS policy: any origin, method, and header may call this API.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
45
+
46
+
47
+
48
# --------------------
# ----- ROUTES -------
# --------------------

@app.get("/health")
def health():
    """Liveness probe: always reports the service as up."""
    return dict(status="ok")
55
+
56
+
57
@app.post("/predict", response_model=CardResponse)
async def predict(file: UploadFile = File(...)):
    """Identify a Pokemon card from an uploaded photo.

    Embeds the image, retrieves the most similar indexed cards, OCRs the
    card text, and — for a near-exact index hit — backfills missing OCR
    fields from the top match's metadata.
    """
    image_bytes = await file.read()
    image = Image.open(io.BytesIO(image_bytes)).convert("RGB")

    # Generate embedding and find similar cards first.
    embedding = app.state.embedding_service.embed(image)
    # Use the configured retrieval depth instead of a hard-coded 5 so the
    # route stays consistent with settings.TOP_K (default is also 5).
    similar_cards = app.state.similarity_service.search(embedding, top_k=settings.TOP_K)

    # Use OCR for extraction.
    ocr_data = app.state.ocr_service.extract(image)

    # If the top match is a near-exact hit, trust its metadata to fill OCR gaps.
    if similar_cards and similar_cards[0]["score"] > 0.99:
        top_match = similar_cards[0]
        ocr_data["name"] = ocr_data["name"] or top_match["name"]
        ocr_data["types"] = ocr_data["types"] or top_match["types"]

    return CardResponse(
        name=ocr_data.get("name"),
        hp=ocr_data.get("hp"),
        types=ocr_data.get("types"),
        moves=ocr_data.get("moves"),
        similar_cards=similar_cards,
    )
app/models/embedding_model.py ADDED
File without changes
app/models/layout_detector.py ADDED
File without changes
app/models/ocr_model.py ADDED
File without changes
app/schemas.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from pydantic import BaseModel
from typing import Optional

# --------------------------------
# ----- SIMILAR CARD -------------
# --------------------------------

class SimilarCard(BaseModel):
    """One nearest-neighbor result from the FAISS similarity search."""
    id: str
    name: str
    set: Optional[str] = None
    types: Optional[list[str]] = None
    rarity: Optional[str] = None
    image_url: Optional[str] = None
    score: float  # similarity score from the index (higher = closer match)



# --------------------------------
# ----- MOVE ---------------------
# --------------------------------

class Move(BaseModel):
    """A single move parsed out of the card's move section by OCR."""
    name: str
    damage: Optional[str] = None  # kept as string: OCR may yield values like "20+"
    text: Optional[str] = None



# --------------------------------
# ----- CARD RESPONSE ------------
# --------------------------------

class CardResponse(BaseModel):
    """Response body for /predict: OCR-extracted fields plus similar cards."""
    name: Optional[str] = None
    hp: Optional[str] = None
    types: Optional[list[str]] = None
    moves: Optional[list[Move]] = None
    # Pydantic deep-copies field defaults per instance, so the mutable
    # [] default is safe here (unlike a plain function default).
    similar_cards: list[SimilarCard] = []
app/services/__pycache__/embedding_service.cpython-311.pyc ADDED
Binary file (2.76 kB). View file
 
app/services/__pycache__/ocr_service.cpython-311.pyc ADDED
Binary file (5.74 kB). View file
 
app/services/__pycache__/similarity_service.cpython-311.pyc ADDED
Binary file (2.85 kB). View file
 
app/services/embedding_service.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torchvision.transforms as transforms
3
+ from torchvision import models
4
+ from PIL import Image
5
+ import numpy as np
6
+
7
class EmbeddingService:
    """Produces L2-normalized ResNet50 feature vectors for card images."""

    def __init__(self):
        print("Loading embedding model...")

        # Load pretrained ResNet50.
        # NOTE(review): `pretrained=True` is deprecated in torchvision >= 0.13
        # in favor of `weights=models.ResNet50_Weights.IMAGENET1K_V1` (same
        # weights). Kept as-is for compatibility with the torchvision version
        # that built the index — confirm before upgrading.
        model = models.resnet50(pretrained=True)

        # Drop the final classification layer so the forward pass yields the
        # pooled feature map instead of class logits.
        self.model = torch.nn.Sequential(*list(model.children())[:-1])
        self.model.eval()

        # Preprocessing pipeline; must match what was used to build the index
        # (in data_collection/build_faiss_index.py).
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
        ])

        print("Embedding model loaded.")

    def embed(self, image: "Image.Image") -> np.ndarray:
        """Return a unit-norm float32 feature vector for `image`.

        The vector length is whatever the backbone emits (2048 for ResNet50,
        matching EMBEDDING_DIM in config).
        """
        # Preprocess and add a batch dimension.
        tensor = self.transform(image).unsqueeze(0)

        # Forward pass without gradient tracking.
        with torch.no_grad():
            embedding = self.model(tensor)

        # Flatten to 1-D float32 and L2-normalize.
        embedding = embedding.squeeze().numpy().astype("float32")
        norm = np.linalg.norm(embedding)
        if norm > 0.0:
            # Guard against division by zero (degenerate all-zero features
            # would otherwise produce a NaN vector).
            embedding = embedding / norm

        return embedding
app/services/ocr_service.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytesseract
2
+ import re
3
+ from PIL import Image
4
+
5
+ from app.config import settings
6
+
7
# Point pytesseract at the configured binary at import time (defaults to the
# Windows install path in config; override via TESSERACT_PATH env / .env).
pytesseract.pytesseract.tesseract_cmd = settings.TESSERACT_PATH
8
+
9
+ class OCRService:
10
+
11
+ def __init__(self):
12
+ print("OCR service initialized.")
13
+
14
+ def extract(self, image: Image.Image) -> dict:
15
+ w, h = image.size
16
+
17
+ # --------------------------------
18
+ # ----- CROP REGIONS -------------
19
+ # --------------------------------
20
+
21
+ # Card name — top left area
22
+ name_region = image.crop((0.15 * w, 0.02 * h, 0.75 * w, 0.10 * h))
23
+
24
+ # HP — top right area
25
+ hp_region = image.crop((0.60 * w, 0.02 * h, 0.95 * w, 0.10 * h))
26
+
27
+ # Moves — lower middle section
28
+ moves_region = image.crop((0.00 * w, 0.55 * h, 1.00 * w, 0.85 * h))
29
+
30
+ # Full image for type detection
31
+ full_text = pytesseract.image_to_string(image)
32
+
33
+ # --------------------------------
34
+ # ----- EXTRACT FIELDS -----------
35
+ # --------------------------------
36
+
37
+ return {
38
+ "name": self._extract_name(name_region),
39
+ "hp": self._extract_hp(hp_region),
40
+ "types": self._extract_types(full_text),
41
+ "moves": self._extract_moves(moves_region),
42
+ }
43
+
44
+ # --------------------------------
45
+ # ----- EXTRACTORS ---------------
46
+ # --------------------------------
47
+
48
+ def _extract_name(self, region: Image.Image) -> str | None:
49
+ # Upscale region for better OCR accuracy
50
+ region = region.resize(
51
+ (region.width * 3, region.height * 3),
52
+ Image.LANCZOS
53
+ )
54
+ text = pytesseract.image_to_string(region, config="--psm 7").strip()
55
+ return text if text else None
56
+
57
+ def _extract_hp(self, region: Image.Image) -> str | None:
58
+ region = region.resize(
59
+ (region.width * 3, region.height * 3),
60
+ Image.LANCZOS
61
+ )
62
+ text = pytesseract.image_to_string(region, config="--psm 7")
63
+ match = re.search(r'(\d+)\s*HP|HP\s*(\d+)', text, re.IGNORECASE)
64
+ if match:
65
+ return match.group(1) or match.group(2)
66
+ return None
67
+
68
+ def _extract_types(self, text: str) -> list[str] | None:
69
+ types = [
70
+ "Fire", "Water", "Grass", "Electric", "Psychic",
71
+ "Fighting", "Darkness", "Metal", "Colorless",
72
+ "Dragon", "Fairy", "Lightning", "Normal"
73
+ ]
74
+ found = [t for t in types if t.lower() in text.lower()]
75
+ return found if found else None
76
+
77
+ def _extract_moves(self, region: Image.Image) -> list[dict] | None:
78
+ region = region.resize(
79
+ (region.width * 2, region.height * 2),
80
+ Image.LANCZOS
81
+ )
82
+ text = pytesseract.image_to_string(region)
83
+ lines = [line.strip() for line in text.splitlines() if line.strip()]
84
+
85
+ moves = []
86
+ i = 0
87
+ while i < len(lines):
88
+ # Match move name with damage e.g. "Lightning Flash 20"
89
+ match = re.match(r'^([A-Z][a-zA-Z\s]+?)\s+(\d+\+?)$', lines[i])
90
+ if match:
91
+ moves.append({
92
+ "name": match.group(1).strip(),
93
+ "damage": match.group(2).strip(),
94
+ "text": lines[i + 1] if i + 1 < len(lines) else None
95
+ })
96
+ i += 2
97
+ else:
98
+ i += 1
99
+
100
+ return moves if moves else None
app/services/similarity_service.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import faiss
2
+ import json
3
+ import numpy as np
4
+
5
+ from app.config import settings
6
+
7
class SimilarityService:
    """FAISS-backed nearest-neighbor search over the card embedding index."""

    def __init__(self):
        print("Loading FAISS index...")

        # Load FAISS index built offline.
        self.index = faiss.read_index(settings.FAISS_INDEX_PATH)

        # Load metadata. Explicit UTF-8 so the JSON decodes identically on
        # every platform (the default encoding is locale-dependent).
        # NOTE(review): metadata[i] is assumed to describe index vector i —
        # confirm the builder writes a position-aligned list.
        with open(settings.METADATA_PATH, "r", encoding="utf-8") as f:
            self.metadata = json.load(f)

        print(f"FAISS index loaded with {self.index.ntotal} cards.")

    def search(self, embedding: np.ndarray, top_k: int = 5) -> list:
        """Return up to `top_k` card dicts most similar to `embedding`.

        Each result carries id/name/set/types/rarity/image_url from the
        metadata plus a float `score` from the index.
        """
        # FAISS expects a 2-D float32 batch.
        query = embedding.reshape(1, -1).astype("float32")

        # Normalize the query (cosine-style search; assumes the index holds
        # normalized vectors — confirm in the index build script).
        faiss.normalize_L2(query)

        # Search index.
        scores, indices = self.index.search(query, top_k)

        # Map raw hits to metadata records.
        results = []
        for score, idx in zip(scores[0], indices[0]):
            if idx == -1:
                # FAISS pads with -1 when fewer than top_k vectors exist.
                continue

            card = self.metadata[idx]
            results.append({
                "id": card.get("id"),
                "name": card.get("name"),
                "set": card.get("set"),
                "types": card.get("types"),
                "rarity": card.get("rarity"),
                "image_url": card.get("image_url"),
                "score": float(score),
            })

        return results
app/utils.py ADDED
File without changes
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ pillow
4
+ numpy
5
+ torch
6
+ torchvision
7
+ faiss-cpu
8
+ pytesseract
9
+ opencv-python
10
+ pydantic
11
+ pydantic-settings
12
+ python-multipart
13
+ requests
training/build_faiss_index.py ADDED
File without changes
training/evaluate_similarity.py ADDED
File without changes
training/ocr_validation.py ADDED
File without changes
training/train_embedding.py ADDED
File without changes