Instructions to use dchen0/font_classifier_v4 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use dchen0/font_classifier_v4 with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("image-classification", model="dchen0/font_classifier_v4") pipe("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/hub/parrots.png")# Load model directly from transformers import AutoImageProcessor, AutoModelForImageClassification processor = AutoImageProcessor.from_pretrained("dchen0/font_classifier_v4") model = AutoModelForImageClassification.from_pretrained("dchen0/font_classifier_v4") - Notebooks
- Google Colab
- Kaggle
| """ | |
| Custom DINOv2 model that includes pad_to_square preprocessing in the forward pass. | |
| This allows inference endpoints to automatically apply correct preprocessing. | |
| """ | |
| import torch | |
| import torch.nn as nn | |
| import torch.nn.functional as F | |
| from transformers import Dinov2ForImageClassification | |
| class FontClassifierWithPreprocessing(Dinov2ForImageClassification): | |
| """ | |
| DINOv2 model that automatically applies pad_to_square preprocessing. | |
| This model can be deployed to Inference Endpoints and will automatically | |
| handle preprocessing in the forward pass, so clients can send raw images. | |
| """ | |
| def __init__(self, config): | |
| super().__init__(config) | |
| # Store preprocessing parameters | |
| self.register_buffer('image_mean', torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)) | |
| self.register_buffer('image_std', torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)) | |
| self.target_size = 224 | |
| def pad_to_square_tensor(self, images): | |
| """ | |
| Pad batch of images to square preserving aspect ratio. | |
| Args: | |
| images: Tensor of shape (B, C, H, W) | |
| Returns: | |
| Tensor of shape (B, C, max_size, max_size) | |
| """ | |
| B, C, H, W = images.shape | |
| max_size = max(H, W) | |
| if H == W == max_size: | |
| return images # Already square | |
| # Calculate padding | |
| pad_h = max_size - H | |
| pad_w = max_size - W | |
| pad_top = pad_h // 2 | |
| pad_bottom = pad_h - pad_top | |
| pad_left = pad_w // 2 | |
| pad_right = pad_w - pad_left | |
| # Apply padding (left, right, top, bottom) | |
| padded = F.pad(images, (pad_left, pad_right, pad_top, pad_bottom), value=0) | |
| return padded | |
| def preprocess_images(self, pixel_values): | |
| """ | |
| Apply full preprocessing pipeline to raw or partially processed images. | |
| Args: | |
| pixel_values: Tensor of shape (B, C, H, W) | |
| Returns: | |
| Preprocessed tensor ready for DINOv2 | |
| """ | |
| # Ensure we have a batch dimension | |
| if pixel_values.dim() == 3: | |
| pixel_values = pixel_values.unsqueeze(0) | |
| # Convert to float if needed | |
| if pixel_values.dtype != torch.float32: | |
| pixel_values = pixel_values.float() | |
| # Normalize to [0, 1] if values are in [0, 255] | |
| if pixel_values.max() > 1.0: | |
| pixel_values = pixel_values / 255.0 | |
| # Apply pad_to_square | |
| pixel_values = self.pad_to_square_tensor(pixel_values) | |
| # Resize to target size | |
| if pixel_values.shape[-1] != self.target_size or pixel_values.shape[-2] != self.target_size: | |
| pixel_values = F.interpolate( | |
| pixel_values, | |
| size=(self.target_size, self.target_size), | |
| mode='bilinear', | |
| align_corners=False | |
| ) | |
| # Apply ImageNet normalization | |
| pixel_values = (pixel_values - self.image_mean) / self.image_std | |
| return pixel_values | |
| def forward(self, pixel_values=None, labels=None, **kwargs): | |
| """ | |
| Forward pass with automatic preprocessing. | |
| Args: | |
| pixel_values: Raw or preprocessed images | |
| labels: Optional labels for training | |
| """ | |
| if pixel_values is None: | |
| raise ValueError("pixel_values must be provided") | |
| # Apply preprocessing automatically | |
| processed_pixel_values = self.preprocess_images(pixel_values) | |
| # Call parent forward with preprocessed images | |
| return super().forward(pixel_values=processed_pixel_values, labels=labels, **kwargs) | |
| # Function to convert existing model | |
| def convert_to_preprocessing_model(original_model_path, output_path): | |
| """ | |
| Convert an existing DINOv2 model to include preprocessing. | |
| Args: | |
| original_model_path: Path to original model | |
| output_path: Path to save converted model | |
| """ | |
| print(f"Converting {original_model_path} to include preprocessing...") | |
| # Load original model | |
| original_model = Dinov2ForImageClassification.from_pretrained(original_model_path) | |
| # Create new model with same config | |
| preprocessing_model = FontClassifierWithPreprocessing(original_model.config) | |
| # Copy all weights | |
| preprocessing_model.load_state_dict(original_model.state_dict()) | |
| # Save the new model | |
| preprocessing_model.save_pretrained(output_path, safe_serialization=True) | |
| # Copy processor config (unchanged) | |
| from transformers import AutoImageProcessor | |
| processor = AutoImageProcessor.from_pretrained(original_model_path) | |
| processor.save_pretrained(output_path) | |
| print(f"✅ Converted model saved to {output_path}") | |
| return preprocessing_model | |
| if __name__ == "__main__": | |
| # Example: Convert existing model | |
| convert_to_preprocessing_model( | |
| "dchen0/font-classifier-v4", | |
| "./font-classifier-with-preprocessing" | |
| ) |