import timm
import torch
from PIL import Image
from torchvision import transforms
import requests
from io import BytesIO
|
|
|
|
def load_model():
    """Create the pre-trained ViT classifier from the Hugging Face Hub.

    Returns:
        The timm model with pretrained weights loaded, switched to eval mode.
    """
    model_name = "hf_hub:timm/vit_large_patch14_clip_336.laion2b_ft_in12k_in1k_inat21"
    classifier = timm.create_model(model_name, pretrained=True)
    # nn.Module.eval() returns the module itself, so this is a single expression.
    return classifier.eval()
|
|
|
|
def get_label_names():
    """Fetch the class-label list for the model from the Hugging Face Hub.

    Reads the model repo's ``config.json`` and returns its ``label_names``
    entry (labels in class-index order).

    Returns:
        The ``label_names`` value from the config (a list of label strings).

    Raises:
        requests.HTTPError: if the download returns an error status.
        requests.Timeout: if the Hub does not respond within the timeout.
        KeyError: if the config has no ``label_names`` key.
    """
    config_url = "https://huggingface.co/timm/vit_large_patch14_clip_336.laion2b_ft_in12k_in1k_inat21/resolve/main/config.json"
    # timeout prevents the call from hanging forever on a stalled connection;
    # requests has no default timeout.
    response = requests.get(config_url, timeout=30)
    response.raise_for_status()
    config = response.json()
    return config["label_names"]
|
|
|
|
def preprocess_image(image_url):
    """Download an image and turn it into a normalized 1-image input batch.

    Args:
        image_url: HTTP(S) URL of the image to classify.

    Returns:
        A float tensor of shape (1, 3, 336, 336) ready to feed to the model.

    Raises:
        requests.HTTPError: if the image download fails.
        requests.Timeout: if the server does not respond within the timeout.
        PIL.UnidentifiedImageError: if the payload is not a decodable image.
    """
    preprocess = transforms.Compose([
        transforms.Resize(336),
        transforms.CenterCrop(336),
        transforms.ToTensor(),
        # NOTE(review): these are the standard ImageNet mean/std. CLIP-pretrained
        # timm checkpoints often expect the OpenAI CLIP constants instead —
        # confirm against the model's pretrained_cfg (timm.data.resolve_data_config).
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # timeout prevents an indefinite hang; requests has no default timeout.
    response = requests.get(image_url, timeout=30)
    response.raise_for_status()
    image = Image.open(BytesIO(response.content))
    # Grayscale, RGBA, or palette images would break the 3-channel Normalize
    # (and ToTensor would yield the wrong channel count) — force RGB first.
    image = image.convert("RGB")
    input_tensor = preprocess(image).unsqueeze(0)  # add the batch dimension
    return input_tensor
|
|
|
|
def predict_species(model, image_url, label_names):
    """Classify the image at *image_url* and return the predicted label.

    Args:
        model: The classifier returned by ``load_model``.
        image_url: HTTP(S) URL of the image to classify.
        label_names: Labels indexed by class id, as from ``get_label_names``.

    Returns:
        The label string for the highest-scoring class.
    """
    batch = preprocess_image(image_url)

    # Prefer the GPU when one is available; otherwise stay on the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    batch = batch.to(device)

    # Inference only — no gradients needed.
    with torch.no_grad():
        logits = model(batch)
        top_class = logits.argmax(dim=1)

    return label_names[top_class.item()]
| |