import base64
import io
import json
from typing import Any, Dict

from PIL import Image
from transformers import pipeline
|
|
class EndpointHandler:
    """
    Custom handler for the ZoeDepth model, compliant with the Hugging Face
    Inference Endpoints custom-handler contract.

    The response packs both the raw predicted depth values and a base64-encoded
    visual depth map into a single JSON string returned under the
    ``"generated_text"`` key.
    """

    def __init__(self, path: str = ""):
        """
        Load the depth-estimation pipeline once at endpoint start-up.

        Args:
            path: Local path (or model id) of the model weights, supplied by
                the Inference Endpoints runtime.
        """
        self.pipe = pipeline(task="depth-estimation", model=path)
        print("Depth estimation pipeline initialized successfully.")

    def __call__(self, data: Dict[str, Any]) -> Dict[str, str]:
        """
        Handle one API request.

        Args:
            data: Request payload. ``data["inputs"]`` may be a PIL Image, raw
                image bytes, or a base64-encoded string (the usual form when
                the payload arrives as JSON). If the ``"inputs"`` key is
                absent, the payload itself is used, matching the original
                fallback behavior.

        Returns:
            Dict[str, str]: ``{"generated_text": <JSON string>}`` where the
            JSON object contains ``"raw_depth_data"`` (nested lists of raw
            depth values) and ``"visual_depth_map"`` (a base64 PNG data URI).

        Raises:
            ValueError: If the input cannot be decoded into an image.
        """
        # get() instead of pop(): same fallback semantics, but the caller's
        # dict is not mutated as a side effect of handling the request.
        inputs = data.get("inputs", data)

        image = self._to_image(inputs)

        prediction = self.pipe(image)

        # Raw depth tensor -> nested Python lists (JSON-serializable).
        raw_depth_data = prediction["predicted_depth"].cpu().tolist()

        # PIL visualization image -> base64-encoded PNG data URI.
        buffered = io.BytesIO()
        prediction["depth"].save(buffered, format="PNG")
        visual_map_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")

        results = {
            "raw_depth_data": raw_depth_data,
            "visual_depth_map": f"data:image/png;base64,{visual_map_base64}"
        }

        json_output_string = json.dumps(results)

        return {"generated_text": json_output_string}

    @staticmethod
    def _to_image(inputs: Any) -> "Image.Image":
        """Coerce a PIL Image, raw bytes, or base64 string into a PIL Image."""
        if isinstance(inputs, Image.Image):
            return inputs
        if isinstance(inputs, str):
            # JSON payloads carry images as base64 strings; decode to bytes
            # first (the original handler crashed on this common case).
            inputs = base64.b64decode(inputs)
        if isinstance(inputs, (bytes, bytearray)):
            return Image.open(io.BytesIO(inputs))
        raise ValueError(
            f"Unsupported input type: {type(inputs).__name__}; "
            "expected a PIL Image, raw bytes, or a base64 string."
        )
|
|