Text Detection Model
A YOLO-based text detection model trained on the ICDAR 2003 dataset.
Model
Architecture: YOLO11m
Input size: 640
Metric: mAP50-95 = 0.692
Usage
from huggingface_hub import hf_hub_download
from ultralytics import YOLO
import json
import cv2
import matplotlib.pyplot as plt
# helper: draw predicted boxes and their confidence scores on the image
def visualize_bbox(img_path, predictions, conf_thres=0.8, font=cv2.FONT_HERSHEY_SIMPLEX):
    """Draw confident detections and their scores on the image at ``img_path``.

    Args:
        img_path: Path of the image file; it is read with ``cv2.imread``.
        predictions: Iterable of dicts, each holding a ``"confidence"`` value
            and a ``"box"`` dict with ``"x1"``, ``"y1"``, ``"x2"``, ``"y2"``.
        conf_thres: Predictions with a confidence below this are skipped.
        font: OpenCV font face used for the score label.

    Returns:
        The image loaded by ``cv2.imread`` (BGR channel order) with a green
        rectangle and a score label drawn for every kept prediction.

    Raises:
        OSError: If ``cv2.imread`` cannot load ``img_path``.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with a clear message rather than inside a drawing call.
        raise OSError(f"cv2.imread could not read image: {img_path!r}")

    box_color = (0, 255, 0)
    label_pad = 5

    for prediction in predictions:
        conf_score = prediction["confidence"]
        if conf_score < conf_thres:
            continue

        bbox = prediction["box"]
        xmin = int(bbox["x1"])
        ymin = int(bbox["y1"])
        xmax = int(bbox["x2"])
        ymax = int(bbox["y2"])

        cv2.rectangle(img, (xmin, ymin), (xmax, ymax), box_color, 3)

        text = f"{conf_score:.2f}"
        (text_width, text_height), _ = cv2.getTextSize(text, font, 1, 2)

        # The label normally sits directly above the box. When the box touches
        # the top of the image that would place it off-canvas, so draw it just
        # inside the box instead.
        label_top = ymin - text_height - label_pad
        if label_top < 0:
            label_top = ymin
        label_bottom = label_top + text_height + label_pad

        # Filled background (thickness -1) keeps the black text readable.
        cv2.rectangle(
            img,
            (xmin, label_top),
            (xmin + text_width, label_bottom),
            box_color,
            -1,
        )
        cv2.putText(img, text, (xmin, label_bottom - label_pad), font, 1, (0, 0, 0), 2)

    return img
# Fetch the trained weights and the demo image from the same Hub repository.
REPO_ID = "huytqvn/text-detection-str-pipeline"
model_path = hf_hub_download(repo_id=REPO_ID, filename="best.pt")
sample_path = hf_hub_download(repo_id=REPO_ID, filename="sample.JPG")

# Build the detector from the downloaded checkpoint.
model = YOLO(model_path)

# Run detection on the sample; only the first result is used, and it is
# round-tripped through JSON to obtain plain Python prediction records.
results = model(sample_path)
first_result_json = results[0].to_json()
predict = json.loads(first_result_json)

# Draw the kept detections, then convert BGR -> RGB for matplotlib.
annotated_bgr = visualize_bbox(sample_path, predict, conf_thres=0.75)
visualize_img = cv2.cvtColor(annotated_bgr, cv2.COLOR_BGR2RGB)

plt.imshow(visualize_img)
plt.axis("off")
plt.show()
- Downloads last month
- 6