HZSDU committed on
Commit
d9b768c
·
verified ·
1 Parent(s): ac03b97

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. yolov8_model/ultralytics/data/__init__.py +15 -0
  2. yolov8_model/ultralytics/data/__pycache__/__init__.cpython-310.pyc +0 -0
  3. yolov8_model/ultralytics/data/__pycache__/augment.cpython-310.pyc +0 -0
  4. yolov8_model/ultralytics/data/__pycache__/base.cpython-310.pyc +0 -0
  5. yolov8_model/ultralytics/data/__pycache__/build.cpython-310.pyc +0 -0
  6. yolov8_model/ultralytics/data/__pycache__/converter.cpython-310.pyc +0 -0
  7. yolov8_model/ultralytics/data/__pycache__/dataset.cpython-310.pyc +0 -0
  8. yolov8_model/ultralytics/data/__pycache__/loaders.cpython-310.pyc +0 -0
  9. yolov8_model/ultralytics/data/__pycache__/utils.cpython-310.pyc +0 -0
  10. yolov8_model/ultralytics/data/dataset.py +375 -0
  11. yolov8_model/ultralytics/data/explorer/__init__.py +5 -0
  12. yolov8_model/ultralytics/data/explorer/__pycache__/__init__.cpython-310.pyc +0 -0
  13. yolov8_model/ultralytics/data/explorer/__pycache__/explorer.cpython-310.pyc +0 -0
  14. yolov8_model/ultralytics/data/explorer/__pycache__/utils.cpython-310.pyc +0 -0
  15. yolov8_model/ultralytics/data/explorer/explorer.py +471 -0
  16. yolov8_model/ultralytics/data/explorer/gui/__init__.py +1 -0
  17. yolov8_model/ultralytics/data/explorer/gui/dash.py +268 -0
  18. yolov8_model/ultralytics/data/explorer/utils.py +166 -0
  19. yolov8_model/ultralytics/data/loaders.py +533 -0
  20. yolov8_model/ultralytics/data/scripts/download_weights.sh +18 -0
  21. yolov8_model/ultralytics/data/scripts/get_coco.sh +60 -0
  22. yolov8_model/ultralytics/data/scripts/get_coco128.sh +17 -0
  23. yolov8_model/ultralytics/data/scripts/get_imagenet.sh +51 -0
  24. yolov8_model/ultralytics/data/split_dota.py +288 -0
  25. yolov8_model/ultralytics/data/utils.py +647 -0
  26. yolov8_model/ultralytics/engine/__init__.py +1 -0
  27. yolov8_model/ultralytics/engine/__pycache__/__init__.cpython-310.pyc +0 -0
  28. yolov8_model/ultralytics/engine/__pycache__/exporter.cpython-310.pyc +0 -0
  29. yolov8_model/ultralytics/engine/__pycache__/model.cpython-310.pyc +0 -0
  30. yolov8_model/ultralytics/engine/__pycache__/predictor.cpython-310.pyc +0 -0
  31. yolov8_model/ultralytics/engine/__pycache__/results.cpython-310.pyc +0 -0
  32. yolov8_model/ultralytics/engine/__pycache__/trainer.cpython-310.pyc +0 -0
  33. yolov8_model/ultralytics/engine/__pycache__/validator.cpython-310.pyc +0 -0
  34. yolov8_model/ultralytics/engine/exporter.py +1099 -0
  35. yolov8_model/ultralytics/engine/model.py +772 -0
  36. yolov8_model/ultralytics/engine/predictor.py +407 -0
  37. yolov8_model/ultralytics/engine/results.py +680 -0
  38. yolov8_model/ultralytics/engine/trainer.py +755 -0
  39. yolov8_model/ultralytics/engine/tuner.py +240 -0
  40. yolov8_model/ultralytics/engine/validator.py +336 -0
  41. yolov8_model/ultralytics/hub/__init__.py +128 -0
  42. yolov8_model/ultralytics/hub/__pycache__/__init__.cpython-310.pyc +0 -0
  43. yolov8_model/ultralytics/hub/__pycache__/auth.cpython-310.pyc +0 -0
  44. yolov8_model/ultralytics/hub/__pycache__/utils.cpython-310.pyc +0 -0
  45. yolov8_model/ultralytics/hub/auth.py +136 -0
  46. yolov8_model/ultralytics/hub/session.py +348 -0
  47. yolov8_model/ultralytics/hub/utils.py +247 -0
  48. yolov8_model/ultralytics/models/__init__.py +7 -0
  49. yolov8_model/ultralytics/models/fastsam/__init__.py +8 -0
  50. yolov8_model/ultralytics/models/fastsam/__pycache__/__init__.cpython-310.pyc +0 -0
yolov8_model/ultralytics/data/__init__.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+
3
+ from .base import BaseDataset
4
+ from .build import build_dataloader, build_yolo_dataset, load_inference_source
5
+ from .dataset import ClassificationDataset, SemanticDataset, YOLODataset
6
+
7
+ __all__ = (
8
+ "BaseDataset",
9
+ "ClassificationDataset",
10
+ "SemanticDataset",
11
+ "YOLODataset",
12
+ "build_yolo_dataset",
13
+ "build_dataloader",
14
+ "load_inference_source",
15
+ )
yolov8_model/ultralytics/data/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (470 Bytes). View file
 
yolov8_model/ultralytics/data/__pycache__/augment.cpython-310.pyc ADDED
Binary file (44.5 kB). View file
 
yolov8_model/ultralytics/data/__pycache__/base.cpython-310.pyc ADDED
Binary file (11.7 kB). View file
 
yolov8_model/ultralytics/data/__pycache__/build.cpython-310.pyc ADDED
Binary file (6.24 kB). View file
 
yolov8_model/ultralytics/data/__pycache__/converter.cpython-310.pyc ADDED
Binary file (13.7 kB). View file
 
yolov8_model/ultralytics/data/__pycache__/dataset.cpython-310.pyc ADDED
Binary file (14 kB). View file
 
yolov8_model/ultralytics/data/__pycache__/loaders.cpython-310.pyc ADDED
Binary file (20.4 kB). View file
 
yolov8_model/ultralytics/data/__pycache__/utils.cpython-310.pyc ADDED
Binary file (26.7 kB). View file
 
yolov8_model/ultralytics/data/dataset.py ADDED
@@ -0,0 +1,375 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ import contextlib
3
+ from itertools import repeat
4
+ from multiprocessing.pool import ThreadPool
5
+ from pathlib import Path
6
+
7
+ import cv2
8
+ import numpy as np
9
+ import torch
10
+ import torchvision
11
+ from PIL import Image
12
+
13
+ from yolov8_model.ultralytics.utils import LOCAL_RANK, NUM_THREADS, TQDM, colorstr, is_dir_writeable
14
+ from yolov8_model.ultralytics.utils.ops import resample_segments
15
+ from .augment import Compose, Format, Instances, LetterBox, classify_augmentations, classify_transforms, v8_transforms
16
+ from .base import BaseDataset
17
+ from .utils import HELP_URL, LOGGER, get_hash, img2label_paths, verify_image, verify_image_label
18
+
19
+ # Ultralytics dataset *.cache version, >= 1.0.0 for YOLOv8
20
+ DATASET_CACHE_VERSION = "1.0.3"
21
+
22
+
23
class YOLODataset(BaseDataset):
    """
    Dataset class for loading object detection and/or segmentation labels in YOLO format.

    Args:
        data (dict, optional): A dataset YAML dictionary. Defaults to None.
        task (str): An explicit arg to point current task, Defaults to 'detect'.

    Returns:
        (torch.utils.data.Dataset): A PyTorch dataset object that can be used for training an object detection model.
    """

    def __init__(self, *args, data=None, task="detect", **kwargs):
        """Initializes the YOLODataset with optional configurations for segments and keypoints."""
        self.use_segments = task == "segment"
        self.use_keypoints = task == "pose"
        self.use_obb = task == "obb"
        self.data = data
        assert not (self.use_segments and self.use_keypoints), "Can not use both segments and keypoints."
        super().__init__(*args, **kwargs)

    def cache_labels(self, path=Path("./labels.cache")):
        """
        Cache dataset labels, check images and read shapes.

        Args:
            path (Path): Path where to save the cache file. Default is Path('./labels.cache').

        Returns:
            (dict): labels.
        """
        x = {"labels": []}
        nm, nf, ne, nc, msgs = 0, 0, 0, 0, []  # number missing, found, empty, corrupt, messages
        desc = f"{self.prefix}Scanning {path.parent / path.stem}..."
        total = len(self.im_files)
        nkpt, ndim = self.data.get("kpt_shape", (0, 0))
        if self.use_keypoints and (nkpt <= 0 or ndim not in (2, 3)):
            raise ValueError(
                "'kpt_shape' in data.yaml missing or incorrect. Should be a list with [number of "
                "keypoints, number of dims (2 for x,y or 3 for x,y,visible)], i.e. 'kpt_shape: [17, 3]'"
            )
        # Verify image/label pairs in parallel; each worker returns parsed arrays plus per-file counters.
        with ThreadPool(NUM_THREADS) as pool:
            results = pool.imap(
                func=verify_image_label,
                iterable=zip(
                    self.im_files,
                    self.label_files,
                    repeat(self.prefix),
                    repeat(self.use_keypoints),
                    repeat(len(self.data["names"])),
                    repeat(nkpt),
                    repeat(ndim),
                ),
            )
            pbar = TQDM(results, desc=desc, total=total)
            for im_file, lb, shape, segments, keypoint, nm_f, nf_f, ne_f, nc_f, msg in pbar:
                nm += nm_f
                nf += nf_f
                ne += ne_f
                nc += nc_f
                if im_file:
                    x["labels"].append(
                        dict(
                            im_file=im_file,
                            shape=shape,
                            cls=lb[:, 0:1],  # n, 1
                            bboxes=lb[:, 1:],  # n, 4
                            segments=segments,
                            keypoints=keypoint,
                            normalized=True,
                            bbox_format="xywh",
                        )
                    )
                if msg:
                    msgs.append(msg)
                pbar.desc = f"{desc} {nf} images, {nm + ne} backgrounds, {nc} corrupt"
            pbar.close()

        if msgs:
            LOGGER.info("\n".join(msgs))
        if nf == 0:
            LOGGER.warning(f"{self.prefix}WARNING ⚠️ No labels found in {path}. {HELP_URL}")
        x["hash"] = get_hash(self.label_files + self.im_files)
        x["results"] = nf, nm, ne, nc, len(self.im_files)
        x["msgs"] = msgs  # warnings
        save_dataset_cache_file(self.prefix, path, x)
        return x

    def get_labels(self):
        """Returns dictionary of labels for YOLO training."""
        self.label_files = img2label_paths(self.im_files)
        cache_path = Path(self.label_files[0]).parent.with_suffix(".cache")
        try:
            cache, exists = load_dataset_cache_file(cache_path), True  # attempt to load a *.cache file
            assert cache["version"] == DATASET_CACHE_VERSION  # matches current version
            assert cache["hash"] == get_hash(self.label_files + self.im_files)  # identical hash
        except (FileNotFoundError, AssertionError, AttributeError):
            cache, exists = self.cache_labels(cache_path), False  # run cache ops

        # Display cache
        nf, nm, ne, nc, n = cache.pop("results")  # found, missing, empty, corrupt, total
        if exists and LOCAL_RANK in (-1, 0):
            d = f"Scanning {cache_path}... {nf} images, {nm + ne} backgrounds, {nc} corrupt"
            TQDM(None, desc=self.prefix + d, total=n, initial=n)  # display results
            if cache["msgs"]:
                LOGGER.info("\n".join(cache["msgs"]))  # display warnings

        # Read cache
        [cache.pop(k) for k in ("hash", "version", "msgs")]  # remove items
        labels = cache["labels"]
        if not labels:
            LOGGER.warning(f"WARNING ⚠️ No images found in {cache_path}, training may not work correctly. {HELP_URL}")
            # BUGFIX: return early — unpacking the per-label stats generator below would raise
            # "ValueError: not enough values to unpack" when the dataset contains zero labels,
            # because zip(*lengths) yields nothing for an empty labels list.
            return labels
        self.im_files = [lb["im_file"] for lb in labels]  # update im_files

        # Check if the dataset is all boxes or all segments
        lengths = ((len(lb["cls"]), len(lb["bboxes"]), len(lb["segments"])) for lb in labels)
        len_cls, len_boxes, len_segments = (sum(x) for x in zip(*lengths))
        if len_segments and len_boxes != len_segments:
            LOGGER.warning(
                f"WARNING ⚠️ Box and segment counts should be equal, but got len(segments) = {len_segments}, "
                f"len(boxes) = {len_boxes}. To resolve this only boxes will be used and all segments will be removed. "
                "To avoid this please supply either a detect or segment dataset, not a detect-segment mixed dataset."
            )
            for lb in labels:
                lb["segments"] = []
        if len_cls == 0:
            LOGGER.warning(f"WARNING ⚠️ No labels found in {cache_path}, training may not work correctly. {HELP_URL}")
        return labels

    def build_transforms(self, hyp=None):
        """Builds and appends transforms to the list."""
        if self.augment:
            # Mosaic/mixup are incompatible with rectangular training batches.
            hyp.mosaic = hyp.mosaic if self.augment and not self.rect else 0.0
            hyp.mixup = hyp.mixup if self.augment and not self.rect else 0.0
            transforms = v8_transforms(self, self.imgsz, hyp)
        else:
            transforms = Compose([LetterBox(new_shape=(self.imgsz, self.imgsz), scaleup=False)])
        transforms.append(
            Format(
                bbox_format="xywh",
                normalize=True,
                return_mask=self.use_segments,
                return_keypoint=self.use_keypoints,
                return_obb=self.use_obb,
                batch_idx=True,
                mask_ratio=hyp.mask_ratio,
                mask_overlap=hyp.overlap_mask,
            )
        )
        return transforms

    def close_mosaic(self, hyp):
        """Sets mosaic, copy_paste and mixup options to 0.0 and builds transformations."""
        hyp.mosaic = 0.0  # set mosaic ratio=0.0
        hyp.copy_paste = 0.0  # keep the same behavior as previous v8 close-mosaic
        hyp.mixup = 0.0  # keep the same behavior as previous v8 close-mosaic
        self.transforms = self.build_transforms(hyp)

    def update_labels_info(self, label):
        """
        Custom your label format here.

        Note:
            cls is not with bboxes now, classification and semantic segmentation need an independent cls label
            Can also support classification and semantic segmentation by adding or removing dict keys there.
        """
        bboxes = label.pop("bboxes")
        segments = label.pop("segments", [])
        keypoints = label.pop("keypoints", None)
        bbox_format = label.pop("bbox_format")
        normalized = label.pop("normalized")

        # NOTE: do NOT resample oriented boxes
        segment_resamples = 100 if self.use_obb else 1000
        if len(segments) > 0:
            # list[np.array(1000, 2)] * num_samples
            # (N, 1000, 2)
            segments = np.stack(resample_segments(segments, n=segment_resamples), axis=0)
        else:
            segments = np.zeros((0, segment_resamples, 2), dtype=np.float32)
        label["instances"] = Instances(bboxes, segments, keypoints, bbox_format=bbox_format, normalized=normalized)
        return label

    @staticmethod
    def collate_fn(batch):
        """Collates data samples into batches."""
        new_batch = {}
        keys = batch[0].keys()
        values = list(zip(*[list(b.values()) for b in batch]))
        for i, k in enumerate(keys):
            value = values[i]
            if k == "img":
                value = torch.stack(value, 0)
            if k in ["masks", "keypoints", "bboxes", "cls", "segments", "obb"]:
                value = torch.cat(value, 0)
            new_batch[k] = value
        new_batch["batch_idx"] = list(new_batch["batch_idx"])
        for i in range(len(new_batch["batch_idx"])):
            new_batch["batch_idx"][i] += i  # add target image index for build_targets()
        new_batch["batch_idx"] = torch.cat(new_batch["batch_idx"], 0)
        return new_batch
224
+
225
+
226
# Classification dataloaders -------------------------------------------------------------------------------------------
class ClassificationDataset(torchvision.datasets.ImageFolder):
    """
    YOLO Classification Dataset.

    Args:
        root (str): Dataset path.

    Attributes:
        cache_ram (bool): True if images should be cached in RAM, False otherwise.
        cache_disk (bool): True if images should be cached on disk, False otherwise.
        samples (list): List of samples containing file, index, npy, and im.
        torch_transforms (callable): torchvision transforms applied to the dataset.
        album_transforms (callable, optional): Albumentations transforms applied to the dataset if augment is True.
    """

    def __init__(self, root, args, augment=False, cache=False, prefix=""):
        """
        Initialize YOLO object with root, image size, augmentations, and cache settings.

        Args:
            root (str): Dataset path.
            args (Namespace): Argument parser containing dataset related settings.
            augment (bool, optional): True if dataset should be augmented, False otherwise. Defaults to False.
            cache (bool | str | optional): Cache setting, can be True, False, 'ram' or 'disk'. Defaults to False.
        """
        super().__init__(root=root)
        if augment and args.fraction < 1.0:  # reduce training fraction
            self.samples = self.samples[: round(len(self.samples) * args.fraction)]
        self.prefix = colorstr(f"{prefix}: ") if prefix else ""
        self.cache_ram = cache is True or cache == "ram"
        self.cache_disk = cache == "disk"
        self.samples = self.verify_images()  # filter out bad images
        # Extend each (file, class-index) sample with a .npy cache path and a slot for the RAM-cached array.
        self.samples = [list(x) + [Path(x[0]).with_suffix(".npy"), None] for x in self.samples]  # file, index, npy, im
        scale = (1.0 - args.scale, 1.0)  # (0.08, 1.0)
        self.torch_transforms = (
            classify_augmentations(
                size=args.imgsz,
                scale=scale,
                hflip=args.fliplr,
                vflip=args.flipud,
                erasing=args.erasing,
                auto_augment=args.auto_augment,
                hsv_h=args.hsv_h,
                hsv_s=args.hsv_s,
                hsv_v=args.hsv_v,
            )
            if augment
            else classify_transforms(size=args.imgsz, crop_fraction=args.crop_fraction)
        )

    def __getitem__(self, i):
        """Returns subset of data and targets corresponding to given indices."""
        f, j, fn, im = self.samples[i]  # filename, index, filename.with_suffix('.npy'), image
        if self.cache_ram and im is None:
            # First access under RAM caching: read once and store the BGR array in the sample slot.
            im = self.samples[i][3] = cv2.imread(f)
        elif self.cache_disk:
            if not fn.exists():  # load npy
                np.save(fn.as_posix(), cv2.imread(f), allow_pickle=False)
            im = np.load(fn)
        else:  # read image
            im = cv2.imread(f)  # BGR
        # Convert NumPy array to PIL image
        im = Image.fromarray(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
        sample = self.torch_transforms(im)
        return {"img": sample, "cls": j}

    def __len__(self) -> int:
        """Return the total number of samples in the dataset."""
        return len(self.samples)

    def verify_images(self):
        """Verify all images in dataset."""
        desc = f"{self.prefix}Scanning {self.root}..."
        path = Path(self.root).with_suffix(".cache")  # *.cache file path

        # Cache hit path: the `return samples` below exits early; if any assertion fails (stale
        # version/hash) the exception is suppressed and control falls through to the rescan.
        with contextlib.suppress(FileNotFoundError, AssertionError, AttributeError):
            cache = load_dataset_cache_file(path)  # attempt to load a *.cache file
            assert cache["version"] == DATASET_CACHE_VERSION  # matches current version
            assert cache["hash"] == get_hash([x[0] for x in self.samples])  # identical hash
            nf, nc, n, samples = cache.pop("results")  # found, corrupt, total, samples
            if LOCAL_RANK in (-1, 0):
                d = f"{desc} {nf} images, {nc} corrupt"
                TQDM(None, desc=d, total=n, initial=n)
                if cache["msgs"]:
                    LOGGER.info("\n".join(cache["msgs"]))  # display warnings
            return samples

        # Run scan if *.cache retrieval failed
        nf, nc, msgs, samples, x = 0, 0, [], [], {}
        with ThreadPool(NUM_THREADS) as pool:
            results = pool.imap(func=verify_image, iterable=zip(self.samples, repeat(self.prefix)))
            pbar = TQDM(results, desc=desc, total=len(self.samples))
            for sample, nf_f, nc_f, msg in pbar:
                if nf_f:
                    samples.append(sample)
                if msg:
                    msgs.append(msg)
                nf += nf_f
                nc += nc_f
                pbar.desc = f"{desc} {nf} images, {nc} corrupt"
            pbar.close()
        if msgs:
            LOGGER.info("\n".join(msgs))
        x["hash"] = get_hash([x[0] for x in self.samples])
        x["results"] = nf, nc, len(samples), samples
        x["msgs"] = msgs  # warnings
        save_dataset_cache_file(self.prefix, path, x)
        return samples
335
+
336
+
337
def load_dataset_cache_file(path):
    """
    Load an Ultralytics *.cache dictionary from path.

    Args:
        path (str | Path): Path to a *.cache file (a pickled dict written with np.save).

    Returns:
        (dict): The cached dataset dictionary.
    """
    import gc

    gc.disable()  # reduce pickle load time https://github.com/ultralytics/ultralytics/pull/1585
    try:
        cache = np.load(str(path), allow_pickle=True).item()  # load dict
    finally:
        # BUGFIX: always re-enable garbage collection, even when np.load raises
        # (e.g. FileNotFoundError for a missing cache); previously a failed load
        # left GC disabled for the rest of the process.
        gc.enable()
    return cache
345
+
346
+
347
def save_dataset_cache_file(prefix, path, x):
    """Save an Ultralytics dataset *.cache dictionary x to path."""
    x["version"] = DATASET_CACHE_VERSION  # stamp the current cache version for later validation
    if not is_dir_writeable(path.parent):
        # Nothing we can do without write access; the next run will simply rescan.
        LOGGER.warning(f"{prefix}WARNING ⚠️ Cache directory {path.parent} is not writeable, cache not saved.")
        return
    if path.exists():
        path.unlink()  # remove *.cache file if exists
    # np.save always appends '.npy', so write then strip the suffix by renaming.
    np.save(str(path), x)  # save cache for next time
    path.with_suffix(".cache.npy").rename(path)  # remove .npy suffix
    LOGGER.info(f"{prefix}New cache created: {path}")
358
+
359
+
360
# TODO: support semantic segmentation
class SemanticDataset(BaseDataset):
    """
    Semantic Segmentation Dataset.

    This class is responsible for handling datasets used for semantic segmentation tasks. It inherits functionalities
    from the BaseDataset class.

    Note:
        This class is currently a placeholder and needs to be populated with methods and attributes for supporting
        semantic segmentation tasks.
    """

    def __init__(self):
        """Initialize a SemanticDataset object."""
        # NOTE(review): BaseDataset appears to take required arguments elsewhere in this file
        # (YOLODataset forwards *args/**kwargs to it), so this no-arg call likely fails at
        # runtime — confirm before instantiating this placeholder.
        super().__init__()
yolov8_model/ultralytics/data/explorer/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+
3
+ from .utils import plot_query_result
4
+
5
+ __all__ = ["plot_query_result"]
yolov8_model/ultralytics/data/explorer/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (248 Bytes). View file
 
yolov8_model/ultralytics/data/explorer/__pycache__/explorer.cpython-310.pyc ADDED
Binary file (17 kB). View file
 
yolov8_model/ultralytics/data/explorer/__pycache__/utils.cpython-310.pyc ADDED
Binary file (7.39 kB). View file
 
yolov8_model/ultralytics/data/explorer/explorer.py ADDED
@@ -0,0 +1,471 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+
3
+ from io import BytesIO
4
+ from pathlib import Path
5
+ from typing import Any, List, Tuple, Union
6
+
7
+ import cv2
8
+ import numpy as np
9
+ import torch
10
+ from PIL import Image
11
+ from matplotlib import pyplot as plt
12
+ from pandas import DataFrame
13
+ from tqdm import tqdm
14
+
15
+ from yolov8_model.ultralytics.data.augment import Format
16
+ from yolov8_model.ultralytics.data.dataset import YOLODataset
17
+ from yolov8_model.ultralytics.data.utils import check_det_dataset
18
+ from yolov8_model.ultralytics.models.yolo.model import YOLO
19
+ from yolov8_model.ultralytics.utils import LOGGER, IterableSimpleNamespace, checks, USER_CONFIG_DIR
20
+ from .utils import get_sim_index_schema, get_table_schema, plot_query_result, prompt_sql_query, sanitize_batch
21
+
22
+
23
class ExplorerDataset(YOLODataset):
    """YOLODataset variant used by the Explorer: images keep their native size (no resize transforms)."""

    def __init__(self, *args, data: dict = None, **kwargs) -> None:
        """Initialize the dataset, forwarding all arguments to YOLODataset."""
        super().__init__(*args, data=data, **kwargs)

    def load_image(self, i: int) -> Union[Tuple[np.ndarray, Tuple[int, int], Tuple[int, int]], Tuple[None, None, None]]:
        """Loads 1 image from dataset index 'i' without any resize ops."""
        cached = self.ims[i]
        if cached is not None:  # already held in RAM — return it untouched
            return self.ims[i], self.im_hw0[i], self.im_hw[i]

        img_path, npy_path = self.im_files[i], self.npy_files[i]
        if npy_path.exists():  # prefer the pre-saved numpy array
            img = np.load(npy_path)
        else:
            img = cv2.imread(img_path)  # BGR
            if img is None:
                raise FileNotFoundError(f"Image Not Found {img_path}")
        original_hw = img.shape[:2]  # orig hw
        return img, original_hw, img.shape[:2]

    def build_transforms(self, hyp: IterableSimpleNamespace = None):
        """Creates transforms for dataset images without resizing."""
        formatter = Format(
            bbox_format="xyxy",
            normalize=False,
            return_mask=self.use_segments,
            return_keypoint=self.use_keypoints,
            batch_idx=True,
            mask_ratio=hyp.mask_ratio,
            mask_overlap=hyp.overlap_mask,
        )
        return formatter
53
+
54
+
55
+ class Explorer:
56
+ def __init__(
57
+ self,
58
+ data: Union[str, Path] = "coco128.yaml",
59
+ model: str = "yolov8n.pt",
60
+ uri: str = USER_CONFIG_DIR / "explorer",
61
+ ) -> None:
62
+ checks.check_requirements(["lancedb>=0.4.3", "duckdb"])
63
+ import lancedb
64
+
65
+ self.connection = lancedb.connect(uri)
66
+ self.table_name = Path(data).name.lower() + "_" + model.lower()
67
+ self.sim_idx_base_name = (
68
+ f"{self.table_name}_sim_idx".lower()
69
+ ) # Use this name and append thres and top_k to reuse the table
70
+ self.model = YOLO(model)
71
+ self.data = data # None
72
+ self.choice_set = None
73
+
74
+ self.table = None
75
+ self.progress = 0
76
+
77
+ def create_embeddings_table(self, force: bool = False, split: str = "train") -> None:
78
+ """
79
+ Create LanceDB table containing the embeddings of the images in the dataset. The table will be reused if it
80
+ already exists. Pass force=True to overwrite the existing table.
81
+
82
+ Args:
83
+ force (bool): Whether to overwrite the existing table or not. Defaults to False.
84
+ split (str): Split of the dataset to use. Defaults to 'train'.
85
+
86
+ Example:
87
+ ```python
88
+ exp = Explorer()
89
+ exp.create_embeddings_table()
90
+ ```
91
+ """
92
+ if self.table is not None and not force:
93
+ LOGGER.info("Table already exists. Reusing it. Pass force=True to overwrite it.")
94
+ return
95
+ if self.table_name in self.connection.table_names() and not force:
96
+ LOGGER.info(f"Table {self.table_name} already exists. Reusing it. Pass force=True to overwrite it.")
97
+ self.table = self.connection.open_table(self.table_name)
98
+ self.progress = 1
99
+ return
100
+ if self.data is None:
101
+ raise ValueError("Data must be provided to create embeddings table")
102
+
103
+ data_info = check_det_dataset(self.data)
104
+ if split not in data_info:
105
+ raise ValueError(
106
+ f"Split {split} is not found in the dataset. Available keys in the dataset are {list(data_info.keys())}"
107
+ )
108
+
109
+ choice_set = data_info[split]
110
+ choice_set = choice_set if isinstance(choice_set, list) else [choice_set]
111
+ self.choice_set = choice_set
112
+ dataset = ExplorerDataset(img_path=choice_set, data=data_info, augment=False, cache=False, task=self.model.task)
113
+
114
+ # Create the table schema
115
+ batch = dataset[0]
116
+ vector_size = self.model.embed(batch["im_file"], verbose=False)[0].shape[0]
117
+ table = self.connection.create_table(self.table_name, schema=get_table_schema(vector_size), mode="overwrite")
118
+ table.add(
119
+ self._yield_batches(
120
+ dataset,
121
+ data_info,
122
+ self.model,
123
+ exclude_keys=["img", "ratio_pad", "resized_shape", "ori_shape", "batch_idx"],
124
+ )
125
+ )
126
+
127
+ self.table = table
128
+
129
+ def _yield_batches(self, dataset: ExplorerDataset, data_info: dict, model: YOLO, exclude_keys: List[str]):
130
+ """Generates batches of data for embedding, excluding specified keys."""
131
+ for i in tqdm(range(len(dataset))):
132
+ self.progress = float(i + 1) / len(dataset)
133
+ batch = dataset[i]
134
+ for k in exclude_keys:
135
+ batch.pop(k, None)
136
+ batch = sanitize_batch(batch, data_info)
137
+ batch["vector"] = model.embed(batch["im_file"], verbose=False)[0].detach().tolist()
138
+ yield [batch]
139
+
140
+ def query(
141
+ self, imgs: Union[str, np.ndarray, List[str], List[np.ndarray]] = None, limit: int = 25
142
+ ) -> Any: # pyarrow.Table
143
+ """
144
+ Query the table for similar images. Accepts a single image or a list of images.
145
+
146
+ Args:
147
+ imgs (str or list): Path to the image or a list of paths to the images.
148
+ limit (int): Number of results to return.
149
+
150
+ Returns:
151
+ (pyarrow.Table): An arrow table containing the results. Supports converting to:
152
+ - pandas dataframe: `result.to_pandas()`
153
+ - dict of lists: `result.to_pydict()`
154
+
155
+ Example:
156
+ ```python
157
+ exp = Explorer()
158
+ exp.create_embeddings_table()
159
+ similar = exp.query(img='https://ultralytics.com/images/zidane.jpg')
160
+ ```
161
+ """
162
+ if self.table is None:
163
+ raise ValueError("Table is not created. Please create the table first.")
164
+ if isinstance(imgs, str):
165
+ imgs = [imgs]
166
+ assert isinstance(imgs, list), f"img must be a string or a list of strings. Got {type(imgs)}"
167
+ embeds = self.model.embed(imgs)
168
+ # Get avg if multiple images are passed (len > 1)
169
+ embeds = torch.mean(torch.stack(embeds), 0).cpu().numpy() if len(embeds) > 1 else embeds[0].cpu().numpy()
170
+ return self.table.search(embeds).limit(limit).to_arrow()
171
+
172
+ def sql_query(
173
+ self, query: str, return_type: str = "pandas"
174
+ ) -> Union[DataFrame, Any, None]: # pandas.dataframe or pyarrow.Table
175
+ """
176
+ Run a SQL-Like query on the table. Utilizes LanceDB predicate pushdown.
177
+
178
+ Args:
179
+ query (str): SQL query to run.
180
+ return_type (str): Type of the result to return. Can be either 'pandas' or 'arrow'. Defaults to 'pandas'.
181
+
182
+ Returns:
183
+ (pyarrow.Table): An arrow table containing the results.
184
+
185
+ Example:
186
+ ```python
187
+ exp = Explorer()
188
+ exp.create_embeddings_table()
189
+ query = "SELECT * FROM 'table' WHERE labels LIKE '%person%'"
190
+ result = exp.sql_query(query)
191
+ ```
192
+ """
193
+ assert return_type in {
194
+ "pandas",
195
+ "arrow",
196
+ }, f"Return type should be either `pandas` or `arrow`, but got {return_type}"
197
+ import duckdb
198
+
199
+ if self.table is None:
200
+ raise ValueError("Table is not created. Please create the table first.")
201
+
202
+ # Note: using filter pushdown would be a better long term solution. Temporarily using duckdb for this.
203
+ table = self.table.to_arrow() # noqa NOTE: Don't comment this. This line is used by DuckDB
204
+ if not query.startswith("SELECT") and not query.startswith("WHERE"):
205
+ raise ValueError(
206
+ f"Query must start with SELECT or WHERE. You can either pass the entire query or just the WHERE clause. found {query}"
207
+ )
208
+ if query.startswith("WHERE"):
209
+ query = f"SELECT * FROM 'table' {query}"
210
+ LOGGER.info(f"Running query: {query}")
211
+
212
+ rs = duckdb.sql(query)
213
+ if return_type == "arrow":
214
+ return rs.arrow()
215
+ elif return_type == "pandas":
216
+ return rs.df()
217
+
218
+ def plot_sql_query(self, query: str, labels: bool = True) -> Image.Image:
219
+ """
220
+ Plot the results of a SQL-Like query on the table.
221
+ Args:
222
+ query (str): SQL query to run.
223
+ labels (bool): Whether to plot the labels or not.
224
+
225
+ Returns:
226
+ (PIL.Image): Image containing the plot.
227
+
228
+ Example:
229
+ ```python
230
+ exp = Explorer()
231
+ exp.create_embeddings_table()
232
+ query = "SELECT * FROM 'table' WHERE labels LIKE '%person%'"
233
+ result = exp.plot_sql_query(query)
234
+ ```
235
+ """
236
+ result = self.sql_query(query, return_type="arrow")
237
+ if len(result) == 0:
238
+ LOGGER.info("No results found.")
239
+ return None
240
+ img = plot_query_result(result, plot_labels=labels)
241
+ return Image.fromarray(img)
242
+
243
+ def get_similar(
244
+ self,
245
+ img: Union[str, np.ndarray, List[str], List[np.ndarray]] = None,
246
+ idx: Union[int, List[int]] = None,
247
+ limit: int = 25,
248
+ return_type: str = "pandas",
249
+ ) -> Union[DataFrame, Any]: # pandas.dataframe or pyarrow.Table
250
+ """
251
+ Query the table for similar images. Accepts a single image or a list of images.
252
+
253
+ Args:
254
+ img (str or list): Path to the image or a list of paths to the images.
255
+ idx (int or list): Index of the image in the table or a list of indexes.
256
+ limit (int): Number of results to return. Defaults to 25.
257
+ return_type (str): Type of the result to return. Can be either 'pandas' or 'arrow'. Defaults to 'pandas'.
258
+
259
+ Returns:
260
+ (pandas.DataFrame): A dataframe containing the results.
261
+
262
+ Example:
263
+ ```python
264
+ exp = Explorer()
265
+ exp.create_embeddings_table()
266
+ similar = exp.get_similar(img='https://ultralytics.com/images/zidane.jpg')
267
+ ```
268
+ """
269
+ assert return_type in {
270
+ "pandas",
271
+ "arrow",
272
+ }, f"Return type should be either `pandas` or `arrow`, but got {return_type}"
273
+ img = self._check_imgs_or_idxs(img, idx)
274
+ similar = self.query(img, limit=limit)
275
+
276
+ if return_type == "arrow":
277
+ return similar
278
+ elif return_type == "pandas":
279
+ return similar.to_pandas()
280
+
281
+ def plot_similar(
282
+ self,
283
+ img: Union[str, np.ndarray, List[str], List[np.ndarray]] = None,
284
+ idx: Union[int, List[int]] = None,
285
+ limit: int = 25,
286
+ labels: bool = True,
287
+ ) -> Image.Image:
288
+ """
289
+ Plot the similar images. Accepts images or indexes.
290
+
291
+ Args:
292
+ img (str or list): Path to the image or a list of paths to the images.
293
+ idx (int or list): Index of the image in the table or a list of indexes.
294
+ labels (bool): Whether to plot the labels or not.
295
+ limit (int): Number of results to return. Defaults to 25.
296
+
297
+ Returns:
298
+ (PIL.Image): Image containing the plot.
299
+
300
+ Example:
301
+ ```python
302
+ exp = Explorer()
303
+ exp.create_embeddings_table()
304
+ similar = exp.plot_similar(img='https://ultralytics.com/images/zidane.jpg')
305
+ ```
306
+ """
307
+ similar = self.get_similar(img, idx, limit, return_type="arrow")
308
+ if len(similar) == 0:
309
+ LOGGER.info("No results found.")
310
+ return None
311
+ img = plot_query_result(similar, plot_labels=labels)
312
+ return Image.fromarray(img)
313
+
314
+ def similarity_index(self, max_dist: float = 0.2, top_k: float = None, force: bool = False) -> DataFrame:
315
+ """
316
+ Calculate the similarity index of all the images in the table. Here, the index will contain the data points that
317
+ are max_dist or closer to the image in the embedding space at a given index.
318
+
319
+ Args:
320
+ max_dist (float): maximum L2 distance between the embeddings to consider. Defaults to 0.2.
321
+ top_k (float): Percentage of the closest data points to consider when counting. Used to apply limit when running
322
+ vector search. Defaults: None.
323
+ force (bool): Whether to overwrite the existing similarity index or not. Defaults to True.
324
+
325
+ Returns:
326
+ (pandas.DataFrame): A dataframe containing the similarity index. Each row corresponds to an image, and columns
327
+ include indices of similar images and their respective distances.
328
+
329
+ Example:
330
+ ```python
331
+ exp = Explorer()
332
+ exp.create_embeddings_table()
333
+ sim_idx = exp.similarity_index()
334
+ ```
335
+ """
336
+ if self.table is None:
337
+ raise ValueError("Table is not created. Please create the table first.")
338
+ sim_idx_table_name = f"{self.sim_idx_base_name}_thres_{max_dist}_top_{top_k}".lower()
339
+ if sim_idx_table_name in self.connection.table_names() and not force:
340
+ LOGGER.info("Similarity matrix already exists. Reusing it. Pass force=True to overwrite it.")
341
+ return self.connection.open_table(sim_idx_table_name).to_pandas()
342
+
343
+ if top_k and not (1.0 >= top_k >= 0.0):
344
+ raise ValueError(f"top_k must be between 0.0 and 1.0. Got {top_k}")
345
+ if max_dist < 0.0:
346
+ raise ValueError(f"max_dist must be greater than 0. Got {max_dist}")
347
+
348
+ top_k = int(top_k * len(self.table)) if top_k else len(self.table)
349
+ top_k = max(top_k, 1)
350
+ features = self.table.to_lance().to_table(columns=["vector", "im_file"]).to_pydict()
351
+ im_files = features["im_file"]
352
+ embeddings = features["vector"]
353
+
354
+ sim_table = self.connection.create_table(sim_idx_table_name, schema=get_sim_index_schema(), mode="overwrite")
355
+
356
+ def _yield_sim_idx():
357
+ """Generates a dataframe with similarity indices and distances for images."""
358
+ for i in tqdm(range(len(embeddings))):
359
+ sim_idx = self.table.search(embeddings[i]).limit(top_k).to_pandas().query(f"_distance <= {max_dist}")
360
+ yield [
361
+ {
362
+ "idx": i,
363
+ "im_file": im_files[i],
364
+ "count": len(sim_idx),
365
+ "sim_im_files": sim_idx["im_file"].tolist(),
366
+ }
367
+ ]
368
+
369
+ sim_table.add(_yield_sim_idx())
370
+ self.sim_index = sim_table
371
+ return sim_table.to_pandas()
372
+
373
+ def plot_similarity_index(self, max_dist: float = 0.2, top_k: float = None, force: bool = False) -> Image:
374
+ """
375
+ Plot the similarity index of all the images in the table. Here, the index will contain the data points that are
376
+ max_dist or closer to the image in the embedding space at a given index.
377
+
378
+ Args:
379
+ max_dist (float): maximum L2 distance between the embeddings to consider. Defaults to 0.2.
380
+ top_k (float): Percentage of closest data points to consider when counting. Used to apply limit when
381
+ running vector search. Defaults to 0.01.
382
+ force (bool): Whether to overwrite the existing similarity index or not. Defaults to True.
383
+
384
+ Returns:
385
+ (PIL.Image): Image containing the plot.
386
+
387
+ Example:
388
+ ```python
389
+ exp = Explorer()
390
+ exp.create_embeddings_table()
391
+
392
+ similarity_idx_plot = exp.plot_similarity_index()
393
+ similarity_idx_plot.show() # view image preview
394
+ similarity_idx_plot.save('path/to/save/similarity_index_plot.png') # save contents to file
395
+ ```
396
+ """
397
+ sim_idx = self.similarity_index(max_dist=max_dist, top_k=top_k, force=force)
398
+ sim_count = sim_idx["count"].tolist()
399
+ sim_count = np.array(sim_count)
400
+
401
+ indices = np.arange(len(sim_count))
402
+
403
+ # Create the bar plot
404
+ plt.bar(indices, sim_count)
405
+
406
+ # Customize the plot (optional)
407
+ plt.xlabel("data idx")
408
+ plt.ylabel("Count")
409
+ plt.title("Similarity Count")
410
+ buffer = BytesIO()
411
+ plt.savefig(buffer, format="png")
412
+ buffer.seek(0)
413
+
414
+ # Use Pillow to open the image from the buffer
415
+ return Image.fromarray(np.array(Image.open(buffer)))
416
+
417
+ def _check_imgs_or_idxs(
418
+ self, img: Union[str, np.ndarray, List[str], List[np.ndarray], None], idx: Union[None, int, List[int]]
419
+ ) -> List[np.ndarray]:
420
+ if img is None and idx is None:
421
+ raise ValueError("Either img or idx must be provided.")
422
+ if img is not None and idx is not None:
423
+ raise ValueError("Only one of img or idx must be provided.")
424
+ if idx is not None:
425
+ idx = idx if isinstance(idx, list) else [idx]
426
+ img = self.table.to_lance().take(idx, columns=["im_file"]).to_pydict()["im_file"]
427
+
428
+ return img if isinstance(img, list) else [img]
429
+
430
+ def ask_ai(self, query):
431
+ """
432
+ Ask AI a question.
433
+
434
+ Args:
435
+ query (str): Question to ask.
436
+
437
+ Returns:
438
+ (pandas.DataFrame): A dataframe containing filtered results to the SQL query.
439
+
440
+ Example:
441
+ ```python
442
+ exp = Explorer()
443
+ exp.create_embeddings_table()
444
+ answer = exp.ask_ai('Show images with 1 person and 2 dogs')
445
+ ```
446
+ """
447
+ result = prompt_sql_query(query)
448
+ try:
449
+ df = self.sql_query(result)
450
+ except Exception as e:
451
+ LOGGER.error("AI generated query is not valid. Please try again with a different prompt")
452
+ LOGGER.error(e)
453
+ return None
454
+ return df
455
+
456
+ def visualize(self, result):
457
+ """
458
+ Visualize the results of a query. TODO.
459
+
460
+ Args:
461
+ result (pyarrow.Table): Table containing the results of a query.
462
+ """
463
+ pass
464
+
465
+ def generate_report(self, result):
466
+ """
467
+ Generate a report of the dataset.
468
+
469
+ TODO
470
+ """
471
+ pass
yolov8_model/ultralytics/data/explorer/gui/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
yolov8_model/ultralytics/data/explorer/gui/dash.py ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+
3
+ import time
4
+ from threading import Thread
5
+
6
+ import pandas as pd
7
+
8
+ from ultralytics import Explorer
9
+ from ultralytics.utils import ROOT, SETTINGS
10
+ from ultralytics.utils.checks import check_requirements
11
+
12
+ check_requirements(("streamlit>=1.29.0", "streamlit-select>=0.3"))
13
+
14
+ import streamlit as st
15
+ from streamlit_select import image_select
16
+
17
+
18
+ def _get_explorer():
19
+ """Initializes and returns an instance of the Explorer class."""
20
+ exp = Explorer(data=st.session_state.get("dataset"), model=st.session_state.get("model"))
21
+ thread = Thread(
22
+ target=exp.create_embeddings_table, kwargs={"force": st.session_state.get("force_recreate_embeddings")}
23
+ )
24
+ thread.start()
25
+ progress_bar = st.progress(0, text="Creating embeddings table...")
26
+ while exp.progress < 1:
27
+ time.sleep(0.1)
28
+ progress_bar.progress(exp.progress, text=f"Progress: {exp.progress * 100}%")
29
+ thread.join()
30
+ st.session_state["explorer"] = exp
31
+ progress_bar.empty()
32
+
33
+
34
+ def init_explorer_form():
35
+ """Initializes an Explorer instance and creates embeddings table with progress tracking."""
36
+ datasets = ROOT / "cfg" / "datasets"
37
+ ds = [d.name for d in datasets.glob("*.yaml")]
38
+ models = [
39
+ "yolov8n.pt",
40
+ "yolov8s.pt",
41
+ "yolov8m.pt",
42
+ "yolov8l.pt",
43
+ "yolov8x.pt",
44
+ "yolov8n-seg.pt",
45
+ "yolov8s-seg.pt",
46
+ "yolov8m-seg.pt",
47
+ "yolov8l-seg.pt",
48
+ "yolov8x-seg.pt",
49
+ "yolov8n-pose.pt",
50
+ "yolov8s-pose.pt",
51
+ "yolov8m-pose.pt",
52
+ "yolov8l-pose.pt",
53
+ "yolov8x-pose.pt",
54
+ ]
55
+ with st.form(key="explorer_init_form"):
56
+ col1, col2 = st.columns(2)
57
+ with col1:
58
+ st.selectbox("Select dataset", ds, key="dataset", index=ds.index("coco128.yaml"))
59
+ with col2:
60
+ st.selectbox("Select model", models, key="model")
61
+ st.checkbox("Force recreate embeddings", key="force_recreate_embeddings")
62
+
63
+ st.form_submit_button("Explore", on_click=_get_explorer)
64
+
65
+
66
+ def query_form():
67
+ """Sets up a form in Streamlit to initialize Explorer with dataset and model selection."""
68
+ with st.form("query_form"):
69
+ col1, col2 = st.columns([0.8, 0.2])
70
+ with col1:
71
+ st.text_input(
72
+ "Query",
73
+ "WHERE labels LIKE '%person%' AND labels LIKE '%dog%'",
74
+ label_visibility="collapsed",
75
+ key="query",
76
+ )
77
+ with col2:
78
+ st.form_submit_button("Query", on_click=run_sql_query)
79
+
80
+
81
+ def ai_query_form():
82
+ """Sets up a Streamlit form for user input to initialize Explorer with dataset and model selection."""
83
+ with st.form("ai_query_form"):
84
+ col1, col2 = st.columns([0.8, 0.2])
85
+ with col1:
86
+ st.text_input("Query", "Show images with 1 person and 1 dog", label_visibility="collapsed", key="ai_query")
87
+ with col2:
88
+ st.form_submit_button("Ask AI", on_click=run_ai_query)
89
+
90
+
91
+ def find_similar_imgs(imgs):
92
+ """Initializes a Streamlit form for AI-based image querying with custom input."""
93
+ exp = st.session_state["explorer"]
94
+ similar = exp.get_similar(img=imgs, limit=st.session_state.get("limit"), return_type="arrow")
95
+ paths = similar.to_pydict()["im_file"]
96
+ st.session_state["imgs"] = paths
97
+ st.session_state["res"] = similar
98
+
99
+
100
+ def similarity_form(selected_imgs):
101
+ """Initializes a form for AI-based image querying with custom input in Streamlit."""
102
+ st.write("Similarity Search")
103
+ with st.form("similarity_form"):
104
+ subcol1, subcol2 = st.columns([1, 1])
105
+ with subcol1:
106
+ st.number_input(
107
+ "limit", min_value=None, max_value=None, value=25, label_visibility="collapsed", key="limit"
108
+ )
109
+
110
+ with subcol2:
111
+ disabled = not len(selected_imgs)
112
+ st.write("Selected: ", len(selected_imgs))
113
+ st.form_submit_button(
114
+ "Search",
115
+ disabled=disabled,
116
+ on_click=find_similar_imgs,
117
+ args=(selected_imgs,),
118
+ )
119
+ if disabled:
120
+ st.error("Select at least one image to search.")
121
+
122
+
123
+ # def persist_reset_form():
124
+ # with st.form("persist_reset"):
125
+ # col1, col2 = st.columns([1, 1])
126
+ # with col1:
127
+ # st.form_submit_button("Reset", on_click=reset)
128
+ #
129
+ # with col2:
130
+ # st.form_submit_button("Persist", on_click=update_state, args=("PERSISTING", True))
131
+
132
+
133
+ def run_sql_query():
134
+ """Executes an SQL query and returns the results."""
135
+ st.session_state["error"] = None
136
+ query = st.session_state.get("query")
137
+ if query.rstrip().lstrip():
138
+ exp = st.session_state["explorer"]
139
+ res = exp.sql_query(query, return_type="arrow")
140
+ st.session_state["imgs"] = res.to_pydict()["im_file"]
141
+ st.session_state["res"] = res
142
+
143
+
144
+ def run_ai_query():
145
+ """Execute SQL query and update session state with query results."""
146
+ if not SETTINGS["openai_api_key"]:
147
+ st.session_state[
148
+ "error"
149
+ ] = 'OpenAI API key not found in settings. Please run yolo settings openai_api_key="..."'
150
+ return
151
+ st.session_state["error"] = None
152
+ query = st.session_state.get("ai_query")
153
+ if query.rstrip().lstrip():
154
+ exp = st.session_state["explorer"]
155
+ res = exp.ask_ai(query)
156
+ if not isinstance(res, pd.DataFrame) or res.empty:
157
+ st.session_state["error"] = "No results found using AI generated query. Try another query or rerun it."
158
+ return
159
+ st.session_state["imgs"] = res["im_file"].to_list()
160
+ st.session_state["res"] = res
161
+
162
+
163
+ def reset_explorer():
164
+ """Resets the explorer to its initial state by clearing session variables."""
165
+ st.session_state["explorer"] = None
166
+ st.session_state["imgs"] = None
167
+ st.session_state["error"] = None
168
+
169
+
170
+ def utralytics_explorer_docs_callback():
171
+ """Resets the explorer to its initial state by clearing session variables."""
172
+ with st.container(border=True):
173
+ st.image(
174
+ "https://raw.githubusercontent.com/ultralytics/assets/main/logo/Ultralytics_Logotype_Original.svg",
175
+ width=100,
176
+ )
177
+ st.markdown(
178
+ "<p>This demo is built using Ultralytics Explorer API. Visit <a href='https://docs.ultralytics.com/datasets/explorer/'>API docs</a> to try examples & learn more</p>",
179
+ unsafe_allow_html=True,
180
+ help=None,
181
+ )
182
+ st.link_button("Ultrlaytics Explorer API", "https://docs.ultralytics.com/datasets/explorer/")
183
+
184
+
185
+ def layout():
186
+ """Resets explorer session variables and provides documentation with a link to API docs."""
187
+ st.set_page_config(layout="wide", initial_sidebar_state="collapsed")
188
+ st.markdown("<h1 style='text-align: center;'>Ultralytics Explorer Demo</h1>", unsafe_allow_html=True)
189
+
190
+ if st.session_state.get("explorer") is None:
191
+ init_explorer_form()
192
+ return
193
+
194
+ st.button(":arrow_backward: Select Dataset", on_click=reset_explorer)
195
+ exp = st.session_state.get("explorer")
196
+ col1, col2 = st.columns([0.75, 0.25], gap="small")
197
+ imgs = []
198
+ if st.session_state.get("error"):
199
+ st.error(st.session_state["error"])
200
+ else:
201
+ if st.session_state.get("imgs"):
202
+ imgs = st.session_state.get("imgs")
203
+ else:
204
+ imgs = exp.table.to_lance().to_table(columns=["im_file"]).to_pydict()["im_file"]
205
+ st.session_state["res"] = exp.table.to_arrow()
206
+ total_imgs, selected_imgs = len(imgs), []
207
+ with col1:
208
+ subcol1, subcol2, subcol3, subcol4, subcol5 = st.columns(5)
209
+ with subcol1:
210
+ st.write("Max Images Displayed:")
211
+ with subcol2:
212
+ num = st.number_input(
213
+ "Max Images Displayed",
214
+ min_value=0,
215
+ max_value=total_imgs,
216
+ value=min(500, total_imgs),
217
+ key="num_imgs_displayed",
218
+ label_visibility="collapsed",
219
+ )
220
+ with subcol3:
221
+ st.write("Start Index:")
222
+ with subcol4:
223
+ start_idx = st.number_input(
224
+ "Start Index",
225
+ min_value=0,
226
+ max_value=total_imgs,
227
+ value=0,
228
+ key="start_index",
229
+ label_visibility="collapsed",
230
+ )
231
+ with subcol5:
232
+ reset = st.button("Reset", use_container_width=False, key="reset")
233
+ if reset:
234
+ st.session_state["imgs"] = None
235
+ st.experimental_rerun()
236
+
237
+ query_form()
238
+ ai_query_form()
239
+ if total_imgs:
240
+ labels, boxes, masks, kpts, classes = None, None, None, None, None
241
+ task = exp.model.task
242
+ if st.session_state.get("display_labels"):
243
+ labels = st.session_state.get("res").to_pydict()["labels"][start_idx : start_idx + num]
244
+ boxes = st.session_state.get("res").to_pydict()["bboxes"][start_idx : start_idx + num]
245
+ masks = st.session_state.get("res").to_pydict()["masks"][start_idx : start_idx + num]
246
+ kpts = st.session_state.get("res").to_pydict()["keypoints"][start_idx : start_idx + num]
247
+ classes = st.session_state.get("res").to_pydict()["cls"][start_idx : start_idx + num]
248
+ imgs_displayed = imgs[start_idx : start_idx + num]
249
+ selected_imgs = image_select(
250
+ f"Total samples: {total_imgs}",
251
+ images=imgs_displayed,
252
+ use_container_width=False,
253
+ # indices=[i for i in range(num)] if select_all else None,
254
+ labels=labels,
255
+ classes=classes,
256
+ bboxes=boxes,
257
+ masks=masks if task == "segment" else None,
258
+ kpts=kpts if task == "pose" else None,
259
+ )
260
+
261
+ with col2:
262
+ similarity_form(selected_imgs)
263
+ display_labels = st.checkbox("Labels", value=False, key="display_labels")
264
+ utralytics_explorer_docs_callback()
265
+
266
+
267
+ if __name__ == "__main__":
268
+ layout()
yolov8_model/ultralytics/data/explorer/utils.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+
3
+ import getpass
4
+ from typing import List
5
+
6
+ import cv2
7
+ import numpy as np
8
+ import pandas as pd
9
+
10
+ from yolov8_model.ultralytics.data.augment import LetterBox
11
+ from yolov8_model.ultralytics.utils import LOGGER as logger
12
+ from yolov8_model.ultralytics.utils import SETTINGS
13
+ from yolov8_model.ultralytics.utils.checks import check_requirements
14
+ from yolov8_model.ultralytics.utils.ops import xyxy2xywh
15
+ from yolov8_model.ultralytics.utils.plotting import plot_images
16
+
17
+
18
+ def get_table_schema(vector_size):
19
+ """Extracts and returns the schema of a database table."""
20
+ from lancedb.pydantic import LanceModel, Vector
21
+
22
+ class Schema(LanceModel):
23
+ im_file: str
24
+ labels: List[str]
25
+ cls: List[int]
26
+ bboxes: List[List[float]]
27
+ masks: List[List[List[int]]]
28
+ keypoints: List[List[List[float]]]
29
+ vector: Vector(vector_size)
30
+
31
+ return Schema
32
+
33
+
34
+ def get_sim_index_schema():
35
+ """Returns a LanceModel schema for a database table with specified vector size."""
36
+ from lancedb.pydantic import LanceModel
37
+
38
+ class Schema(LanceModel):
39
+ idx: int
40
+ im_file: str
41
+ count: int
42
+ sim_im_files: List[str]
43
+
44
+ return Schema
45
+
46
+
47
+ def sanitize_batch(batch, dataset_info):
48
+ """Sanitizes input batch for inference, ensuring correct format and dimensions."""
49
+ batch["cls"] = batch["cls"].flatten().int().tolist()
50
+ box_cls_pair = sorted(zip(batch["bboxes"].tolist(), batch["cls"]), key=lambda x: x[1])
51
+ batch["bboxes"] = [box for box, _ in box_cls_pair]
52
+ batch["cls"] = [cls for _, cls in box_cls_pair]
53
+ batch["labels"] = [dataset_info["names"][i] for i in batch["cls"]]
54
+ batch["masks"] = batch["masks"].tolist() if "masks" in batch else [[[]]]
55
+ batch["keypoints"] = batch["keypoints"].tolist() if "keypoints" in batch else [[[]]]
56
+ return batch
57
+
58
+
59
+ def plot_query_result(similar_set, plot_labels=True):
60
+ """
61
+ Plot images from the similar set.
62
+
63
+ Args:
64
+ similar_set (list): Pyarrow or pandas object containing the similar data points
65
+ plot_labels (bool): Whether to plot labels or not
66
+ """
67
+ similar_set = (
68
+ similar_set.to_dict(orient="list") if isinstance(similar_set, pd.DataFrame) else similar_set.to_pydict()
69
+ )
70
+ empty_masks = [[[]]]
71
+ empty_boxes = [[]]
72
+ images = similar_set.get("im_file", [])
73
+ bboxes = similar_set.get("bboxes", []) if similar_set.get("bboxes") is not empty_boxes else []
74
+ masks = similar_set.get("masks") if similar_set.get("masks")[0] != empty_masks else []
75
+ kpts = similar_set.get("keypoints") if similar_set.get("keypoints")[0] != empty_masks else []
76
+ cls = similar_set.get("cls", [])
77
+
78
+ plot_size = 640
79
+ imgs, batch_idx, plot_boxes, plot_masks, plot_kpts = [], [], [], [], []
80
+ for i, imf in enumerate(images):
81
+ im = cv2.imread(imf)
82
+ im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
83
+ h, w = im.shape[:2]
84
+ r = min(plot_size / h, plot_size / w)
85
+ imgs.append(LetterBox(plot_size, center=False)(image=im).transpose(2, 0, 1))
86
+ if plot_labels:
87
+ if len(bboxes) > i and len(bboxes[i]) > 0:
88
+ box = np.array(bboxes[i], dtype=np.float32)
89
+ box[:, [0, 2]] *= r
90
+ box[:, [1, 3]] *= r
91
+ plot_boxes.append(box)
92
+ if len(masks) > i and len(masks[i]) > 0:
93
+ mask = np.array(masks[i], dtype=np.uint8)[0]
94
+ plot_masks.append(LetterBox(plot_size, center=False)(image=mask))
95
+ if len(kpts) > i and kpts[i] is not None:
96
+ kpt = np.array(kpts[i], dtype=np.float32)
97
+ kpt[:, :, :2] *= r
98
+ plot_kpts.append(kpt)
99
+ batch_idx.append(np.ones(len(np.array(bboxes[i], dtype=np.float32))) * i)
100
+ imgs = np.stack(imgs, axis=0)
101
+ masks = np.stack(plot_masks, axis=0) if plot_masks else np.zeros(0, dtype=np.uint8)
102
+ kpts = np.concatenate(plot_kpts, axis=0) if plot_kpts else np.zeros((0, 51), dtype=np.float32)
103
+ boxes = xyxy2xywh(np.concatenate(plot_boxes, axis=0)) if plot_boxes else np.zeros(0, dtype=np.float32)
104
+ batch_idx = np.concatenate(batch_idx, axis=0)
105
+ cls = np.concatenate([np.array(c, dtype=np.int32) for c in cls], axis=0)
106
+
107
+ return plot_images(
108
+ imgs, batch_idx, cls, bboxes=boxes, masks=masks, kpts=kpts, max_subplots=len(images), save=False, threaded=False
109
+ )
110
+
111
+
112
+ def prompt_sql_query(query):
113
+ """Plots images with optional labels from a similar data set."""
114
+ check_requirements("openai>=1.6.1")
115
+ from openai import OpenAI
116
+
117
+ if not SETTINGS["openai_api_key"]:
118
+ logger.warning("OpenAI API key not found in settings. Please enter your API key below.")
119
+ openai_api_key = getpass.getpass("OpenAI API key: ")
120
+ SETTINGS.update({"openai_api_key": openai_api_key})
121
+ openai = OpenAI(api_key=SETTINGS["openai_api_key"])
122
+
123
+ messages = [
124
+ {
125
+ "role": "system",
126
+ "content": """
127
+ You are a helpful data scientist proficient in SQL. You need to output exactly one SQL query based on
128
+ the following schema and a user request. You only need to output the format with fixed selection
129
+ statement that selects everything from "'table'", like `SELECT * from 'table'`
130
+
131
+ Schema:
132
+ im_file: string not null
133
+ labels: list<item: string> not null
134
+ child 0, item: string
135
+ cls: list<item: int64> not null
136
+ child 0, item: int64
137
+ bboxes: list<item: list<item: double>> not null
138
+ child 0, item: list<item: double>
139
+ child 0, item: double
140
+ masks: list<item: list<item: list<item: int64>>> not null
141
+ child 0, item: list<item: list<item: int64>>
142
+ child 0, item: list<item: int64>
143
+ child 0, item: int64
144
+ keypoints: list<item: list<item: list<item: double>>> not null
145
+ child 0, item: list<item: list<item: double>>
146
+ child 0, item: list<item: double>
147
+ child 0, item: double
148
+ vector: fixed_size_list<item: float>[256] not null
149
+ child 0, item: float
150
+
151
+ Some details about the schema:
152
+ - the "labels" column contains the string values like 'person' and 'dog' for the respective objects
153
+ in each image
154
+ - the "cls" column contains the integer values on these classes that map them the labels
155
+
156
+ Example of a correct query:
157
+ request - Get all data points that contain 2 or more people and at least one dog
158
+ correct query-
159
+ SELECT * FROM 'table' WHERE ARRAY_LENGTH(cls) >= 2 AND ARRAY_LENGTH(FILTER(labels, x -> x = 'person')) >= 2 AND ARRAY_LENGTH(FILTER(labels, x -> x = 'dog')) >= 1;
160
+ """,
161
+ },
162
+ {"role": "user", "content": f"{query}"},
163
+ ]
164
+
165
+ response = openai.chat.completions.create(model="gpt-3.5-turbo", messages=messages)
166
+ return response.choices[0].message.content
yolov8_model/ultralytics/data/loaders.py ADDED
@@ -0,0 +1,533 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+
3
+ import glob
4
+ import math
5
+ import os
6
+ import time
7
+ from dataclasses import dataclass
8
+ from pathlib import Path
9
+ from threading import Thread
10
+ from urllib.parse import urlparse
11
+
12
+ import cv2
13
+ import numpy as np
14
+ import requests
15
+ import torch
16
+ from PIL import Image
17
+
18
+ from yolov8_model.ultralytics.data.utils import IMG_FORMATS, VID_FORMATS
19
+ from yolov8_model.ultralytics.utils import LOGGER, is_colab, is_kaggle, ops
20
+ from yolov8_model.ultralytics.utils.checks import check_requirements
21
+
22
+
23
+ @dataclass
24
+ class SourceTypes:
25
+ """Class to represent various types of input sources for predictions."""
26
+
27
+ webcam: bool = False
28
+ screenshot: bool = False
29
+ from_img: bool = False
30
+ tensor: bool = False
31
+
32
+
33
+ class LoadStreams:
34
+ """
35
+ Stream Loader for various types of video streams.
36
+
37
+ Suitable for use with `yolo predict source='rtsp://example.com/media.mp4'`, supports RTSP, RTMP, HTTP, and TCP streams.
38
+
39
+ Attributes:
40
+ sources (str): The source input paths or URLs for the video streams.
41
+ vid_stride (int): Video frame-rate stride, defaults to 1.
42
+ buffer (bool): Whether to buffer input streams, defaults to False.
43
+ running (bool): Flag to indicate if the streaming thread is running.
44
+ mode (str): Set to 'stream' indicating real-time capture.
45
+ imgs (list): List of image frames for each stream.
46
+ fps (list): List of FPS for each stream.
47
+ frames (list): List of total frames for each stream.
48
+ threads (list): List of threads for each stream.
49
+ shape (list): List of shapes for each stream.
50
+ caps (list): List of cv2.VideoCapture objects for each stream.
51
+ bs (int): Batch size for processing.
52
+
53
+ Methods:
54
+ __init__: Initialize the stream loader.
55
+ update: Read stream frames in daemon thread.
56
+ close: Close stream loader and release resources.
57
+ __iter__: Returns an iterator object for the class.
58
+ __next__: Returns source paths, transformed, and original images for processing.
59
+ __len__: Return the length of the sources object.
60
+ """
61
+
62
+ def __init__(self, sources="file.streams", vid_stride=1, buffer=False):
63
+ """Initialize instance variables and check for consistent input stream shapes."""
64
+ torch.backends.cudnn.benchmark = True # faster for fixed-size inference
65
+ self.buffer = buffer # buffer input streams
66
+ self.running = True # running flag for Thread
67
+ self.mode = "stream"
68
+ self.vid_stride = vid_stride # video frame-rate stride
69
+
70
+ sources = Path(sources).read_text().rsplit() if os.path.isfile(sources) else [sources]
71
+ n = len(sources)
72
+ self.fps = [0] * n # frames per second
73
+ self.frames = [0] * n
74
+ self.threads = [None] * n
75
+ self.caps = [None] * n # video capture objects
76
+ self.imgs = [[] for _ in range(n)] # images
77
+ self.shape = [[] for _ in range(n)] # image shapes
78
+ self.sources = [ops.clean_str(x) for x in sources] # clean source names for later
79
+ for i, s in enumerate(sources): # index, source
80
+ # Start thread to read frames from video stream
81
+ st = f"{i + 1}/{n}: {s}... "
82
+ if urlparse(s).hostname in ("www.youtube.com", "youtube.com", "youtu.be"): # if source is YouTube video
83
+ # YouTube format i.e. 'https://www.youtube.com/watch?v=Zgi9g1ksQHc' or 'https://youtu.be/LNwODJXcvt4'
84
+ s = get_best_youtube_url(s)
85
+ s = eval(s) if s.isnumeric() else s # i.e. s = '0' local webcam
86
+ if s == 0 and (is_colab() or is_kaggle()):
87
+ raise NotImplementedError(
88
+ "'source=0' webcam not supported in Colab and Kaggle notebooks. "
89
+ "Try running 'source=0' in a local environment."
90
+ )
91
+ self.caps[i] = cv2.VideoCapture(s) # store video capture object
92
+ if not self.caps[i].isOpened():
93
+ raise ConnectionError(f"{st}Failed to open {s}")
94
+ w = int(self.caps[i].get(cv2.CAP_PROP_FRAME_WIDTH))
95
+ h = int(self.caps[i].get(cv2.CAP_PROP_FRAME_HEIGHT))
96
+ fps = self.caps[i].get(cv2.CAP_PROP_FPS) # warning: may return 0 or nan
97
+ self.frames[i] = max(int(self.caps[i].get(cv2.CAP_PROP_FRAME_COUNT)), 0) or float(
98
+ "inf"
99
+ ) # infinite stream fallback
100
+ self.fps[i] = max((fps if math.isfinite(fps) else 0) % 100, 0) or 30 # 30 FPS fallback
101
+
102
+ success, im = self.caps[i].read() # guarantee first frame
103
+ if not success or im is None:
104
+ raise ConnectionError(f"{st}Failed to read images from {s}")
105
+ self.imgs[i].append(im)
106
+ self.shape[i] = im.shape
107
+ self.threads[i] = Thread(target=self.update, args=([i, self.caps[i], s]), daemon=True)
108
+ LOGGER.info(f"{st}Success �� ({self.frames[i]} frames of shape {w}x{h} at {self.fps[i]:.2f} FPS)")
109
+ self.threads[i].start()
110
+ LOGGER.info("") # newline
111
+
112
+ # Check for common shapes
113
+ self.bs = self.__len__()
114
+
115
    def update(self, i, cap, stream):
        """
        Continuously read frames for stream `i` in a daemon thread.

        Args:
            i (int): Index of this stream in the internal buffers (imgs/frames/shape).
            cap (cv2.VideoCapture): Opened capture object for this stream.
            stream (str | int): Original source identifier, used to re-open the stream on read failure.
        """
        n, f = 0, self.frames[i]  # frame counter, total frame count (may be float('inf') for live streams)
        while self.running and cap.isOpened() and n < (f - 1):
            if len(self.imgs[i]) < 30:  # keep a <=30-image buffer
                n += 1
                cap.grab()  # .read() = .grab() followed by .retrieve()
                if n % self.vid_stride == 0:  # only decode every vid_stride-th grabbed frame
                    success, im = cap.retrieve()
                    if not success:
                        im = np.zeros(self.shape[i], dtype=np.uint8)  # substitute a black frame on failure
                        LOGGER.warning("WARNING ⚠️ Video stream unresponsive, please check your IP camera connection.")
                        cap.open(stream)  # re-open stream if signal was lost
                    if self.buffer:
                        self.imgs[i].append(im)  # accumulate frames; consumed FIFO in __next__
                    else:
                        self.imgs[i] = [im]  # keep only the most recent frame
            else:
                time.sleep(0.01)  # buffer is full; wait for the consumer to drain it
134
+
135
+ def close(self):
136
+ """Close stream loader and release resources."""
137
+ self.running = False # stop flag for Thread
138
+ for thread in self.threads:
139
+ if thread.is_alive():
140
+ thread.join(timeout=5) # Add timeout
141
+ for cap in self.caps: # Iterate through the stored VideoCapture objects
142
+ try:
143
+ cap.release() # release video capture
144
+ except Exception as e:
145
+ LOGGER.warning(f"WARNING ⚠️ Could not release VideoCapture object: {e}")
146
+ cv2.destroyAllWindows()
147
+
148
+ def __iter__(self):
149
+ """Iterates through YOLO image feed and re-opens unresponsive streams."""
150
+ self.count = -1
151
+ return self
152
+
153
    def __next__(self):
        """
        Return the next batch of frames, one image per source stream.

        Returns:
            (list): Source paths/names for each stream.
            (list): One BGR image (np.ndarray) per stream.
            (None): Placeholder for a video-capture object (not used for streams).
            (str): Empty info string.

        Raises:
            StopIteration: When a reader thread has died or the user presses 'q' in an OpenCV window.
        """
        self.count += 1

        images = []
        for i, x in enumerate(self.imgs):
            # Wait until a frame is available in each buffer
            while not x:
                if not self.threads[i].is_alive() or cv2.waitKey(1) == ord("q"):  # q to quit
                    self.close()
                    raise StopIteration
                time.sleep(1 / min(self.fps))  # sleep one frame period of the slowest stream
                x = self.imgs[i]
                if not x:
                    LOGGER.warning(f"WARNING ⚠️ Waiting for stream {i}")

            # Get and remove the first frame from imgs buffer (FIFO when buffering is enabled)
            if self.buffer:
                images.append(x.pop(0))

            # Get the last frame, and clear the rest from the imgs buffer
            else:
                # Fall back to a black frame if the buffer emptied between the checks above
                images.append(x.pop(-1) if x else np.zeros(self.shape[i], dtype=np.uint8))
                x.clear()

        return self.sources, images, None, ""
179
+
180
+ def __len__(self):
181
+ """Return the length of the sources object."""
182
+ return len(self.sources) # 1E12 frames = 32 streams at 30 FPS for 30 years
183
+
184
+
185
class LoadScreenshots:
    """
    YOLOv8 screenshot dataloader.

    This class manages the loading of screenshot images for processing with YOLOv8.
    Suitable for use with `yolo predict source=screen`.

    Attributes:
        source (str): The source input indicating which screen to capture.
        screen (int): The screen number to capture.
        left (int): The left coordinate for screen capture area.
        top (int): The top coordinate for screen capture area.
        width (int): The width of the screen capture area.
        height (int): The height of the screen capture area.
        mode (str): Set to 'stream' indicating real-time capture.
        frame (int): Counter for captured frames.
        sct (mss.mss): Screen capture object from `mss` library.
        bs (int): Batch size, set to 1.
        monitor (dict): Monitor configuration details.

    Methods:
        __iter__: Returns an iterator object.
        __next__: Captures the next screenshot and returns it.
    """

    def __init__(self, source):
        """Source = [screen_number left top width height] (pixels)."""
        check_requirements("mss")
        import mss  # noqa

        source, *params = source.split()
        self.screen, left, top, width, height = 0, None, None, None, None  # default to full screen 0
        if len(params) == 1:
            self.screen = int(params[0])  # e.g. 'screen 1' → full capture of screen 1
        elif len(params) == 4:
            left, top, width, height = (int(x) for x in params)  # region on default screen 0
        elif len(params) == 5:
            self.screen, left, top, width, height = (int(x) for x in params)  # screen number + region
        self.mode = "stream"
        self.frame = 0
        self.sct = mss.mss()
        self.bs = 1

        # Parse monitor shape; user-supplied offsets are relative to the chosen monitor's origin
        monitor = self.sct.monitors[self.screen]
        self.top = monitor["top"] if top is None else (monitor["top"] + top)
        self.left = monitor["left"] if left is None else (monitor["left"] + left)
        self.width = width or monitor["width"]
        self.height = height or monitor["height"]
        self.monitor = {"left": self.left, "top": self.top, "width": self.width, "height": self.height}

    def __iter__(self):
        """Returns an iterator of the object."""
        return self

    def __next__(self):
        """mss screen capture: get raw pixels from the screen as np array."""
        im0 = np.asarray(self.sct.grab(self.monitor))[:, :, :3]  # BGRA to BGR (drop alpha channel)
        s = f"screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: "

        self.frame += 1
        return [str(self.screen)], [im0], None, s  # screen, img, vid_cap, string
247
+
248
+
249
class LoadImages:
    """
    YOLOv8 image/video dataloader.

    This class manages the loading and pre-processing of image and video data for YOLOv8. It supports loading from
    various formats, including single image files, video files, and lists of image and video paths.

    Attributes:
        files (list): List of image and video file paths.
        nf (int): Total number of files (images and videos).
        video_flag (list): Flags indicating whether a file is a video (True) or an image (False).
        mode (str): Current mode, 'image' or 'video'.
        vid_stride (int): Stride for video frame-rate, defaults to 1.
        bs (int): Batch size, set to 1 for this class.
        cap (cv2.VideoCapture): Video capture object for OpenCV.
        frame (int): Frame counter for video.
        frames (int): Total number of frames in the video.
        count (int): Counter for iteration, initialized at 0 during `__iter__()`.

    Methods:
        _new_video(path): Create a new cv2.VideoCapture object for a given video path.
    """

    def __init__(self, path, vid_stride=1):
        """
        Initialize the Dataloader and raise FileNotFoundError if file not found.

        Args:
            path (str | Path | list): A file, directory, glob pattern, list of sources, or a *.txt file
                listing one source per line.
            vid_stride (int): Keep every `vid_stride`-th video frame, defaults to 1.
        """
        parent = None
        if isinstance(path, str) and Path(path).suffix == ".txt":  # *.txt file with img/vid/dir on each line
            parent = Path(path).parent
            path = Path(path).read_text().splitlines()  # list of sources
        files = []
        for p in sorted(path) if isinstance(path, (list, tuple)) else [path]:
            a = str(Path(p).absolute())  # do not use .resolve() https://github.com/ultralytics/ultralytics/issues/2912
            if "*" in a:
                files.extend(sorted(glob.glob(a, recursive=True)))  # glob
            elif os.path.isdir(a):
                files.extend(sorted(glob.glob(os.path.join(a, "*.*"))))  # dir
            elif os.path.isfile(a):
                files.append(a)  # files (absolute or relative to CWD)
            elif parent and (parent / p).is_file():
                files.append(str((parent / p).absolute()))  # files (relative to *.txt file parent)
            else:
                raise FileNotFoundError(f"{p} does not exist")

        images = [x for x in files if x.split(".")[-1].lower() in IMG_FORMATS]
        videos = [x for x in files if x.split(".")[-1].lower() in VID_FORMATS]
        ni, nv = len(images), len(videos)

        self.files = images + videos
        self.nf = ni + nv  # number of files
        self.video_flag = [False] * ni + [True] * nv
        self.mode = "image"
        self.vid_stride = vid_stride  # video frame-rate stride
        self.bs = 1
        if videos:
            self._new_video(videos[0])  # new video
        else:
            self.cap = None
        if self.nf == 0:
            # Bug fix: previous code interpolated the loop variable `p`, which is unbound (NameError)
            # when `path` is an empty list and otherwise names only the last source; report the input.
            raise FileNotFoundError(
                f"No images or videos found in {path}. "
                f"Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}"
            )

    def __iter__(self):
        """Returns an iterator object for VideoStream or ImageFolder."""
        self.count = 0
        return self

    def __next__(self):
        """Return next image, path and metadata from dataset."""
        if self.count == self.nf:
            raise StopIteration
        path = self.files[self.count]

        if self.video_flag[self.count]:
            # Read video
            self.mode = "video"
            for _ in range(self.vid_stride):
                self.cap.grab()  # grab skips frames cheaply; only the last one is decoded below
            success, im0 = self.cap.retrieve()
            while not success:
                # Current video is exhausted or unreadable: advance to the next file
                self.count += 1
                self.cap.release()
                if self.count == self.nf:  # last video
                    raise StopIteration
                path = self.files[self.count]
                self._new_video(path)
                success, im0 = self.cap.read()

            self.frame += 1
            # im0 = self._cv2_rotate(im0)  # for use if cv2 autorotation is False
            s = f"video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: "

        else:
            # Read image
            self.count += 1
            im0 = cv2.imread(path)  # BGR
            if im0 is None:
                raise FileNotFoundError(f"Image Not Found {path}")
            s = f"image {self.count}/{self.nf} {path}: "

        return [path], [im0], self.cap, s

    def _new_video(self, path):
        """Create a new video capture object for `path` and reset the frame counter."""
        self.frame = 0
        self.cap = cv2.VideoCapture(path)
        self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride)

    def __len__(self):
        """Returns the number of files in the object."""
        return self.nf  # number of files
361
+
362
+
363
class LoadPilAndNumpy:
    """
    Load images from PIL and Numpy arrays for batch processing.

    Validates every input and converts PIL images to contiguous BGR numpy arrays so that
    downstream prediction code receives a uniform format.

    Attributes:
        paths (list): Image paths or autogenerated filenames ('image{i}.jpg').
        im0 (list): Images stored as numpy arrays.
        mode (str): Always 'image' for this loader.
        bs (int): Batch size, equal to len(im0).
        count (int): Iteration counter, set to 0 by `__iter__()`.

    Methods:
        _single_check(im): Validate and format a single image to a numpy array.
    """

    def __init__(self, im0):
        """Initialize PIL and Numpy Dataloader from a single image or a list of images."""
        images = im0 if isinstance(im0, list) else [im0]
        self.paths = [getattr(image, "filename", f"image{i}.jpg") for i, image in enumerate(images)]
        self.im0 = [self._single_check(image) for image in images]
        self.mode = "image"
        self.bs = len(self.im0)

    @staticmethod
    def _single_check(im):
        """Validate one image; PIL input is converted to a contiguous BGR numpy array."""
        assert isinstance(im, (Image.Image, np.ndarray)), f"Expected PIL/np.ndarray image type, but got {type(im)}"
        if isinstance(im, Image.Image):
            if im.mode != "RGB":
                im = im.convert("RGB")
            im = np.asarray(im)[:, :, ::-1]  # RGB -> BGR channel order
            im = np.ascontiguousarray(im)  # contiguous
        return im

    def __len__(self):
        """Return the number of loaded images."""
        return len(self.im0)

    def __iter__(self):
        """Reset the iteration counter and return self."""
        self.count = 0
        return self

    def __next__(self):
        """Yield the whole batch exactly once (single-pass batch inference), then stop."""
        if self.count == 1:
            raise StopIteration
        self.count += 1
        return self.paths, self.im0, None, ""
418
+
419
+
420
class LoadTensor:
    """
    Load images from torch.Tensor data.

    Wraps a BCHW tensor (or a CHW tensor, which is unsqueezed to a batch of one) so it can be
    iterated like the other loaders, after validating shape and value range.

    Attributes:
        im0 (torch.Tensor): The validated input tensor.
        bs (int): Batch size, taken from im0.shape[0].
        mode (str): Always 'image'.
        paths (list): Image paths or autogenerated filenames.
        count (int): Iteration counter, set to 0 by `__iter__()`.

    Methods:
        _single_check(im, stride): Validate and possibly modify the input tensor.
    """

    def __init__(self, im0) -> None:
        """Initialize Tensor Dataloader."""
        self.im0 = self._single_check(im0)
        self.bs = self.im0.shape[0]
        self.mode = "image"
        self.paths = [getattr(image, "filename", f"image{i}.jpg") for i, image in enumerate(im0)]

    @staticmethod
    def _single_check(im, stride=32):
        """Validate the tensor: BCHW shape, stride-divisible spatial dims, values in 0.0-1.0."""
        s = (
            f"WARNING ⚠️ torch.Tensor inputs should be BCHW i.e. shape(1, 3, 640, 640) "
            f"divisible by stride {stride}. Input shape{tuple(im.shape)} is incompatible."
        )
        if im.ndim != 4:
            if im.ndim != 3:
                raise ValueError(s)
            LOGGER.warning(s)
            im = im.unsqueeze(0)  # CHW -> 1CHW
        if im.shape[2] % stride or im.shape[3] % stride:
            raise ValueError(s)
        if im.max() > 1.0 + torch.finfo(im.dtype).eps:  # torch.float32 eps is 1.2e-07
            LOGGER.warning(
                f"WARNING ⚠️ torch.Tensor inputs should be normalized 0.0-1.0 but max value is {im.max()}. "
                f"Dividing input by 255."
            )
            im = im.float() / 255.0

        return im

    def __iter__(self):
        """Reset the iteration counter and return self."""
        self.count = 0
        return self

    def __next__(self):
        """Yield the whole batch exactly once, then stop."""
        if self.count == 1:
            raise StopIteration
        self.count += 1
        return self.paths, self.im0, None, ""

    def __len__(self):
        """Returns the batch size."""
        return self.bs
482
+
483
+
484
def autocast_list(source):
    """Merges a list of source of different types into a list of numpy arrays or PIL images."""
    merged = []
    for item in source:
        if isinstance(item, (str, Path)):  # filename or uri
            raw = requests.get(item, stream=True).raw if str(item).startswith("http") else item
            merged.append(Image.open(raw))
        elif isinstance(item, (Image.Image, np.ndarray)):  # PIL or np Image
            merged.append(item)
        else:
            raise TypeError(
                f"type {type(item).__name__} is not a supported Ultralytics prediction source type. \n"
                f"See https://docs.ultralytics.com/modes/predict for supported source types."
            )

    return merged
499
+
500
+
501
LOADERS = LoadStreams, LoadPilAndNumpy, LoadImages, LoadScreenshots  # tuple of the loader classes defined above
502
+
503
+
504
def get_best_youtube_url(url, use_pafy=True):
    """
    Retrieves the URL of the best quality MP4 video stream from a given YouTube video.

    This function uses the pafy or yt_dlp library to extract the video info from YouTube. It then finds the highest
    quality MP4 format that has video codec but no audio codec, and returns the URL of this video stream.

    Args:
        url (str): The URL of the YouTube video.
        use_pafy (bool): Use the pafy package, default=True, otherwise use yt_dlp package.

    Returns:
        (str): The URL of the best quality MP4 video stream, or None if no suitable stream is found.

    Note:
        NOTE(review): the pinned 'youtube_dl==2020.12.2' backend used by pafy is long unmaintained and may no
        longer work against current YouTube; the yt_dlp path (use_pafy=False) is the maintained alternative —
        confirm before changing the default.
    """
    if use_pafy:
        check_requirements(("pafy", "youtube_dl==2020.12.2"))
        import pafy  # noqa

        return pafy.new(url).getbestvideo(preftype="mp4").url
    else:
        check_requirements("yt-dlp")
        import yt_dlp

        with yt_dlp.YoutubeDL({"quiet": True}) as ydl:
            info_dict = ydl.extract_info(url, download=False)  # extract info
        for f in reversed(info_dict.get("formats", [])):  # reversed because best is usually last
            # Find a format with video codec, no audio, *.mp4 extension at least 1920x1080 size
            good_size = (f.get("width") or 0) >= 1920 or (f.get("height") or 0) >= 1080
            if good_size and f["vcodec"] != "none" and f["acodec"] == "none" and f["ext"] == "mp4":
                return f.get("url")
yolov8_model/ultralytics/data/scripts/download_weights.sh ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Download latest models from https://github.com/ultralytics/assets/releases
# Example usage: bash ultralytics/data/scripts/download_weights.sh
# parent
# └── weights
#     ├── yolov8n.pt  ← downloads here
#     ├── yolov8s.pt
#     └── ...

# Requires the 'ultralytics' package importable from the current Python environment
python - <<EOF
from ultralytics.utils.downloads import attempt_download_asset

# All sizes (n/s/m/l/x) across detect (''), classify ('-cls'), segment ('-seg') and pose ('-pose') variants
assets = [f'yolov8{size}{suffix}.pt' for size in 'nsmlx' for suffix in ('', '-cls', '-seg', '-pose')]
for x in assets:
    attempt_download_asset(f'weights/{x}')

EOF
yolov8_model/ultralytics/data/scripts/get_coco.sh ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Download COCO 2017 dataset https://cocodataset.org
# Example usage: bash data/scripts/get_coco.sh
# parent
# ├── ultralytics
# └── datasets
#     └── coco  ← downloads here

# Arguments (optional) Usage: bash data/scripts/get_coco.sh --train --val --test --segments
if [ "$#" -gt 0 ]; then
  for opt in "$@"; do
    case "${opt}" in
    --train) train=true ;;
    --val) val=true ;;
    --test) test=true ;;
    --segments) segments=true ;;
    --sama) sama=true ;;
    esac
  done
else
  # Defaults when called with no arguments: train + val images, box labels only
  train=true
  val=true
  test=false
  segments=false
  sama=false
fi

# Download/unzip labels (runs in the background; 'wait' at the bottom blocks until complete)
d='../datasets' # unzip directory
url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
if [ "$segments" == "true" ]; then
  f='coco2017labels-segments.zip' # 169 MB
elif [ "$sama" == "true" ]; then
  f='coco2017labels-segments-sama.zip' # 199 MB https://www.sama.com/sama-coco-dataset/
else
  f='coco2017labels.zip' # 46 MB
fi
echo 'Downloading' $url$f ' ...'
curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f &

# Download/unzip images (each split downloads as its own background job)
d='../datasets/coco/images' # unzip directory
url=http://images.cocodataset.org/zips/
if [ "$train" == "true" ]; then
  f='train2017.zip' # 19G, 118k images
  echo 'Downloading' $url$f '...'
  curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f &
fi
if [ "$val" == "true" ]; then
  f='val2017.zip' # 1G, 5k images
  echo 'Downloading' $url$f '...'
  curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f &
fi
if [ "$test" == "true" ]; then
  f='test2017.zip' # 7G, 41k images (optional)
  echo 'Downloading' $url$f '...'
  curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f &
fi
wait # finish background tasks
yolov8_model/ultralytics/data/scripts/get_coco128.sh ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Download COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017)
# Example usage: bash data/scripts/get_coco128.sh
# parent
# ├── ultralytics
# └── datasets
#     └── coco128  ← downloads here

# Download/unzip images and labels (background job; 'wait' below blocks until it finishes)
d='../datasets' # unzip directory
url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
f='coco128.zip' # or 'coco128-segments.zip', 68 MB
echo 'Downloading' $url$f ' ...'
curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f &

wait # finish background tasks
yolov8_model/ultralytics/data/scripts/get_imagenet.sh ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Download ILSVRC2012 ImageNet dataset https://image-net.org
# Example usage: bash data/scripts/get_imagenet.sh
# parent
# ├── ultralytics
# └── datasets
#     └── imagenet  ← downloads here
# NOTE(review): direct downloads from image-net.org presumably require accepted terms/credentials — verify.

# Arguments (optional) Usage: bash data/scripts/get_imagenet.sh --train --val
if [ "$#" -gt 0 ]; then
  for opt in "$@"; do
    case "${opt}" in
    --train) train=true ;;
    --val) val=true ;;
    esac
  done
else
  # Default: download both splits
  train=true
  val=true
fi

# Make dir
d='../datasets/imagenet' # unzip directory
mkdir -p $d && cd $d

# Download/unzip train
if [ "$train" == "true" ]; then
  wget https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_train.tar # download 138G, 1281167 images
  mkdir train && mv ILSVRC2012_img_train.tar train/ && cd train
  tar -xf ILSVRC2012_img_train.tar && rm -f ILSVRC2012_img_train.tar
  # The train archive contains one nested tar per class; extract each into its own class directory
  find . -name "*.tar" | while read NAME; do
    mkdir -p "${NAME%.tar}"
    tar -xf "${NAME}" -C "${NAME%.tar}"
    rm -f "${NAME}"
  done
  cd ..
fi

# Download/unzip val
if [ "$val" == "true" ]; then
  wget https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar # download 6.3G, 50000 images
  mkdir val && mv ILSVRC2012_img_val.tar val/ && cd val && tar -xf ILSVRC2012_img_val.tar
  wget -qO- https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh | bash # move into subdirs
fi

# Delete corrupted image (optional: PNG under JPEG name that may cause dataloaders to fail)
# rm train/n04266014/n04266014_10835.JPEG

# TFRecords (optional)
# wget https://raw.githubusercontent.com/tensorflow/models/master/research/slim/datasets/imagenet_lsvrc_2015_synsets.txt
yolov8_model/ultralytics/data/split_dota.py ADDED
@@ -0,0 +1,288 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+
3
+ import itertools
4
+ from glob import glob
5
+ from math import ceil
6
+ from pathlib import Path
7
+
8
+ import cv2
9
+ import numpy as np
10
+ from PIL import Image
11
+ from tqdm import tqdm
12
+
13
+ from yolov8_model.ultralytics.data.utils import exif_size, img2label_paths
14
+ from yolov8_model.ultralytics.utils.checks import check_requirements
15
+
16
+ check_requirements("shapely")
17
+ from shapely.geometry import Polygon
18
+
19
+
20
def bbox_iof(polygon1, bbox2, eps=1e-6):
    """
    Calculate intersection-over-foreground (IoF) between polygons and axis-aligned boxes.

    IoF divides each intersection area by the polygon's own area (the "foreground"), so it
    measures how much of each polygon lies inside each box.

    Args:
        polygon1 (np.ndarray): Polygon coordinates, shape (n, 8) as 4 (x, y) corner pairs.
        bbox2 (np.ndarray): Bounding boxes, shape (m, 4) as (x1, y1, x2, y2).
        eps (float): Lower bound on polygon areas to avoid division by zero.

    Returns:
        (np.ndarray): IoF matrix of shape (n, m).
    """
    polygon1 = polygon1.reshape(-1, 4, 2)
    lt_point = np.min(polygon1, axis=-2)  # top-left of each polygon's axis-aligned bounding box
    rb_point = np.max(polygon1, axis=-2)  # bottom-right of each polygon's AABB
    bbox1 = np.concatenate([lt_point, rb_point], axis=-1)  # polygon AABBs, shape (n, 4)

    # Pairwise AABB intersection areas, used as a cheap pre-filter before exact polygon clipping
    lt = np.maximum(bbox1[:, None, :2], bbox2[..., :2])
    rb = np.minimum(bbox1[:, None, 2:], bbox2[..., 2:])
    wh = np.clip(rb - lt, 0, np.inf)
    h_overlaps = wh[..., 0] * wh[..., 1]

    l, t, r, b = (bbox2[..., i] for i in range(4))
    polygon2 = np.stack([l, t, r, t, r, b, l, b], axis=-1).reshape(-1, 4, 2)  # boxes as 4-corner polygons

    sg_polys1 = [Polygon(p) for p in polygon1]
    sg_polys2 = [Polygon(p) for p in polygon2]
    overlaps = np.zeros(h_overlaps.shape)
    # Exact shapely intersection only for pairs whose AABBs overlap
    for p in zip(*np.nonzero(h_overlaps)):
        overlaps[p] = sg_polys1[p[0]].intersection(sg_polys2[p[-1]]).area
    unions = np.array([p.area for p in sg_polys1], dtype=np.float32)  # polygon ("foreground") areas
    unions = unions[..., None]

    unions = np.clip(unions, eps, np.inf)  # guard against degenerate zero-area polygons
    outputs = overlaps / unions
    if outputs.ndim == 1:
        outputs = outputs[..., None]
    return outputs
54
+
55
+
56
def load_yolo_dota(data_root, split="train"):
    """
    Load DOTA dataset annotations for one split.

    Args:
        data_root (str): Data root directory.
        split (str): The split data set, could be train or val.

    Returns:
        (list): One dict per image with keys `ori_size` (h, w), `label` (np.ndarray) and `filepath`.

    Notes:
        The directory structure assumed for the DOTA dataset:
            - data_root
                - images
                    - train
                    - val
                - labels
                    - train
                    - val
    """
    assert split in ["train", "val"]
    im_dir = Path(data_root) / "images" / split
    assert im_dir.exists(), f"Can't find {im_dir}, please check your data root."
    im_files = glob(str(Path(data_root) / "images" / split / "*"))
    lb_files = img2label_paths(im_files)
    annos = []
    for img_path, label_path in zip(im_files, lb_files):
        w, h = exif_size(Image.open(img_path))  # EXIF-corrected (width, height)
        with open(label_path) as fh:
            rows = [line.split() for line in fh.read().strip().splitlines() if len(line)]
        annos.append({"ori_size": (h, w), "label": np.array(rows, dtype=np.float32), "filepath": img_path})
    return annos
87
+
88
+
89
def get_windows(im_size, crop_sizes=(1024,), gaps=(200,), im_rate_thr=0.6, eps=0.01):
    """
    Get the coordinates of sliding crop windows covering an image.

    Args:
        im_size (tuple): Original image size, (h, w).
        crop_sizes (tuple | list): Crop size of windows.
        gaps (tuple | list): Overlap between adjacent crops; paired element-wise with crop_sizes.
        im_rate_thr (float): Minimum ratio of in-image window area to total window area.
        eps (float): Tolerance used to keep the best-covering window(s) when none passes the threshold.

    Returns:
        (np.ndarray): Window coordinates of shape (m, 4) as (x_start, y_start, x_stop, y_stop).

    Note:
        Bug fix: the default arguments were mutable lists (`[1024]`, `[200]`); they are now tuples.
    """
    h, w = im_size
    windows = []
    for crop_size, gap in zip(crop_sizes, gaps):
        assert crop_size > gap, f"invalid crop_size gap pair [{crop_size} {gap}]"
        step = crop_size - gap

        xn = 1 if w <= crop_size else ceil((w - crop_size) / step + 1)
        xs = [step * i for i in range(xn)]
        if len(xs) > 1 and xs[-1] + crop_size > w:
            xs[-1] = w - crop_size  # shift the last window back so it ends exactly at the image edge

        yn = 1 if h <= crop_size else ceil((h - crop_size) / step + 1)
        ys = [step * i for i in range(yn)]
        if len(ys) > 1 and ys[-1] + crop_size > h:
            ys[-1] = h - crop_size

        start = np.array(list(itertools.product(xs, ys)), dtype=np.int64)
        stop = start + crop_size
        windows.append(np.concatenate([start, stop], axis=1))
    windows = np.concatenate(windows, axis=0)

    # Keep only windows that mostly lie inside the image
    im_in_wins = windows.copy()
    im_in_wins[:, 0::2] = np.clip(im_in_wins[:, 0::2], 0, w)
    im_in_wins[:, 1::2] = np.clip(im_in_wins[:, 1::2], 0, h)
    im_areas = (im_in_wins[:, 2] - im_in_wins[:, 0]) * (im_in_wins[:, 3] - im_in_wins[:, 1])
    win_areas = (windows[:, 2] - windows[:, 0]) * (windows[:, 3] - windows[:, 1])
    im_rates = im_areas / win_areas
    if not (im_rates > im_rate_thr).any():
        # No window passes the threshold (image smaller than the crop): keep the best-covering window(s)
        max_rate = im_rates.max()
        im_rates[abs(im_rates - max_rate) < eps] = 1
    return windows[im_rates > im_rate_thr]
130
+
131
+
132
def get_window_obj(anno, windows, iof_thr=0.7):
    """
    Assign each annotated object to every crop window it sufficiently overlaps.

    Args:
        anno (dict): Annotation dict with `ori_size` (h, w) and normalized `label` array.
        windows (np.ndarray): Window coordinates of shape (m, 4).
        iof_thr (float): Minimum IoF for an object to be kept in a window.

    Returns:
        (list): One label array per window.

    Note:
        Denormalizes `anno["label"]` in place — the caller's array is scaled to pixel coordinates.
    """
    h, w = anno["ori_size"]
    label = anno["label"]
    if len(label):
        label[:, 1::2] *= w  # in-place: x coordinates become pixels
        label[:, 2::2] *= h  # in-place: y coordinates become pixels
        iofs = bbox_iof(label[:, 1:], windows)
        # Unnormalized and misaligned coordinates
        return [(label[iofs[:, i] >= iof_thr]) for i in range(len(windows))]  # window_anns
    else:
        return [np.zeros((0, 9), dtype=np.float32) for _ in range(len(windows))]  # window_anns
144
+
145
+
146
def crop_and_save(anno, windows, window_objs, im_dir, lb_dir):
    """
    Crop images and save new labels.

    Args:
        anno (dict): Annotation dict, including `filepath`, `label`, `ori_size` as its keys.
        windows (list): A list of windows coordinates.
        window_objs (list): A list of labels inside each window.
        im_dir (str): The output directory path of images.
        lb_dir (str): The output directory path of labels.

    Notes:
        Mutates each window's label array in place (translates to window origin, then normalizes
        to the patch size). The directory structure assumed for the DOTA dataset:
            - data_root
                - images
                    - train
                    - val
                - labels
                    - train
                    - val
    """
    im = cv2.imread(anno["filepath"])
    name = Path(anno["filepath"]).stem
    for i, window in enumerate(windows):
        x_start, y_start, x_stop, y_stop = window.tolist()
        new_name = f"{name}__{x_stop - x_start}__{x_start}___{y_start}"  # DOTA patch naming: stem__size__x___y
        patch_im = im[y_start:y_stop, x_start:x_stop]
        ph, pw = patch_im.shape[:2]

        cv2.imwrite(str(Path(im_dir) / f"{new_name}.jpg"), patch_im)
        label = window_objs[i]
        if len(label) == 0:
            continue  # the image patch is still saved even when it contains no objects
        # Translate pixel coordinates into the window frame, then normalize to patch size
        label[:, 1::2] -= x_start
        label[:, 2::2] -= y_start
        label[:, 1::2] /= pw
        label[:, 2::2] /= ph

        with open(Path(lb_dir) / f"{new_name}.txt", "w") as f:
            for lb in label:
                formatted_coords = ["{:.6g}".format(coord) for coord in lb[1:]]
                f.write(f"{int(lb[0])} {' '.join(formatted_coords)}\n")
+
189
+
190
def split_images_and_labels(data_root, save_dir, split="train", crop_sizes=(1024,), gaps=(200,)):
    """
    Split both images and labels of one DOTA split into crop windows.

    Args:
        data_root (str): Root of the source dataset.
        save_dir (str): Output root for the cropped dataset.
        split (str): The split to process, 'train' or 'val'.
        crop_sizes (tuple | list): Window sizes in pixels.
        gaps (tuple | list): Overlaps between adjacent windows, paired with crop_sizes.

    Notes:
        Bug fix: the default arguments were mutable lists (`[1024]`, `[200]`); they are now tuples.
        The directory structure assumed for the DOTA dataset:
            - data_root
                - images
                    - split
                - labels
                    - split
        and the output directory structure is:
            - save_dir
                - images
                    - split
                - labels
                    - split
    """
    im_dir = Path(save_dir) / "images" / split
    im_dir.mkdir(parents=True, exist_ok=True)
    lb_dir = Path(save_dir) / "labels" / split
    lb_dir.mkdir(parents=True, exist_ok=True)

    annos = load_yolo_dota(data_root, split=split)
    for anno in tqdm(annos, total=len(annos), desc=split):
        windows = get_windows(anno["ori_size"], crop_sizes, gaps)
        window_objs = get_window_obj(anno, windows)  # NOTE: denormalizes anno["label"] in place
        crop_and_save(anno, windows, window_objs, str(im_dir), str(lb_dir))
218
+
219
+
220
def split_trainval(data_root, save_dir, crop_size=1024, gap=200, rates=(1.0,)):
    """
    Split train and val set of DOTA.

    Args:
        data_root (str): Root of the source dataset.
        save_dir (str): Output root for the cropped dataset.
        crop_size (int): Base window size in pixels.
        gap (int): Base overlap between adjacent windows in pixels.
        rates (tuple | list): Scale rates; each rate r yields windows of size int(crop_size / r).

    Notes:
        Bug fix: the default `rates` argument was a mutable list (`[1.0]`); it is now a tuple.
        The directory structure assumed for the DOTA dataset:
            - data_root
                - images
                    - train
                    - val
                - labels
                    - train
                    - val
        and the output directory structure is:
            - save_dir
                - images
                    - train
                    - val
                - labels
                    - train
                    - val
    """
    # One (crop_size, gap) pair per rate
    crop_sizes = [int(crop_size / r) for r in rates]
    gaps = [int(gap / r) for r in rates]
    for split in ["train", "val"]:
        split_images_and_labels(data_root, save_dir, split, crop_sizes, gaps)
248
+
249
+
250
def split_test(data_root, save_dir, crop_size=1024, gap=200, rates=(1.0,)):
    """
    Split test set of DOTA, labels are not included within this set.

    Args:
        data_root (str): Root of the source dataset.
        save_dir (str): Output root for the cropped dataset.
        crop_size (int): Base window size in pixels.
        gap (int): Base overlap between adjacent windows in pixels.
        rates (tuple | list): Scale rates; each rate r yields windows of size int(crop_size / r).

    Notes:
        Bug fix: the default `rates` argument was a mutable list (`[1.0]`); it is now a tuple.
        The directory structure assumed for the DOTA dataset:
            - data_root
                - images
                    - test
        and the output directory structure is:
            - save_dir
                - images
                    - test
    """
    # One (crop_size, gap) pair per rate
    crop_sizes = [int(crop_size / r) for r in rates]
    gaps = [int(gap / r) for r in rates]
    save_dir = Path(save_dir) / "images" / "test"
    save_dir.mkdir(parents=True, exist_ok=True)

    im_dir = Path(data_root) / "images" / "test"
    assert im_dir.exists(), f"Can't find {im_dir}, please check your data root."
    im_files = glob(str(im_dir / "*"))
    for im_file in tqdm(im_files, total=len(im_files), desc="test"):
        w, h = exif_size(Image.open(im_file))  # EXIF-corrected (width, height)
        windows = get_windows((h, w), crop_sizes=crop_sizes, gaps=gaps)
        im = cv2.imread(im_file)
        name = Path(im_file).stem
        for window in windows:
            x_start, y_start, x_stop, y_stop = window.tolist()
            new_name = f"{name}__{x_stop - x_start}__{x_start}___{y_start}"  # DOTA patch naming: stem__size__x___y
            patch_im = im[y_start:y_stop, x_start:x_stop]
            cv2.imwrite(str(save_dir / f"{new_name}.jpg"), patch_im)
284
+
285
+
286
if __name__ == "__main__":
    # Example entry point: crop DOTAv2 train/val (with labels) and test (images only) into DOTAv2-split
    split_trainval(data_root="DOTAv2", save_dir="DOTAv2-split")
    split_test(data_root="DOTAv2", save_dir="DOTAv2-split")
+ split_test(data_root="DOTAv2", save_dir="DOTAv2-split")
yolov8_model/ultralytics/data/utils.py ADDED
@@ -0,0 +1,647 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+
3
+ import contextlib
4
+ import hashlib
5
+ import json
6
+ import os
7
+ import random
8
+ import subprocess
9
+ import time
10
+ import zipfile
11
+ from multiprocessing.pool import ThreadPool
12
+ from pathlib import Path
13
+ from tarfile import is_tarfile
14
+
15
+ import cv2
16
+ import numpy as np
17
+ from PIL import Image, ImageOps
18
+
19
+ from yolov8_model.ultralytics.nn.autobackend import check_class_names
20
+ from yolov8_model.ultralytics.utils import (
21
+ DATASETS_DIR,
22
+ LOGGER,
23
+ NUM_THREADS,
24
+ ROOT,
25
+ SETTINGS_YAML,
26
+ TQDM,
27
+ clean_url,
28
+ colorstr,
29
+ emojis,
30
+ yaml_load,
31
+ yaml_save,
32
+ )
33
+ from yolov8_model.ultralytics.utils.checks import check_file, check_font, is_ascii
34
+ from yolov8_model.ultralytics.utils.downloads import download, safe_download, unzip_file
35
+ from yolov8_model.ultralytics.utils.ops import segments2boxes
36
+
37
+ HELP_URL = "See https://docs.ultralytics.com/datasets/detect for dataset formatting guidance."
38
+ IMG_FORMATS = "bmp", "dng", "jpeg", "jpg", "mpo", "png", "tif", "tiff", "webp", "pfm" # image suffixes
39
+ VID_FORMATS = "asf", "avi", "gif", "m4v", "mkv", "mov", "mp4", "mpeg", "mpg", "ts", "wmv", "webm" # video suffixes
40
+ PIN_MEMORY = str(os.getenv("PIN_MEMORY", True)).lower() == "true" # global pin_memory for dataloaders
41
+
42
+
43
def img2label_paths(img_paths):
    """Derive the label .txt path for each image path by swapping the /images/ directory for /labels/."""
    img_dir = f"{os.sep}images{os.sep}"  # substring marking the images directory
    lbl_dir = f"{os.sep}labels{os.sep}"  # substring marking the labels directory
    label_paths = []
    for img in img_paths:
        # Replace only the LAST occurrence of /images/, then swap the file extension for .txt
        swapped = lbl_dir.join(img.rsplit(img_dir, 1))
        label_paths.append(swapped.rsplit(".", 1)[0] + ".txt")
    return label_paths
47
+
48
+
49
def get_hash(paths):
    """Return a single SHA-256 hex digest for a list of file/dir paths, mixing total size and path names."""
    total_size = 0
    for p in paths:
        if os.path.exists(p):  # missing paths contribute nothing to the size
            total_size += os.path.getsize(p)
    digest = hashlib.sha256(str(total_size).encode())  # seed the hash with the combined size
    digest.update("".join(paths).encode())  # then mix in the concatenated path strings
    return digest.hexdigest()
55
+
56
+
57
def exif_size(img: "Image.Image"):
    """Return the PIL (width, height), swapped when EXIF says the JPEG is rotated 90/270 degrees."""
    size = img.size  # PIL reports (width, height)
    if img.format != "JPEG":  # EXIF orientation is only honoured for JPEG images
        return size
    with contextlib.suppress(Exception):  # any EXIF read failure falls back to the raw size
        exif = img.getexif()
        if exif:
            # 274 is the EXIF orientation tag; values 6 and 8 mean 90/270-degree rotation
            if exif.get(274, None) in (6, 8):
                size = size[1], size[0]
    return size
68
+
69
+
70
def verify_image(args):
    """Verify a single (image, class) pair for a classification dataset, repairing truncated JPEGs."""
    (im_file, cls), prefix = args
    nf = nc = 0  # counters: found, corrupt
    msg = ""
    try:
        im = Image.open(im_file)
        im.verify()  # PIL integrity check (does not decode the full image)
        w, h = exif_size(im)
        shape = (h, w)  # store as (height, width)
        assert shape[0] > 9 and shape[1] > 9, f"image size {shape} <10 pixels"
        assert im.format.lower() in IMG_FORMATS, f"invalid image format {im.format}"
        if im.format.lower() in ("jpg", "jpeg"):
            with open(im_file, "rb") as f:
                f.seek(-2, 2)  # the last two bytes must be the JPEG end-of-image marker
                if f.read() != b"\xff\xd9":  # corrupt JPEG
                    # Re-save a clean copy (also bakes in the EXIF orientation)
                    ImageOps.exif_transpose(Image.open(im_file)).save(im_file, "JPEG", subsampling=0, quality=100)
                    msg = f"{prefix}WARNING ⚠️ {im_file}: corrupt JPEG restored and saved"
        nf = 1
    except Exception as e:
        nc = 1
        msg = f"{prefix}WARNING ⚠️ {im_file}: ignoring corrupt image/label: {e}"
    return (im_file, cls), nf, nc, msg
93
+
94
+
95
def verify_image_label(args):
    """
    Verify one image-label pair for a detection/segmentation/pose dataset.

    Args:
        args (tuple): (im_file, lb_file, prefix, keypoint, num_cls, nkpt, ndim), where `keypoint` toggles
            pose-label parsing, `num_cls` is the dataset class count and `nkpt`/`ndim` describe keypoints.

    Returns:
        (tuple | list): (im_file, lb, shape, segments, keypoints, nm, nf, ne, nc, msg) on success, or a list of
            five Nones plus the counters/message when the pair is corrupt. nm/nf/ne/nc = missing/found/empty/corrupt.
    """
    im_file, lb_file, prefix, keypoint, num_cls, nkpt, ndim = args
    # Number (missing, found, empty, corrupt), message, segments, keypoints
    nm, nf, ne, nc, msg, segments, keypoints = 0, 0, 0, 0, "", [], None
    try:
        # Verify images
        im = Image.open(im_file)
        im.verify()  # PIL verify
        shape = exif_size(im)  # image size
        shape = (shape[1], shape[0])  # hw
        assert (shape[0] > 9) & (shape[1] > 9), f"image size {shape} <10 pixels"
        assert im.format.lower() in IMG_FORMATS, f"invalid image format {im.format}"
        if im.format.lower() in ("jpg", "jpeg"):
            with open(im_file, "rb") as f:
                f.seek(-2, 2)
                if f.read() != b"\xff\xd9":  # corrupt JPEG missing its end-of-image marker
                    ImageOps.exif_transpose(Image.open(im_file)).save(im_file, "JPEG", subsampling=0, quality=100)
                    msg = f"{prefix}WARNING ⚠️ {im_file}: corrupt JPEG restored and saved"

        # Verify labels
        if os.path.isfile(lb_file):
            nf = 1  # label found
            with open(lb_file) as f:
                lb = [x.split() for x in f.read().strip().splitlines() if len(x)]
                if any(len(x) > 6 for x in lb) and (not keypoint):  # is segment
                    classes = np.array([x[0] for x in lb], dtype=np.float32)
                    segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in lb]  # (cls, xy1...)
                    lb = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1)  # (cls, xywh)
                lb = np.array(lb, dtype=np.float32)
            nl = len(lb)
            if nl:
                if keypoint:
                    assert lb.shape[1] == (5 + nkpt * ndim), f"labels require {(5 + nkpt * ndim)} columns each"
                    points = lb[:, 5:].reshape(-1, ndim)[:, :2]
                else:
                    assert lb.shape[1] == 5, f"labels require 5 columns, {lb.shape[1]} columns detected"
                    points = lb[:, 1:]
                assert points.max() <= 1, f"non-normalized or out of bounds coordinates {points[points > 1]}"
                assert lb.min() >= 0, f"negative label values {lb[lb < 0]}"

                # All labels. Class indices are 0-based so a valid max class is num_cls - 1: the check must be
                # strict '<' (the previous '<=' wrongly accepted class index == num_cls).
                max_cls = lb[:, 0].max()  # max label count
                assert max_cls < num_cls, (
                    f"Label class {int(max_cls)} exceeds dataset class count {num_cls}. "
                    f"Possible class labels are 0-{num_cls - 1}"
                )
                _, i = np.unique(lb, axis=0, return_index=True)
                if len(i) < nl:  # duplicate row check
                    lb = lb[i]  # remove duplicates
                    if segments:
                        segments = [segments[x] for x in i]
                    msg = f"{prefix}WARNING ⚠️ {im_file}: {nl - len(i)} duplicate labels removed"
            else:
                ne = 1  # label empty
                lb = np.zeros((0, (5 + nkpt * ndim) if keypoint else 5), dtype=np.float32)
        else:
            nm = 1  # label missing
            # BUGFIX: this branch previously tested `keypoints` (still None at this point), so pose datasets
            # with a missing label file got a 5-column array; test the `keypoint` flag instead.
            lb = np.zeros((0, (5 + nkpt * ndim) if keypoint else 5), dtype=np.float32)
        if keypoint:
            keypoints = lb[:, 5:].reshape(-1, nkpt, ndim)
            if ndim == 2:
                # Keypoints with any negative coordinate are marked invisible (visibility flag 0)
                kpt_mask = np.where((keypoints[..., 0] < 0) | (keypoints[..., 1] < 0), 0.0, 1.0).astype(np.float32)
                keypoints = np.concatenate([keypoints, kpt_mask[..., None]], axis=-1)  # (nl, nkpt, 3)
        lb = lb[:, :5]
        return im_file, lb, shape, segments, keypoints, nm, nf, ne, nc, msg
    except Exception as e:
        nc = 1
        msg = f"{prefix}WARNING ⚠️ {im_file}: ignoring corrupt image/label: {e}"
        return [None, None, None, None, None, nm, nf, ne, nc, msg]
165
+
166
+
167
def polygon2mask(imgsz, polygons, color=1, downsample_ratio=1):
    """
    Rasterize polygons into a single binary mask.

    Args:
        imgsz (tuple): The size of the image as (height, width).
        polygons (list[np.ndarray]): A list of polygons. Each polygon is an array with shape [N, M], where
            N is the number of polygons, and M is the number of points such that M % 2 = 0.
        color (int, optional): Fill value for the polygon interiors. Defaults to 1.
        downsample_ratio (int, optional): Factor by which the mask is shrunk after filling. Defaults to 1.

    Returns:
        (np.ndarray): A binary mask of the specified image size with the polygons filled in.
    """
    canvas = np.zeros(imgsz, dtype=np.uint8)
    pts = np.asarray(polygons, dtype=np.int32)
    pts = pts.reshape((pts.shape[0], -1, 2))
    cv2.fillPoly(canvas, pts, color=color)
    # Fill at full resolution and only then resize: keeps loss behaviour identical when mask-ratio == 1
    target_wh = (imgsz[1] // downsample_ratio, imgsz[0] // downsample_ratio)  # cv2.resize takes (width, height)
    return cv2.resize(canvas, target_wh)
188
+
189
+
190
def polygons2masks(imgsz, polygons, color, downsample_ratio=1):
    """
    Rasterize each polygon into its own binary mask and stack the results.

    Args:
        imgsz (tuple): The size of the image as (height, width).
        polygons (list[np.ndarray]): A list of polygons. Each polygon is an array with shape [N, M], where
            N is the number of polygons, and M is the number of points such that M % 2 = 0.
        color (int): Fill value for the polygon interiors.
        downsample_ratio (int, optional): Factor by which each mask is shrunk. Defaults to 1.

    Returns:
        (np.ndarray): Stacked per-instance binary masks of the specified image size.
    """
    per_instance = []
    for polygon in polygons:
        per_instance.append(polygon2mask(imgsz, [polygon.reshape(-1)], color, downsample_ratio))
    return np.array(per_instance)
205
+
206
+
207
def polygons2masks_overlap(imgsz, segments, downsample_ratio=1):
    """Merge per-instance masks into one index mask where larger instances are drawn first (smaller on top)."""
    # uint8 would overflow past 255 instance ids, so switch to int32 for crowded images
    dtype = np.int32 if len(segments) > 255 else np.uint8
    merged = np.zeros((imgsz[0] // downsample_ratio, imgsz[1] // downsample_ratio), dtype=dtype)
    instance_masks = []
    areas = []
    for seg in segments:
        m = polygon2mask(imgsz, [seg.reshape(-1)], downsample_ratio=downsample_ratio, color=1)
        instance_masks.append(m)
        areas.append(m.sum())
    order = np.argsort(-np.asarray(areas))  # largest area first so small instances end up on top
    instance_masks = np.array(instance_masks)[order]
    for idx in range(len(segments)):
        layer = instance_masks[idx] * (idx + 1)
        merged = merged + layer
        # Overlapping pixels would otherwise sum to an invalid id; clamp to the most recent instance id
        merged = np.clip(merged, a_min=0, a_max=idx + 1)
    return merged, order
227
+
228
+
229
def find_dataset_yaml(path: Path) -> Path:
    """
    Locate the single YAML file describing a Detect, Segment or Pose dataset.

    The directory root is searched first and only then recursively; when several YAMLs are found, files whose
    stem matches the directory name are preferred. Exactly one match must remain, otherwise an AssertionError
    is raised.

    Args:
        path (Path): The directory path to search for the YAML file.

    Returns:
        (Path): The path of the found YAML file.
    """
    candidates = list(path.glob("*.yaml"))
    if not candidates:  # nothing at root level, fall back to a recursive search
        candidates = list(path.rglob("*.yaml"))
    assert candidates, f"No YAML file found in '{path.resolve()}'"
    if len(candidates) > 1:
        candidates = [f for f in candidates if f.stem == path.stem]  # prefer *.yaml files that match
    assert len(candidates) == 1, f"Expected 1 YAML file in '{path.resolve()}', but found {len(candidates)}.\n{candidates}"
    return candidates[0]
249
+
250
+
251
def check_det_dataset(dataset, autodownload=True):
    """
    Download, verify, and/or unzip a dataset if not found locally.

    This function checks the availability of a specified dataset, and if not found, it has the option to download and
    unzip the dataset. It then reads and parses the accompanying YAML data, ensuring key requirements are met and also
    resolves paths related to the dataset.

    Args:
        dataset (str): Path to the dataset or dataset descriptor (like a YAML file).
        autodownload (bool, optional): Whether to automatically download the dataset if not found. Defaults to True.

    Returns:
        (dict): Parsed dataset information and paths.

    Raises:
        SyntaxError: If required YAML keys ('train'/'val' and 'names' or 'nc') are missing or inconsistent.
        FileNotFoundError: If validation images are missing and no autodownload is possible.
    """

    file = check_file(dataset)  # resolve descriptor to a local file (downloads it when given a URL)

    # Download (optional)
    extract_dir = ""
    if zipfile.is_zipfile(file) or is_tarfile(file):
        # Archive passed directly: extract into DATASETS_DIR, locate its YAML, and disable further autodownload
        new_dir = safe_download(file, dir=DATASETS_DIR, unzip=True, delete=False)
        file = find_dataset_yaml(DATASETS_DIR / new_dir)
        extract_dir, autodownload = file.parent, False

    # Read YAML
    data = yaml_load(file, append_filename=True)  # dictionary

    # Checks
    for k in "train", "val":
        if k not in data:
            if k != "val" or "validation" not in data:
                raise SyntaxError(
                    emojis(f"{dataset} '{k}:' key missing ❌.\n'train' and 'val' are required in all data YAMLs.")
                )
            # Accept the alternate 'validation' spelling by renaming the key in place
            LOGGER.info("WARNING ⚠️ renaming data YAML 'validation' key to 'val' to match YOLO format.")
            data["val"] = data.pop("validation")  # replace 'validation' key with 'val' key
    if "names" not in data and "nc" not in data:
        raise SyntaxError(emojis(f"{dataset} key missing ❌.\n either 'names' or 'nc' are required in all data YAMLs."))
    if "names" in data and "nc" in data and len(data["names"]) != data["nc"]:
        raise SyntaxError(emojis(f"{dataset} 'names' length {len(data['names'])} and 'nc: {data['nc']}' must match."))
    if "names" not in data:
        data["names"] = [f"class_{i}" for i in range(data["nc"])]  # synthesize placeholder class names
    else:
        data["nc"] = len(data["names"])  # derive nc from names so the two always agree

    data["names"] = check_class_names(data["names"])

    # Resolve paths
    path = Path(extract_dir or data.get("path") or Path(data.get("yaml_file", "")).parent)  # dataset root
    if not path.is_absolute():
        path = (DATASETS_DIR / path).resolve()

    # Set paths
    data["path"] = path  # download scripts
    for k in "train", "val", "test":
        if data.get(k):  # prepend path
            if isinstance(data[k], str):
                x = (path / data[k]).resolve()
                if not x.exists() and data[k].startswith("../"):
                    # Tolerate legacy '../'-prefixed relative paths by stripping the prefix and retrying
                    x = (path / data[k][3:]).resolve()
                data[k] = str(x)
            else:
                data[k] = [str((path / x).resolve()) for x in data[k]]

    # Parse YAML
    val, s = (data.get(x) for x in ("val", "download"))
    if val:
        val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])]  # val path
        if not all(x.exists() for x in val):
            name = clean_url(dataset)  # dataset name with URL auth stripped
            m = f"\nDataset '{name}' images not found ⚠️, missing path '{[x for x in val if not x.exists()][0]}'"
            if s and autodownload:
                LOGGER.warning(m)
            else:
                m += f"\nNote dataset download directory is '{DATASETS_DIR}'. You can update this in '{SETTINGS_YAML}'"
                raise FileNotFoundError(m)
            t = time.time()
            r = None  # success
            if s.startswith("http") and s.endswith(".zip"):  # URL
                safe_download(url=s, dir=DATASETS_DIR, delete=True)
            elif s.startswith("bash "):  # bash script
                LOGGER.info(f"Running {s} ...")
                r = os.system(s)
            else:  # python script
                # NOTE(review): executes code supplied by the dataset YAML — only use trusted dataset descriptors
                exec(s, {"yaml": data})
            dt = f"({round(time.time() - t, 1)}s)"
            s = f"success ✅ {dt}, saved to {colorstr('bold', DATASETS_DIR)}" if r in (0, None) else f"failure {dt} ❌"
            LOGGER.info(f"Dataset download {s}\n")
    check_font("Arial.ttf" if is_ascii(data["names"]) else "Arial.Unicode.ttf")  # download fonts

    return data  # dictionary
343
+
344
+
345
def check_cls_dataset(dataset, split=""):
    """
    Checks a classification dataset such as Imagenet.

    This function accepts a `dataset` name and attempts to retrieve the corresponding dataset information.
    If the dataset is not found locally, it attempts to download the dataset from the internet and save it locally.

    Args:
        dataset (str | Path): The name of the dataset.
        split (str, optional): The split of the dataset. Either 'val', 'test', or ''. Defaults to ''.

    Returns:
        (dict): A dictionary containing the following keys:
            - 'train' (Path): The directory path containing the training set of the dataset.
            - 'val' (Path): The directory path containing the validation set of the dataset.
            - 'test' (Path): The directory path containing the test set of the dataset.
            - 'nc' (int): The number of classes in the dataset.
            - 'names' (dict): A dictionary of class names in the dataset.

    Raises:
        FileNotFoundError: If the train split contains no images.
    """

    # Download (optional if dataset=https://file.zip is passed directly)
    if str(dataset).startswith(("http:/", "https:/")):
        dataset = safe_download(dataset, dir=DATASETS_DIR, unzip=True, delete=False)

    dataset = Path(dataset)
    data_dir = (dataset if dataset.is_dir() else (DATASETS_DIR / dataset)).resolve()
    if not data_dir.is_dir():
        # Dataset missing locally: 'imagenet' uses a helper shell script, anything else a GitHub release zip
        LOGGER.warning(f"\nDataset not found ⚠️, missing path {data_dir}, attempting download...")
        t = time.time()
        if str(dataset) == "imagenet":
            subprocess.run(f"bash {ROOT / 'data/scripts/get_imagenet.sh'}", shell=True, check=True)
        else:
            url = f"https://github.com/ultralytics/yolov5/releases/download/v1.0/{dataset}.zip"
            download(url, dir=data_dir.parent)
        s = f"Dataset download success ✅ ({time.time() - t:.1f}s), saved to {colorstr('bold', data_dir)}\n"
        LOGGER.info(s)
    train_set = data_dir / "train"
    # Accept either a 'val' or the alternate 'validation' directory name; None when neither exists
    val_set = (
        data_dir / "val"
        if (data_dir / "val").exists()
        else data_dir / "validation"
        if (data_dir / "validation").exists()
        else None
    )  # data/test or data/val
    test_set = data_dir / "test" if (data_dir / "test").exists() else None  # data/val or data/test
    if split == "val" and not val_set:
        LOGGER.warning("WARNING ⚠️ Dataset 'split=val' not found, using 'split=test' instead.")
    elif split == "test" and not test_set:
        LOGGER.warning("WARNING ⚠️ Dataset 'split=test' not found, using 'split=val' instead.")

    nc = len([x for x in (data_dir / "train").glob("*") if x.is_dir()])  # number of classes
    names = [x.name for x in (data_dir / "train").iterdir() if x.is_dir()]  # class names list
    names = dict(enumerate(sorted(names)))  # map index 0..nc-1 -> class name, sorted alphabetically

    # Print to console
    for k, v in {"train": train_set, "val": val_set, "test": test_set}.items():
        prefix = f'{colorstr(f"{k}:")} {v}...'
        if v is None:
            LOGGER.info(prefix)
        else:
            files = [path for path in v.rglob("*.*") if path.suffix[1:].lower() in IMG_FORMATS]
            nf = len(files)  # number of files
            nd = len({file.parent for file in files})  # number of directories
            if nf == 0:
                if k == "train":
                    raise FileNotFoundError(emojis(f"{dataset} '{k}:' no training images found ❌ "))
                else:
                    LOGGER.warning(f"{prefix} found {nf} images in {nd} classes: WARNING ⚠️ no images found")
            elif nd != nc:
                # Each class must live in its own directory; a mismatch means a malformed split
                LOGGER.warning(f"{prefix} found {nf} images in {nd} classes: ERROR ❌️ requires {nc} classes, not {nd}")
            else:
                LOGGER.info(f"{prefix} found {nf} images in {nd} classes ✅ ")

    return {"train": train_set, "val": val_set, "test": test_set, "nc": nc, "names": names}
419
+
420
+
421
class HUBDatasetStats:
    """
    A class for generating HUB dataset JSON and `-hub` dataset directory.

    Args:
        path (str): Path to data.yaml or data.zip (with data.yaml inside data.zip). Default is 'coco8.yaml'.
        task (str): Dataset task. Options are 'detect', 'segment', 'pose', 'classify'. Default is 'detect'.
        autodownload (bool): Attempt to download dataset if not found locally. Default is False.

    Example:
        Download *.zip files from https://github.com/ultralytics/hub/tree/main/example_datasets
        i.e. https://github.com/ultralytics/hub/raw/main/example_datasets/coco8.zip for coco8.zip.
        ```python
        from ultralytics.data.utils import HUBDatasetStats

        stats = HUBDatasetStats('path/to/coco8.zip', task='detect')  # detect dataset
        stats = HUBDatasetStats('path/to/coco8-seg.zip', task='segment')  # segment dataset
        stats = HUBDatasetStats('path/to/coco8-pose.zip', task='pose')  # pose dataset
        stats = HUBDatasetStats('path/to/imagenet10.zip', task='classify')  # classification dataset

        stats.get_json(save=True)
        stats.process_images()
        ```
    """

    def __init__(self, path="coco8.yaml", task="detect", autodownload=False):
        """Initialize class: verify the dataset, resolve its root and prepare the '-hub' output directory."""
        path = Path(path).resolve()
        LOGGER.info(f"Starting HUB dataset checks for {path}....")

        self.task = task  # detect, segment, pose, classify
        if self.task == "classify":
            unzip_dir = unzip_file(path)
            data = check_cls_dataset(unzip_dir)
            data["path"] = unzip_dir
        else:  # detect, segment, pose
            _, data_dir, yaml_path = self._unzip(Path(path))
            try:
                # Load YAML with checks
                data = yaml_load(yaml_path)
                data["path"] = ""  # strip path since YAML should be in dataset root for all HUB datasets
                yaml_save(yaml_path, data)
                data = check_det_dataset(yaml_path, autodownload)  # dict
                data["path"] = data_dir  # YAML path should be set to '' (relative) or parent (absolute)
            except Exception as e:
                raise Exception("error/HUB/dataset_stats/init") from e

        self.hub_dir = Path(f'{data["path"]}-hub')  # sibling '<dataset>-hub' output directory
        self.im_dir = self.hub_dir / "images"
        self.im_dir.mkdir(parents=True, exist_ok=True)  # makes /images
        self.stats = {"nc": len(data["names"]), "names": list(data["names"].values())}  # statistics dictionary
        self.data = data

    @staticmethod
    def _unzip(path):
        """Unzip data.zip; returns (zipped, data_dir, yaml_path). Non-zip paths pass straight through."""
        if not str(path).endswith(".zip"):  # path is data.yaml
            return False, None, path
        unzip_dir = unzip_file(path, path=path.parent)
        assert unzip_dir.is_dir(), (
            f"Error unzipping {path}, {unzip_dir} not found. " f"path/to/abc.zip MUST unzip to path/to/abc/"
        )
        return True, str(unzip_dir), find_dataset_yaml(unzip_dir)  # zipped, data_dir, yaml_path

    def _hub_ops(self, f):
        """Saves a compressed image for HUB previews."""
        compress_one_image(f, self.im_dir / Path(f).name)  # save to dataset-hub

    def get_json(self, save=False, verbose=False):
        """Return dataset JSON for Ultralytics HUB, optionally saving it to '<hub_dir>/stats.json'."""

        def _round(labels):
            """Update labels to integer class and 4 decimal place floats."""
            if self.task == "detect":
                coordinates = labels["bboxes"]
            elif self.task == "segment":
                coordinates = [x.flatten() for x in labels["segments"]]
            elif self.task == "pose":
                # Pose rows are bbox coordinates followed by the flattened keypoints
                n = labels["keypoints"].shape[0]
                coordinates = np.concatenate((labels["bboxes"], labels["keypoints"].reshape(n, -1)), 1)
            else:
                raise ValueError("Undefined dataset task.")
            zipped = zip(labels["cls"], coordinates)
            return [[int(c[0]), *(round(float(x), 4) for x in points)] for c, points in zipped]

        for split in "train", "val", "test":
            self.stats[split] = None  # predefine
            path = self.data.get(split)

            # Check split
            if path is None:  # no split
                continue
            files = [f for f in Path(path).rglob("*.*") if f.suffix[1:].lower() in IMG_FORMATS]  # image files in split
            if not files:  # no images
                continue

            # Get dataset statistics
            if self.task == "classify":
                from torchvision.datasets import ImageFolder

                dataset = ImageFolder(self.data[split])

                # Per-class image counts via the (path, class_index) pairs in dataset.imgs
                x = np.zeros(len(dataset.classes)).astype(int)
                for im in dataset.imgs:
                    x[im[1]] += 1

                self.stats[split] = {
                    "instance_stats": {"total": len(dataset), "per_class": x.tolist()},
                    "image_stats": {"total": len(dataset), "unlabelled": 0, "per_class": x.tolist()},
                    "labels": [{Path(k).name: v} for k, v in dataset.imgs],
                }
            else:
                from yolov8_model.ultralytics.data import YOLODataset

                dataset = YOLODataset(img_path=self.data[split], data=self.data, task=self.task)
                # Per-image class histograms: one row of class counts per image
                x = np.array(
                    [
                        np.bincount(label["cls"].astype(int).flatten(), minlength=self.data["nc"])
                        for label in TQDM(dataset.labels, total=len(dataset), desc="Statistics")
                    ]
                )  # shape(128x80)
                self.stats[split] = {
                    "instance_stats": {"total": int(x.sum()), "per_class": x.sum(0).tolist()},
                    "image_stats": {
                        "total": len(dataset),
                        "unlabelled": int(np.all(x == 0, 1).sum()),
                        "per_class": (x > 0).sum(0).tolist(),
                    },
                    "labels": [{Path(k).name: _round(v)} for k, v in zip(dataset.im_files, dataset.labels)],
                }

        # Save, print and return
        if save:
            stats_path = self.hub_dir / "stats.json"
            LOGGER.info(f"Saving {stats_path.resolve()}...")
            with open(stats_path, "w") as f:
                json.dump(self.stats, f)  # save stats.json
        if verbose:
            LOGGER.info(json.dumps(self.stats, indent=2, sort_keys=False))
        return self.stats

    def process_images(self):
        """Compress images for Ultralytics HUB."""
        from yolov8_model.ultralytics.data import YOLODataset  # ClassificationDataset

        for split in "train", "val", "test":
            if self.data.get(split) is None:
                continue
            dataset = YOLODataset(img_path=self.data[split], data=self.data)
            # Compress every image in parallel; iterating the imap exhausts the pool work
            with ThreadPool(NUM_THREADS) as pool:
                for _ in TQDM(pool.imap(self._hub_ops, dataset.im_files), total=len(dataset), desc=f"{split} images"):
                    pass
        LOGGER.info(f"Done. All images saved to {self.im_dir}")
        return self.im_dir
575
+
576
+
577
def compress_one_image(f, f_new=None, max_dim=1920, quality=50):
    """
    Compress one image so its longest side is at most `max_dim` pixels, preserving the aspect ratio.

    PIL is tried first; any PIL failure falls back to OpenCV. Images already within `max_dim` are only
    re-encoded, not resized.

    Args:
        f (str): The path to the input image file.
        f_new (str, optional): The path to the output image file. If not specified, the input file will be overwritten.
        max_dim (int, optional): The maximum dimension (width or height) of the output image. Default is 1920 pixels.
        quality (int, optional): The image compression quality as a percentage. Default is 50%.

    Example:
        ```python
        from pathlib import Path
        from ultralytics.data.utils import compress_one_image

        for f in Path('path/to/dataset').rglob('*.jpg'):
            compress_one_image(f)
        ```
    """
    try:  # use PIL
        im = Image.open(f)
        scale = max_dim / max(im.height, im.width)
        if scale < 1.0:  # only shrink oversized images
            im = im.resize((int(im.width * scale), int(im.height * scale)))
        im.save(f_new or f, "JPEG", quality=quality, optimize=True)
    except Exception as e:  # use OpenCV
        LOGGER.info(f"WARNING ⚠️ HUB ops PIL failure {f}: {e}")
        im = cv2.imread(f)
        im_height, im_width = im.shape[:2]
        scale = max_dim / max(im_height, im_width)
        if scale < 1.0:
            im = cv2.resize(im, (int(im_width * scale), int(im_height * scale)), interpolation=cv2.INTER_AREA)
        cv2.imwrite(str(f_new or f), im)
613
+
614
+
615
def autosplit(path=DATASETS_DIR / "coco8/images", weights=(0.9, 0.1, 0.0), annotated_only=False):
    """
    Randomly assign every image under `path` to a train/val/test split and record the assignments in
    autosplit_*.txt files written next to the images directory.

    Args:
        path (Path, optional): Path to images directory. Defaults to DATASETS_DIR / 'coco8/images'.
        weights (list | tuple, optional): Train, validation, and test split fractions. Defaults to (0.9, 0.1, 0.0).
        annotated_only (bool, optional): If True, only images with an associated txt file are used. Defaults to False.

    Example:
        ```python
        from ultralytics.data.utils import autosplit

        autosplit()
        ```
    """
    path = Path(path)  # images dir
    images = sorted(p for p in path.rglob("*.*") if p.suffix[1:].lower() in IMG_FORMATS)  # image files only
    n = len(images)
    random.seed(0)  # fixed seed so the split is reproducible across runs
    split_ids = random.choices([0, 1, 2], weights=weights, k=n)  # one split index per image

    txt = ["autosplit_train.txt", "autosplit_val.txt", "autosplit_test.txt"]  # 3 txt files
    for name in txt:
        out_file = path.parent / name
        if out_file.exists():
            out_file.unlink()  # start each split file from scratch

    LOGGER.info(f"Autosplitting images from {path}" + ", using *.txt labeled images only" * annotated_only)
    for split_id, img in TQDM(zip(split_ids, images), total=n):
        if not annotated_only or Path(img2label_paths([str(img)])[0]).exists():  # check label
            with open(path.parent / txt[split_id], "a") as f:
                f.write(f"./{img.relative_to(path.parent).as_posix()}" + "\n")  # add image to txt file
yolov8_model/ultralytics/engine/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
yolov8_model/ultralytics/engine/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (172 Bytes). View file
 
yolov8_model/ultralytics/engine/__pycache__/exporter.cpython-310.pyc ADDED
Binary file (38.5 kB). View file
 
yolov8_model/ultralytics/engine/__pycache__/model.cpython-310.pyc ADDED
Binary file (35.1 kB). View file
 
yolov8_model/ultralytics/engine/__pycache__/predictor.cpython-310.pyc ADDED
Binary file (14.7 kB). View file
 
yolov8_model/ultralytics/engine/__pycache__/results.cpython-310.pyc ADDED
Binary file (27.3 kB). View file
 
yolov8_model/ultralytics/engine/__pycache__/trainer.cpython-310.pyc ADDED
Binary file (26.3 kB). View file
 
yolov8_model/ultralytics/engine/__pycache__/validator.cpython-310.pyc ADDED
Binary file (13.2 kB). View file
 
yolov8_model/ultralytics/engine/exporter.py ADDED
@@ -0,0 +1,1099 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ """
3
+ Export a YOLOv8 PyTorch model to other formats. TensorFlow exports authored by https://github.com/zldrobit
4
+
5
+ Format | `format=argument` | Model
6
+ --- | --- | ---
7
+ PyTorch | - | yolov8n.pt
8
+ TorchScript | `torchscript` | yolov8n.torchscript
9
+ ONNX | `onnx` | yolov8n.onnx
10
+ OpenVINO | `openvino` | yolov8n_openvino_model/
11
+ TensorRT | `engine` | yolov8n.engine
12
+ CoreML | `coreml` | yolov8n.mlpackage
13
+ TensorFlow SavedModel | `saved_model` | yolov8n_saved_model/
14
+ TensorFlow GraphDef | `pb` | yolov8n.pb
15
+ TensorFlow Lite | `tflite` | yolov8n.tflite
16
+ TensorFlow Edge TPU | `edgetpu` | yolov8n_edgetpu.tflite
17
+ TensorFlow.js | `tfjs` | yolov8n_web_model/
18
+ PaddlePaddle | `paddle` | yolov8n_paddle_model/
19
+ ncnn | `ncnn` | yolov8n_ncnn_model/
20
+
21
+ Requirements:
22
+ $ pip install "ultralytics[export]"
23
+
24
+ Python:
25
+ from ultralytics import YOLO
26
+ model = YOLO('yolov8n.pt')
27
+ results = model.export(format='onnx')
28
+
29
+ CLI:
30
+ $ yolo mode=export model=yolov8n.pt format=onnx
31
+
32
+ Inference:
33
+ $ yolo predict model=yolov8n.pt # PyTorch
34
+ yolov8n.torchscript # TorchScript
35
+ yolov8n.onnx # ONNX Runtime or OpenCV DNN with dnn=True
36
+ yolov8n_openvino_model # OpenVINO
37
+ yolov8n.engine # TensorRT
38
+ yolov8n.mlpackage # CoreML (macOS-only)
39
+ yolov8n_saved_model # TensorFlow SavedModel
40
+ yolov8n.pb # TensorFlow GraphDef
41
+ yolov8n.tflite # TensorFlow Lite
42
+ yolov8n_edgetpu.tflite # TensorFlow Edge TPU
43
+ yolov8n_paddle_model # PaddlePaddle
44
+
45
+ TensorFlow.js:
46
+ $ cd .. && git clone https://github.com/zldrobit/tfjs-yolov5-example.git && cd tfjs-yolov5-example
47
+ $ npm install
48
+ $ ln -s ../../yolov5/yolov8n_web_model public/yolov8n_web_model
49
+ $ npm start
50
+ """
51
+ import json
52
+ import os
53
+ import shutil
54
+ import subprocess
55
+ import time
56
+ import warnings
57
+ from copy import deepcopy
58
+ from datetime import datetime
59
+ from pathlib import Path
60
+
61
+ import numpy as np
62
+ import torch
63
+
64
+ from yolov8_model.ultralytics.cfg import get_cfg
65
+ from yolov8_model.ultralytics.data.dataset import YOLODataset
66
+ from yolov8_model.ultralytics.data.utils import check_det_dataset
67
+ from yolov8_model.ultralytics.nn.autobackend import check_class_names, default_class_names
68
+ from yolov8_model.ultralytics.nn.modules import C2f, Detect, RTDETRDecoder
69
+ from yolov8_model.ultralytics.nn.tasks import DetectionModel, SegmentationModel
70
+ from yolov8_model.ultralytics.utils import (
71
+ ARM64,
72
+ DEFAULT_CFG,
73
+ LINUX,
74
+ LOGGER,
75
+ MACOS,
76
+ ROOT,
77
+ WINDOWS,
78
+ __version__,
79
+ callbacks,
80
+ colorstr,
81
+ get_default_args,
82
+ yaml_save,
83
+ )
84
+ from yolov8_model.ultralytics.utils.checks import check_imgsz, check_is_path_safe, check_requirements, check_version
85
+ from yolov8_model.ultralytics.utils.downloads import attempt_download_asset, get_github_assets
86
+ from yolov8_model.ultralytics.utils.files import file_size, spaces_in_path
87
+ from yolov8_model.ultralytics.utils.ops import Profile
88
+ from yolov8_model.ultralytics.utils.torch_utils import get_latest_opset, select_device, smart_inference_mode
89
+
90
+
91
+ def export_formats():
92
+ """YOLOv8 export formats."""
93
+ import pandas
94
+
95
+ x = [
96
+ ["PyTorch", "-", ".pt", True, True],
97
+ ["TorchScript", "torchscript", ".torchscript", True, True],
98
+ ["ONNX", "onnx", ".onnx", True, True],
99
+ ["OpenVINO", "openvino", "_openvino_model", True, False],
100
+ ["TensorRT", "engine", ".engine", False, True],
101
+ ["CoreML", "coreml", ".mlpackage", True, False],
102
+ ["TensorFlow SavedModel", "saved_model", "_saved_model", True, True],
103
+ ["TensorFlow GraphDef", "pb", ".pb", True, True],
104
+ ["TensorFlow Lite", "tflite", ".tflite", True, False],
105
+ ["TensorFlow Edge TPU", "edgetpu", "_edgetpu.tflite", True, False],
106
+ ["TensorFlow.js", "tfjs", "_web_model", True, False],
107
+ ["PaddlePaddle", "paddle", "_paddle_model", True, True],
108
+ ["ncnn", "ncnn", "_ncnn_model", True, True],
109
+ ]
110
+ return pandas.DataFrame(x, columns=["Format", "Argument", "Suffix", "CPU", "GPU"])
111
+
112
+
113
+ def gd_outputs(gd):
114
+ """TensorFlow GraphDef model output node names."""
115
+ name_list, input_list = [], []
116
+ for node in gd.node: # tensorflow.core.framework.node_def_pb2.NodeDef
117
+ name_list.append(node.name)
118
+ input_list.extend(node.input)
119
+ return sorted(f"{x}:0" for x in list(set(name_list) - set(input_list)) if not x.startswith("NoOp"))
120
+
121
+
122
+ def try_export(inner_func):
123
+ """YOLOv8 export decorator, i..e @try_export."""
124
+ inner_args = get_default_args(inner_func)
125
+
126
+ def outer_func(*args, **kwargs):
127
+ """Export a model."""
128
+ prefix = inner_args["prefix"]
129
+ try:
130
+ with Profile() as dt:
131
+ f, model = inner_func(*args, **kwargs)
132
+ LOGGER.info(f"{prefix} export success ✅ {dt.t:.1f}s, saved as '{f}' ({file_size(f):.1f} MB)")
133
+ return f, model
134
+ except Exception as e:
135
+ LOGGER.info(f"{prefix} export failure ❌ {dt.t:.1f}s: {e}")
136
+ raise e
137
+
138
+ return outer_func
139
+
140
+
141
+ class Exporter:
142
+ """
143
+ A class for exporting a model.
144
+
145
+ Attributes:
146
+ args (SimpleNamespace): Configuration for the exporter.
147
+ callbacks (list, optional): List of callback functions. Defaults to None.
148
+ """
149
+
150
+ def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
151
+ """
152
+ Initializes the Exporter class.
153
+
154
+ Args:
155
+ cfg (str, optional): Path to a configuration file. Defaults to DEFAULT_CFG.
156
+ overrides (dict, optional): Configuration overrides. Defaults to None.
157
+ _callbacks (dict, optional): Dictionary of callback functions. Defaults to None.
158
+ """
159
+ self.args = get_cfg(cfg, overrides)
160
+ if self.args.format.lower() in ("coreml", "mlmodel"): # fix attempt for protobuf<3.20.x errors
161
+ os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python" # must run before TensorBoard callback
162
+
163
+ self.callbacks = _callbacks or callbacks.get_default_callbacks()
164
+ callbacks.add_integration_callbacks(self)
165
+
166
+ @smart_inference_mode()
167
+ def __call__(self, model=None):
168
+ """Returns list of exported files/dirs after running callbacks."""
169
+ self.run_callbacks("on_export_start")
170
+ t = time.time()
171
+ fmt = self.args.format.lower() # to lowercase
172
+ if fmt in ("tensorrt", "trt"): # 'engine' aliases
173
+ fmt = "engine"
174
+ if fmt in ("mlmodel", "mlpackage", "mlprogram", "apple", "ios", "coreml"): # 'coreml' aliases
175
+ fmt = "coreml"
176
+ fmts = tuple(export_formats()["Argument"][1:]) # available export formats
177
+ flags = [x == fmt for x in fmts]
178
+ if sum(flags) != 1:
179
+ raise ValueError(f"Invalid export format='{fmt}'. Valid formats are {fmts}")
180
+ jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, ncnn = flags # export booleans
181
+
182
+ # Device
183
+ if fmt == "engine" and self.args.device is None:
184
+ LOGGER.warning("WARNING ⚠️ TensorRT requires GPU export, automatically assigning device=0")
185
+ self.args.device = "0"
186
+ self.device = select_device("cpu" if self.args.device is None else self.args.device)
187
+
188
+ # Checks
189
+ if not hasattr(model, "names"):
190
+ model.names = default_class_names()
191
+ model.names = check_class_names(model.names)
192
+ if self.args.half and onnx and self.device.type == "cpu":
193
+ LOGGER.warning("WARNING ⚠️ half=True only compatible with GPU export, i.e. use device=0")
194
+ self.args.half = False
195
+ assert not self.args.dynamic, "half=True not compatible with dynamic=True, i.e. use only one."
196
+ self.imgsz = check_imgsz(self.args.imgsz, stride=model.stride, min_dim=2) # check image size
197
+ if self.args.optimize:
198
+ assert not ncnn, "optimize=True not compatible with format='ncnn', i.e. use optimize=False"
199
+ assert self.device.type == "cpu", "optimize=True not compatible with cuda devices, i.e. use device='cpu'"
200
+ if edgetpu and not LINUX:
201
+ raise SystemError("Edge TPU export only supported on Linux. See https://coral.ai/docs/edgetpu/compiler/")
202
+
203
+ # Input
204
+ im = torch.zeros(self.args.batch, 3, *self.imgsz).to(self.device)
205
+ file = Path(
206
+ getattr(model, "pt_path", None) or getattr(model, "yaml_file", None) or model.yaml.get("yaml_file", "")
207
+ )
208
+ if file.suffix in {".yaml", ".yml"}:
209
+ file = Path(file.name)
210
+
211
+ # Update model
212
+ model = deepcopy(model).to(self.device)
213
+ for p in model.parameters():
214
+ p.requires_grad = False
215
+ model.eval()
216
+ model.float()
217
+ model = model.fuse()
218
+ for m in model.modules():
219
+ if isinstance(m, (Detect, RTDETRDecoder)): # Segment and Pose use Detect base class
220
+ m.dynamic = self.args.dynamic
221
+ m.export = True
222
+ m.format = self.args.format
223
+ elif isinstance(m, C2f) and not any((saved_model, pb, tflite, edgetpu, tfjs)):
224
+ # EdgeTPU does not support FlexSplitV while split provides cleaner ONNX graph
225
+ m.forward = m.forward_split
226
+
227
+ y = None
228
+ for _ in range(2):
229
+ y = model(im) # dry runs
230
+ if self.args.half and onnx and self.device.type != "cpu":
231
+ im, model = im.half(), model.half() # to FP16
232
+
233
+ # Filter warnings
234
+ warnings.filterwarnings("ignore", category=torch.jit.TracerWarning) # suppress TracerWarning
235
+ warnings.filterwarnings("ignore", category=UserWarning) # suppress shape prim::Constant missing ONNX warning
236
+ warnings.filterwarnings("ignore", category=DeprecationWarning) # suppress CoreML np.bool deprecation warning
237
+
238
+ # Assign
239
+ self.im = im
240
+ self.model = model
241
+ self.file = file
242
+ self.output_shape = (
243
+ tuple(y.shape)
244
+ if isinstance(y, torch.Tensor)
245
+ else tuple(tuple(x.shape if isinstance(x, torch.Tensor) else []) for x in y)
246
+ )
247
+ self.pretty_name = Path(self.model.yaml.get("yaml_file", self.file)).stem.replace("yolo", "YOLO")
248
+ data = model.args["data"] if hasattr(model, "args") and isinstance(model.args, dict) else ""
249
+ description = f'Ultralytics {self.pretty_name} model {f"trained on {data}" if data else ""}'
250
+ self.metadata = {
251
+ "description": description,
252
+ "author": "Ultralytics",
253
+ "license": "AGPL-3.0 https://ultralytics.com/license",
254
+ "date": datetime.now().isoformat(),
255
+ "version": __version__,
256
+ "stride": int(max(model.stride)),
257
+ "task": model.task,
258
+ "batch": self.args.batch,
259
+ "imgsz": self.imgsz,
260
+ "names": model.names,
261
+ } # model metadata
262
+ if model.task == "pose":
263
+ self.metadata["kpt_shape"] = model.model[-1].kpt_shape
264
+
265
+ LOGGER.info(
266
+ f"\n{colorstr('PyTorch:')} starting from '{file}' with input shape {tuple(im.shape)} BCHW and "
267
+ f'output shape(s) {self.output_shape} ({file_size(file):.1f} MB)'
268
+ )
269
+
270
+ # Exports
271
+ f = [""] * len(fmts) # exported filenames
272
+ if jit or ncnn: # TorchScript
273
+ f[0], _ = self.export_torchscript()
274
+ if engine: # TensorRT required before ONNX
275
+ f[1], _ = self.export_engine()
276
+ if onnx or xml: # OpenVINO requires ONNX
277
+ f[2], _ = self.export_onnx()
278
+ if xml: # OpenVINO
279
+ f[3], _ = self.export_openvino()
280
+ if coreml: # CoreML
281
+ f[4], _ = self.export_coreml()
282
+ if any((saved_model, pb, tflite, edgetpu, tfjs)): # TensorFlow formats
283
+ self.args.int8 |= edgetpu
284
+ f[5], keras_model = self.export_saved_model()
285
+ if pb or tfjs: # pb prerequisite to tfjs
286
+ f[6], _ = self.export_pb(keras_model=keras_model)
287
+ if tflite:
288
+ f[7], _ = self.export_tflite(keras_model=keras_model, nms=False, agnostic_nms=self.args.agnostic_nms)
289
+ if edgetpu:
290
+ f[8], _ = self.export_edgetpu(tflite_model=Path(f[5]) / f"{self.file.stem}_full_integer_quant.tflite")
291
+ if tfjs:
292
+ f[9], _ = self.export_tfjs()
293
+ if paddle: # PaddlePaddle
294
+ f[10], _ = self.export_paddle()
295
+ if ncnn: # ncnn
296
+ f[11], _ = self.export_ncnn()
297
+
298
+ # Finish
299
+ f = [str(x) for x in f if x] # filter out '' and None
300
+ if any(f):
301
+ f = str(Path(f[-1]))
302
+ square = self.imgsz[0] == self.imgsz[1]
303
+ s = (
304
+ ""
305
+ if square
306
+ else f"WARNING ⚠️ non-PyTorch val requires square images, 'imgsz={self.imgsz}' will not "
307
+ f"work. Use export 'imgsz={max(self.imgsz)}' if val is required."
308
+ )
309
+ imgsz = self.imgsz[0] if square else str(self.imgsz)[1:-1].replace(" ", "")
310
+ predict_data = f"data={data}" if model.task == "segment" and fmt == "pb" else ""
311
+ q = "int8" if self.args.int8 else "half" if self.args.half else "" # quantization
312
+ LOGGER.info(
313
+ f'\nExport complete ({time.time() - t:.1f}s)'
314
+ f"\nResults saved to {colorstr('bold', file.parent.resolve())}"
315
+ f'\nPredict: yolo predict task={model.task} model={f} imgsz={imgsz} {q} {predict_data}'
316
+ f'\nValidate: yolo val task={model.task} model={f} imgsz={imgsz} data={data} {q} {s}'
317
+ f'\nVisualize: https://netron.app'
318
+ )
319
+
320
+ self.run_callbacks("on_export_end")
321
+ return f # return list of exported files/dirs
322
+
323
+ @try_export
324
+ def export_torchscript(self, prefix=colorstr("TorchScript:")):
325
+ """YOLOv8 TorchScript model export."""
326
+ LOGGER.info(f"\n{prefix} starting export with torch {torch.__version__}...")
327
+ f = self.file.with_suffix(".torchscript")
328
+
329
+ ts = torch.jit.trace(self.model, self.im, strict=False)
330
+ extra_files = {"config.txt": json.dumps(self.metadata)} # torch._C.ExtraFilesMap()
331
+ if self.args.optimize: # https://pytorch.org/tutorials/recipes/mobile_interpreter.html
332
+ LOGGER.info(f"{prefix} optimizing for mobile...")
333
+ from torch.utils.mobile_optimizer import optimize_for_mobile
334
+
335
+ optimize_for_mobile(ts)._save_for_lite_interpreter(str(f), _extra_files=extra_files)
336
+ else:
337
+ ts.save(str(f), _extra_files=extra_files)
338
+ return f, None
339
+
340
+ @try_export
341
+ def export_onnx(self, prefix=colorstr("ONNX:")):
342
+ """YOLOv8 ONNX export."""
343
+ requirements = ["onnx>=1.12.0"]
344
+ if self.args.simplify:
345
+ requirements += ["onnxsim>=0.4.33", "onnxruntime-gpu" if torch.cuda.is_available() else "onnxruntime"]
346
+ check_requirements(requirements)
347
+ import onnx # noqa
348
+
349
+ opset_version = self.args.opset or get_latest_opset()
350
+ LOGGER.info(f"\n{prefix} starting export with onnx {onnx.__version__} opset {opset_version}...")
351
+ f = str(self.file.with_suffix(".onnx"))
352
+
353
+ output_names = ["output0", "output1"] if isinstance(self.model, SegmentationModel) else ["output0"]
354
+ dynamic = self.args.dynamic
355
+ if dynamic:
356
+ dynamic = {"images": {0: "batch", 2: "height", 3: "width"}} # shape(1,3,640,640)
357
+ if isinstance(self.model, SegmentationModel):
358
+ dynamic["output0"] = {0: "batch", 2: "anchors"} # shape(1, 116, 8400)
359
+ dynamic["output1"] = {0: "batch", 2: "mask_height", 3: "mask_width"} # shape(1,32,160,160)
360
+ elif isinstance(self.model, DetectionModel):
361
+ dynamic["output0"] = {0: "batch", 2: "anchors"} # shape(1, 84, 8400)
362
+
363
+ torch.onnx.export(
364
+ self.model.cpu() if dynamic else self.model, # dynamic=True only compatible with cpu
365
+ self.im.cpu() if dynamic else self.im,
366
+ f,
367
+ verbose=False,
368
+ opset_version=opset_version,
369
+ do_constant_folding=True, # WARNING: DNN inference with torch>=1.12 may require do_constant_folding=False
370
+ input_names=["images"],
371
+ output_names=output_names,
372
+ dynamic_axes=dynamic or None,
373
+ )
374
+
375
+ # Checks
376
+ model_onnx = onnx.load(f) # load onnx model
377
+ # onnx.checker.check_model(model_onnx) # check onnx model
378
+
379
+ # Simplify
380
+ if self.args.simplify:
381
+ try:
382
+ import onnxsim
383
+
384
+ LOGGER.info(f"{prefix} simplifying with onnxsim {onnxsim.__version__}...")
385
+ # subprocess.run(f'onnxsim "{f}" "{f}"', shell=True)
386
+ model_onnx, check = onnxsim.simplify(model_onnx)
387
+ assert check, "Simplified ONNX model could not be validated"
388
+ except Exception as e:
389
+ LOGGER.info(f"{prefix} simplifier failure: {e}")
390
+
391
+ # Metadata
392
+ for k, v in self.metadata.items():
393
+ meta = model_onnx.metadata_props.add()
394
+ meta.key, meta.value = k, str(v)
395
+
396
+ onnx.save(model_onnx, f)
397
+ return f, model_onnx
398
+
399
+ @try_export
400
+ def export_openvino(self, prefix=colorstr("OpenVINO:")):
401
+ """YOLOv8 OpenVINO export."""
402
+ check_requirements("openvino-dev>=2023.0") # requires openvino-dev: https://pypi.org/project/openvino-dev/
403
+ import openvino.runtime as ov # noqa
404
+ from openvino.tools import mo # noqa
405
+
406
+ LOGGER.info(f"\n{prefix} starting export with openvino {ov.__version__}...")
407
+ f = str(self.file).replace(self.file.suffix, f"_openvino_model{os.sep}")
408
+ fq = str(self.file).replace(self.file.suffix, f"_int8_openvino_model{os.sep}")
409
+ f_onnx = self.file.with_suffix(".onnx")
410
+ f_ov = str(Path(f) / self.file.with_suffix(".xml").name)
411
+ fq_ov = str(Path(fq) / self.file.with_suffix(".xml").name)
412
+
413
+ def serialize(ov_model, file):
414
+ """Set RT info, serialize and save metadata YAML."""
415
+ ov_model.set_rt_info("YOLOv8", ["model_info", "model_type"])
416
+ ov_model.set_rt_info(True, ["model_info", "reverse_input_channels"])
417
+ ov_model.set_rt_info(114, ["model_info", "pad_value"])
418
+ ov_model.set_rt_info([255.0], ["model_info", "scale_values"])
419
+ ov_model.set_rt_info(self.args.iou, ["model_info", "iou_threshold"])
420
+ ov_model.set_rt_info([v.replace(" ", "_") for v in self.model.names.values()], ["model_info", "labels"])
421
+ if self.model.task != "classify":
422
+ ov_model.set_rt_info("fit_to_window_letterbox", ["model_info", "resize_type"])
423
+
424
+ ov.serialize(ov_model, file) # save
425
+ yaml_save(Path(file).parent / "metadata.yaml", self.metadata) # add metadata.yaml
426
+
427
+ ov_model = mo.convert_model(
428
+ f_onnx, model_name=self.pretty_name, framework="onnx", compress_to_fp16=self.args.half
429
+ ) # export
430
+
431
+ if self.args.int8:
432
+ if not self.args.data:
433
+ self.args.data = DEFAULT_CFG.data or "coco128.yaml"
434
+ LOGGER.warning(
435
+ f"{prefix} WARNING ⚠️ INT8 export requires a missing 'data' arg for calibration. "
436
+ f"Using default 'data={self.args.data}'."
437
+ )
438
+ check_requirements("nncf>=2.5.0")
439
+ import nncf
440
+
441
+ def transform_fn(data_item):
442
+ """Quantization transform function."""
443
+ assert (
444
+ data_item["img"].dtype == torch.uint8
445
+ ), "Input image must be uint8 for the quantization preprocessing"
446
+ im = data_item["img"].numpy().astype(np.float32) / 255.0 # uint8 to fp16/32 and 0 - 255 to 0.0 - 1.0
447
+ return np.expand_dims(im, 0) if im.ndim == 3 else im
448
+
449
+ # Generate calibration data for integer quantization
450
+ LOGGER.info(f"{prefix} collecting INT8 calibration images from 'data={self.args.data}'")
451
+ data = check_det_dataset(self.args.data)
452
+ dataset = YOLODataset(data["val"], data=data, imgsz=self.imgsz[0], augment=False)
453
+ n = len(dataset)
454
+ if n < 300:
455
+ LOGGER.warning(f"{prefix} WARNING ⚠️ >300 images recommended for INT8 calibration, found {n} images.")
456
+ quantization_dataset = nncf.Dataset(dataset, transform_fn)
457
+ ignored_scope = nncf.IgnoredScope(types=["Multiply", "Subtract", "Sigmoid"]) # ignore operation
458
+ quantized_ov_model = nncf.quantize(
459
+ ov_model, quantization_dataset, preset=nncf.QuantizationPreset.MIXED, ignored_scope=ignored_scope
460
+ )
461
+ serialize(quantized_ov_model, fq_ov)
462
+ return fq, None
463
+
464
+ serialize(ov_model, f_ov)
465
+ return f, None
466
+
467
+ @try_export
468
+ def export_paddle(self, prefix=colorstr("PaddlePaddle:")):
469
+ """YOLOv8 Paddle export."""
470
+ check_requirements(("paddlepaddle", "x2paddle"))
471
+ import x2paddle # noqa
472
+ from x2paddle.convert import pytorch2paddle # noqa
473
+
474
+ LOGGER.info(f"\n{prefix} starting export with X2Paddle {x2paddle.__version__}...")
475
+ f = str(self.file).replace(self.file.suffix, f"_paddle_model{os.sep}")
476
+
477
+ pytorch2paddle(module=self.model, save_dir=f, jit_type="trace", input_examples=[self.im]) # export
478
+ yaml_save(Path(f) / "metadata.yaml", self.metadata) # add metadata.yaml
479
+ return f, None
480
+
481
+ @try_export
482
+ def export_ncnn(self, prefix=colorstr("ncnn:")):
483
+ """
484
+ YOLOv8 ncnn export using PNNX https://github.com/pnnx/pnnx.
485
+ """
486
+ check_requirements("git+https://github.com/Tencent/ncnn.git" if ARM64 else "ncnn") # requires ncnn
487
+ import ncnn # noqa
488
+
489
+ LOGGER.info(f"\n{prefix} starting export with ncnn {ncnn.__version__}...")
490
+ f = Path(str(self.file).replace(self.file.suffix, f"_ncnn_model{os.sep}"))
491
+ f_ts = self.file.with_suffix(".torchscript")
492
+
493
+ name = Path("pnnx.exe" if WINDOWS else "pnnx") # PNNX filename
494
+ pnnx = name if name.is_file() else ROOT / name
495
+ if not pnnx.is_file():
496
+ LOGGER.warning(
497
+ f"{prefix} WARNING ⚠️ PNNX not found. Attempting to download binary file from "
498
+ "https://github.com/pnnx/pnnx/.\nNote PNNX Binary file must be placed in current working directory "
499
+ f"or in {ROOT}. See PNNX repo for full installation instructions."
500
+ )
501
+ system = ["macos"] if MACOS else ["windows"] if WINDOWS else ["ubuntu", "linux"] # operating system
502
+ try:
503
+ _, assets = get_github_assets(repo="pnnx/pnnx", retry=True)
504
+ url = [x for x in assets if any(s in x for s in system)][0]
505
+ except Exception as e:
506
+ url = f"https://github.com/pnnx/pnnx/releases/download/20231127/pnnx-20231127-{system[0]}.zip"
507
+ LOGGER.warning(f"{prefix} WARNING ⚠️ PNNX GitHub assets not found: {e}, using default {url}")
508
+ asset = attempt_download_asset(url, repo="pnnx/pnnx", release="latest")
509
+ if check_is_path_safe(Path.cwd(), asset): # avoid path traversal security vulnerability
510
+ unzip_dir = Path(asset).with_suffix("")
511
+ (unzip_dir / name).rename(pnnx) # move binary to ROOT
512
+ shutil.rmtree(unzip_dir) # delete unzip dir
513
+ Path(asset).unlink() # delete zip
514
+ pnnx.chmod(0o777) # set read, write, and execute permissions for everyone
515
+
516
+ ncnn_args = [
517
+ f'ncnnparam={f / "model.ncnn.param"}',
518
+ f'ncnnbin={f / "model.ncnn.bin"}',
519
+ f'ncnnpy={f / "model_ncnn.py"}',
520
+ ]
521
+
522
+ pnnx_args = [
523
+ f'pnnxparam={f / "model.pnnx.param"}',
524
+ f'pnnxbin={f / "model.pnnx.bin"}',
525
+ f'pnnxpy={f / "model_pnnx.py"}',
526
+ f'pnnxonnx={f / "model.pnnx.onnx"}',
527
+ ]
528
+
529
+ cmd = [
530
+ str(pnnx),
531
+ str(f_ts),
532
+ *ncnn_args,
533
+ *pnnx_args,
534
+ f"fp16={int(self.args.half)}",
535
+ f"device={self.device.type}",
536
+ f'inputshape="{[self.args.batch, 3, *self.imgsz]}"',
537
+ ]
538
+ f.mkdir(exist_ok=True) # make ncnn_model directory
539
+ LOGGER.info(f"{prefix} running '{' '.join(cmd)}'")
540
+ subprocess.run(cmd, check=True)
541
+
542
+ # Remove debug files
543
+ pnnx_files = [x.split("=")[-1] for x in pnnx_args]
544
+ for f_debug in ("debug.bin", "debug.param", "debug2.bin", "debug2.param", *pnnx_files):
545
+ Path(f_debug).unlink(missing_ok=True)
546
+
547
+ yaml_save(f / "metadata.yaml", self.metadata) # add metadata.yaml
548
+ return str(f), None
549
+
550
+ @try_export
551
+ def export_coreml(self, prefix=colorstr("CoreML:")):
552
+ """YOLOv8 CoreML export."""
553
+ mlmodel = self.args.format.lower() == "mlmodel" # legacy *.mlmodel export format requested
554
+ check_requirements("coremltools>=6.0,<=6.2" if mlmodel else "coremltools>=7.0")
555
+ import coremltools as ct # noqa
556
+
557
+ LOGGER.info(f"\n{prefix} starting export with coremltools {ct.__version__}...")
558
+ assert not WINDOWS, "CoreML export is not supported on Windows, please run on macOS or Linux."
559
+ f = self.file.with_suffix(".mlmodel" if mlmodel else ".mlpackage")
560
+ if f.is_dir():
561
+ shutil.rmtree(f)
562
+
563
+ bias = [0.0, 0.0, 0.0]
564
+ scale = 1 / 255
565
+ classifier_config = None
566
+ if self.model.task == "classify":
567
+ classifier_config = ct.ClassifierConfig(list(self.model.names.values())) if self.args.nms else None
568
+ model = self.model
569
+ elif self.model.task == "detect":
570
+ model = IOSDetectModel(self.model, self.im) if self.args.nms else self.model
571
+ else:
572
+ if self.args.nms:
573
+ LOGGER.warning(f"{prefix} WARNING ⚠️ 'nms=True' is only available for Detect models like 'yolov8n.pt'.")
574
+ # TODO CoreML Segment and Pose model pipelining
575
+ model = self.model
576
+
577
+ ts = torch.jit.trace(model.eval(), self.im, strict=False) # TorchScript model
578
+ ct_model = ct.convert(
579
+ ts,
580
+ inputs=[ct.ImageType("image", shape=self.im.shape, scale=scale, bias=bias)],
581
+ classifier_config=classifier_config,
582
+ convert_to="neuralnetwork" if mlmodel else "mlprogram",
583
+ )
584
+ bits, mode = (8, "kmeans") if self.args.int8 else (16, "linear") if self.args.half else (32, None)
585
+ if bits < 32:
586
+ if "kmeans" in mode:
587
+ check_requirements("scikit-learn") # scikit-learn package required for k-means quantization
588
+ if mlmodel:
589
+ ct_model = ct.models.neural_network.quantization_utils.quantize_weights(ct_model, bits, mode)
590
+ elif bits == 8: # mlprogram already quantized to FP16
591
+ import coremltools.optimize.coreml as cto
592
+
593
+ op_config = cto.OpPalettizerConfig(mode="kmeans", nbits=bits, weight_threshold=512)
594
+ config = cto.OptimizationConfig(global_config=op_config)
595
+ ct_model = cto.palettize_weights(ct_model, config=config)
596
+ if self.args.nms and self.model.task == "detect":
597
+ if mlmodel:
598
+ import platform
599
+
600
+ # coremltools<=6.2 NMS export requires Python<3.11
601
+ check_version(platform.python_version(), "<3.11", name="Python ", hard=True)
602
+ weights_dir = None
603
+ else:
604
+ ct_model.save(str(f)) # save otherwise weights_dir does not exist
605
+ weights_dir = str(f / "Data/com.apple.CoreML/weights")
606
+ ct_model = self._pipeline_coreml(ct_model, weights_dir=weights_dir)
607
+
608
+ m = self.metadata # metadata dict
609
+ ct_model.short_description = m.pop("description")
610
+ ct_model.author = m.pop("author")
611
+ ct_model.license = m.pop("license")
612
+ ct_model.version = m.pop("version")
613
+ ct_model.user_defined_metadata.update({k: str(v) for k, v in m.items()})
614
+ try:
615
+ ct_model.save(str(f)) # save *.mlpackage
616
+ except Exception as e:
617
+ LOGGER.warning(
618
+ f"{prefix} WARNING ⚠️ CoreML export to *.mlpackage failed ({e}), reverting to *.mlmodel export. "
619
+ f"Known coremltools Python 3.11 and Windows bugs https://github.com/apple/coremltools/issues/1928."
620
+ )
621
+ f = f.with_suffix(".mlmodel")
622
+ ct_model.save(str(f))
623
+ return f, ct_model
624
+
625
+ @try_export
626
+ def export_engine(self, prefix=colorstr("TensorRT:")):
627
+ """YOLOv8 TensorRT export https://developer.nvidia.com/tensorrt."""
628
+ assert self.im.device.type != "cpu", "export running on CPU but must be on GPU, i.e. use 'device=0'"
629
+ f_onnx, _ = self.export_onnx() # run before trt import https://github.com/ultralytics/ultralytics/issues/7016
630
+
631
+ try:
632
+ import tensorrt as trt # noqa
633
+ except ImportError:
634
+ if LINUX:
635
+ check_requirements("nvidia-tensorrt", cmds="-U --index-url https://pypi.ngc.nvidia.com")
636
+ import tensorrt as trt # noqa
637
+
638
+ check_version(trt.__version__, "7.0.0", hard=True) # require tensorrt>=7.0.0
639
+
640
+ self.args.simplify = True
641
+
642
+ LOGGER.info(f"\n{prefix} starting export with TensorRT {trt.__version__}...")
643
+ assert Path(f_onnx).exists(), f"failed to export ONNX file: {f_onnx}"
644
+ f = self.file.with_suffix(".engine") # TensorRT engine file
645
+ logger = trt.Logger(trt.Logger.INFO)
646
+ if self.args.verbose:
647
+ logger.min_severity = trt.Logger.Severity.VERBOSE
648
+
649
+ builder = trt.Builder(logger)
650
+ config = builder.create_builder_config()
651
+ config.max_workspace_size = self.args.workspace * 1 << 30
652
+ # config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace << 30) # fix TRT 8.4 deprecation notice
653
+
654
+ flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
655
+ network = builder.create_network(flag)
656
+ parser = trt.OnnxParser(network, logger)
657
+ if not parser.parse_from_file(f_onnx):
658
+ raise RuntimeError(f"failed to load ONNX file: {f_onnx}")
659
+
660
+ inputs = [network.get_input(i) for i in range(network.num_inputs)]
661
+ outputs = [network.get_output(i) for i in range(network.num_outputs)]
662
+ for inp in inputs:
663
+ LOGGER.info(f'{prefix} input "{inp.name}" with shape{inp.shape} {inp.dtype}')
664
+ for out in outputs:
665
+ LOGGER.info(f'{prefix} output "{out.name}" with shape{out.shape} {out.dtype}')
666
+
667
+ if self.args.dynamic:
668
+ shape = self.im.shape
669
+ if shape[0] <= 1:
670
+ LOGGER.warning(f"{prefix} WARNING ⚠️ 'dynamic=True' model requires max batch size, i.e. 'batch=16'")
671
+ profile = builder.create_optimization_profile()
672
+ for inp in inputs:
673
+ profile.set_shape(inp.name, (1, *shape[1:]), (max(1, shape[0] // 2), *shape[1:]), shape)
674
+ config.add_optimization_profile(profile)
675
+
676
+ LOGGER.info(
677
+ f"{prefix} building FP{16 if builder.platform_has_fast_fp16 and self.args.half else 32} engine as {f}"
678
+ )
679
+ if builder.platform_has_fast_fp16 and self.args.half:
680
+ config.set_flag(trt.BuilderFlag.FP16)
681
+
682
+ del self.model
683
+ torch.cuda.empty_cache()
684
+
685
+ # Write file
686
+ with builder.build_engine(network, config) as engine, open(f, "wb") as t:
687
+ # Metadata
688
+ meta = json.dumps(self.metadata)
689
+ t.write(len(meta).to_bytes(4, byteorder="little", signed=True))
690
+ t.write(meta.encode())
691
+ # Model
692
+ t.write(engine.serialize())
693
+
694
+ return f, None
695
+
696
+ @try_export
697
+ def export_saved_model(self, prefix=colorstr("TensorFlow SavedModel:")):
698
+ """YOLOv8 TensorFlow SavedModel export."""
699
+ cuda = torch.cuda.is_available()
700
+ try:
701
+ import tensorflow as tf # noqa
702
+ except ImportError:
703
+ check_requirements(f"tensorflow{'-macos' if MACOS else '-aarch64' if ARM64 else '' if cuda else '-cpu'}")
704
+ import tensorflow as tf # noqa
705
+ check_requirements(
706
+ (
707
+ "onnx",
708
+ "onnx2tf>=1.15.4,<=1.17.5",
709
+ "sng4onnx>=1.0.1",
710
+ "onnxsim>=0.4.33",
711
+ "onnx_graphsurgeon>=0.3.26",
712
+ "tflite_support",
713
+ "onnxruntime-gpu" if cuda else "onnxruntime",
714
+ ),
715
+ cmds="--extra-index-url https://pypi.ngc.nvidia.com",
716
+ ) # onnx_graphsurgeon only on NVIDIA
717
+
718
+ LOGGER.info(f"\n{prefix} starting export with tensorflow {tf.__version__}...")
719
+ check_version(
720
+ tf.__version__,
721
+ "<=2.13.1",
722
+ name="tensorflow",
723
+ verbose=True,
724
+ msg="https://github.com/ultralytics/ultralytics/issues/5161",
725
+ )
726
+ f = Path(str(self.file).replace(self.file.suffix, "_saved_model"))
727
+ if f.is_dir():
728
+ import shutil
729
+
730
+ shutil.rmtree(f) # delete output folder
731
+
732
+ # Pre-download calibration file to fix https://github.com/PINTO0309/onnx2tf/issues/545
733
+ onnx2tf_file = Path("calibration_image_sample_data_20x128x128x3_float32.npy")
734
+ if not onnx2tf_file.exists():
735
+ attempt_download_asset(f"{onnx2tf_file}.zip", unzip=True, delete=True)
736
+
737
+ # Export to ONNX
738
+ self.args.simplify = True
739
+ f_onnx, _ = self.export_onnx()
740
+
741
+ # Export to TF
742
+ tmp_file = f / "tmp_tflite_int8_calibration_images.npy" # int8 calibration images file
743
+ if self.args.int8:
744
+ verbosity = "--verbosity info"
745
+ if self.args.data:
746
+ # Generate calibration data for integer quantization
747
+ LOGGER.info(f"{prefix} collecting INT8 calibration images from 'data={self.args.data}'")
748
+ data = check_det_dataset(self.args.data)
749
+ dataset = YOLODataset(data["val"], data=data, imgsz=self.imgsz[0], augment=False)
750
+ images = []
751
+ for i, batch in enumerate(dataset):
752
+ if i >= 100: # maximum number of calibration images
753
+ break
754
+ im = batch["img"].permute(1, 2, 0)[None] # list to nparray, CHW to BHWC
755
+ images.append(im)
756
+ f.mkdir()
757
+ images = torch.cat(images, 0).float()
758
+ # mean = images.view(-1, 3).mean(0) # imagenet mean [123.675, 116.28, 103.53]
759
+ # std = images.view(-1, 3).std(0) # imagenet std [58.395, 57.12, 57.375]
760
+ np.save(str(tmp_file), images.numpy()) # BHWC
761
+ int8 = f'-oiqt -qt per-tensor -cind images "{tmp_file}" "[[[[0, 0, 0]]]]" "[[[[255, 255, 255]]]]"'
762
+ else:
763
+ int8 = "-oiqt -qt per-tensor"
764
+ else:
765
+ verbosity = "--non_verbose"
766
+ int8 = ""
767
+
768
+ cmd = f'onnx2tf -i "{f_onnx}" -o "{f}" -nuo {verbosity} {int8}'.strip()
769
+ LOGGER.info(f"{prefix} running '{cmd}'")
770
+ subprocess.run(cmd, shell=True)
771
+ yaml_save(f / "metadata.yaml", self.metadata) # add metadata.yaml
772
+
773
+ # Remove/rename TFLite models
774
+ if self.args.int8:
775
+ tmp_file.unlink(missing_ok=True)
776
+ for file in f.rglob("*_dynamic_range_quant.tflite"):
777
+ file.rename(file.with_name(file.stem.replace("_dynamic_range_quant", "_int8") + file.suffix))
778
+ for file in f.rglob("*_integer_quant_with_int16_act.tflite"):
779
+ file.unlink() # delete extra fp16 activation TFLite files
780
+
781
+ # Add TFLite metadata
782
+ for file in f.rglob("*.tflite"):
783
+ f.unlink() if "quant_with_int16_act.tflite" in str(f) else self._add_tflite_metadata(file)
784
+
785
+ return str(f), tf.saved_model.load(f, tags=None, options=None) # load saved_model as Keras model
786
+
787
+ @try_export
788
+ def export_pb(self, keras_model, prefix=colorstr("TensorFlow GraphDef:")):
789
+ """YOLOv8 TensorFlow GraphDef *.pb export https://github.com/leimao/Frozen_Graph_TensorFlow."""
790
+ import tensorflow as tf # noqa
791
+ from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2 # noqa
792
+
793
+ LOGGER.info(f"\n{prefix} starting export with tensorflow {tf.__version__}...")
794
+ f = self.file.with_suffix(".pb")
795
+
796
+ m = tf.function(lambda x: keras_model(x)) # full model
797
+ m = m.get_concrete_function(tf.TensorSpec(keras_model.inputs[0].shape, keras_model.inputs[0].dtype))
798
+ frozen_func = convert_variables_to_constants_v2(m)
799
+ frozen_func.graph.as_graph_def()
800
+ tf.io.write_graph(graph_or_graph_def=frozen_func.graph, logdir=str(f.parent), name=f.name, as_text=False)
801
+ return f, None
802
+
803
+ @try_export
804
+ def export_tflite(self, keras_model, nms, agnostic_nms, prefix=colorstr("TensorFlow Lite:")):
805
+ """YOLOv8 TensorFlow Lite export."""
806
+ import tensorflow as tf # noqa
807
+
808
+ LOGGER.info(f"\n{prefix} starting export with tensorflow {tf.__version__}...")
809
+ saved_model = Path(str(self.file).replace(self.file.suffix, "_saved_model"))
810
+ if self.args.int8:
811
+ f = saved_model / f"{self.file.stem}_int8.tflite" # fp32 in/out
812
+ elif self.args.half:
813
+ f = saved_model / f"{self.file.stem}_float16.tflite" # fp32 in/out
814
+ else:
815
+ f = saved_model / f"{self.file.stem}_float32.tflite"
816
+ return str(f), None
817
+
818
    @try_export
    def export_edgetpu(self, tflite_model="", prefix=colorstr("Edge TPU:")):
        """
        YOLOv8 Edge TPU export https://coral.ai/docs/edgetpu/models-intro/.

        Compiles an existing *.tflite model for the Coral Edge TPU using the 'edgetpu_compiler'
        CLI, installing the compiler via Coral's apt repository when missing (Linux only).

        Args:
            tflite_model (str): Path to the source *.tflite model to compile.

        Returns:
            (str, None): Path to the compiled '*_edgetpu.tflite' model, and None.
        """
        LOGGER.warning(f"{prefix} WARNING ⚠️ Edge TPU known bug https://github.com/ultralytics/ultralytics/issues/1185")

        cmd = "edgetpu_compiler --version"
        help_url = "https://coral.ai/docs/edgetpu/compiler/"
        assert LINUX, f"export only supported on Linux. See {help_url}"
        # Probe for the compiler; a non-zero return code means it is not installed.
        if subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, shell=True).returncode != 0:
            LOGGER.info(f"\n{prefix} export requires Edge TPU compiler. Attempting install from {help_url}")
            sudo = subprocess.run("sudo --version >/dev/null", shell=True).returncode == 0  # sudo installed on system
            # Register Coral's apt repo and install the compiler, dropping 'sudo ' when it is
            # unavailable (e.g. inside containers running as root).
            for c in (
                "curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -",
                'echo "deb https://packages.cloud.google.com/apt coral-edgetpu-stable main" | '
                "sudo tee /etc/apt/sources.list.d/coral-edgetpu.list",
                "sudo apt-get update",
                "sudo apt-get install edgetpu-compiler",
            ):
                subprocess.run(c if sudo else c.replace("sudo ", ""), shell=True, check=True)
        ver = subprocess.run(cmd, shell=True, capture_output=True, check=True).stdout.decode().split()[-1]

        LOGGER.info(f"\n{prefix} starting export with Edge TPU compiler {ver}...")
        f = str(tflite_model).replace(".tflite", "_edgetpu.tflite")  # Edge TPU model

        # Compile next to the source model; the compiler writes its own output file.
        cmd = f'edgetpu_compiler -s -d -k 10 --out_dir "{Path(f).parent}" "{tflite_model}"'
        LOGGER.info(f"{prefix} running '{cmd}'")
        subprocess.run(cmd, shell=True)
        self._add_tflite_metadata(f)
        return f, None
847
+
848
    @try_export
    def export_tfjs(self, prefix=colorstr("TensorFlow.js:")):
        """
        YOLOv8 TensorFlow.js export.

        Converts the previously exported frozen graph (*.pb) into a TF.js web-model directory
        via the 'tensorflowjs_converter' CLI, applying float16/uint8 quantization per args.

        Returns:
            (str, None): Path to the '*_web_model' output directory, and None.
        """
        # JAX bug requiring install constraints in https://github.com/google/jax/issues/18978
        check_requirements(["jax<=0.4.21", "jaxlib<=0.4.21", "tensorflowjs"])
        import tensorflow as tf
        import tensorflowjs as tfjs  # noqa

        LOGGER.info(f"\n{prefix} starting export with tensorflowjs {tfjs.__version__}...")
        f = str(self.file).replace(self.file.suffix, "_web_model")  # js dir
        f_pb = str(self.file.with_suffix(".pb"))  # *.pb path

        # Parse the frozen graph to discover its output node names for the converter.
        gd = tf.Graph().as_graph_def()  # TF GraphDef
        with open(f_pb, "rb") as file:
            gd.ParseFromString(file.read())
        outputs = ",".join(gd_outputs(gd))
        LOGGER.info(f"\n{prefix} output node names: {outputs}")

        quantization = "--quantize_float16" if self.args.half else "--quantize_uint8" if self.args.int8 else ""
        with spaces_in_path(f_pb) as fpb_, spaces_in_path(f) as f_:  # exporter can not handle spaces in path
            cmd = f'tensorflowjs_converter --input_format=tf_frozen_model {quantization} --output_node_names={outputs} "{fpb_}" "{f_}"'
            LOGGER.info(f"{prefix} running '{cmd}'")
            subprocess.run(cmd, shell=True)

        if " " in f:
            LOGGER.warning(f"{prefix} WARNING ⚠️ your model may not work correctly with spaces in path '{f}'.")

        # f_json = Path(f) / 'model.json'  # *.json path
        # with open(f_json, 'w') as j:  # sort JSON Identity_* in ascending order
        #     subst = re.sub(
        #         r'{"outputs": {"Identity.?.?": {"name": "Identity.?.?"}, '
        #         r'"Identity.?.?": {"name": "Identity.?.?"}, '
        #         r'"Identity.?.?": {"name": "Identity.?.?"}, '
        #         r'"Identity.?.?": {"name": "Identity.?.?"}}}',
        #         r'{"outputs": {"Identity": {"name": "Identity"}, '
        #         r'"Identity_1": {"name": "Identity_1"}, '
        #         r'"Identity_2": {"name": "Identity_2"}, '
        #         r'"Identity_3": {"name": "Identity_3"}}}',
        #         f_json.read_text(),
        #     )
        #     j.write(subst)
        yaml_save(Path(f) / "metadata.yaml", self.metadata)  # add metadata.yaml
        return f, None
891
+
892
    def _add_tflite_metadata(self, file):
        """
        Add metadata to *.tflite models per https://www.tensorflow.org/lite/models/convert/metadata.

        Embeds the exporter's metadata dict (written to a temporary text file) together with
        input/output tensor descriptions into the TFLite flatbuffer in place.

        Args:
            file (str | Path): Path to the *.tflite model to annotate.
        """
        from tflite_support import flatbuffers  # noqa
        from tflite_support import metadata as _metadata  # noqa
        from tflite_support import metadata_schema_py_generated as _metadata_fb  # noqa

        # Create model info
        model_meta = _metadata_fb.ModelMetadataT()
        model_meta.name = self.metadata["description"]
        model_meta.version = self.metadata["version"]
        model_meta.author = self.metadata["author"]
        model_meta.license = self.metadata["license"]

        # Label file: the full metadata dict is attached as an associated text file
        tmp_file = Path(file).parent / "temp_meta.txt"
        with open(tmp_file, "w") as f:
            f.write(str(self.metadata))

        label_file = _metadata_fb.AssociatedFileT()
        label_file.name = tmp_file.name
        label_file.type = _metadata_fb.AssociatedFileType.TENSOR_AXIS_LABELS

        # Create input info
        input_meta = _metadata_fb.TensorMetadataT()
        input_meta.name = "image"
        input_meta.description = "Input image to be detected."
        input_meta.content = _metadata_fb.ContentT()
        input_meta.content.contentProperties = _metadata_fb.ImagePropertiesT()
        input_meta.content.contentProperties.colorSpace = _metadata_fb.ColorSpaceType.RGB
        input_meta.content.contentPropertiesType = _metadata_fb.ContentProperties.ImageProperties

        # Create output info
        output1 = _metadata_fb.TensorMetadataT()
        output1.name = "output"
        output1.description = "Coordinates of detected objects, class labels, and confidence score"
        output1.associatedFiles = [label_file]
        if self.model.task == "segment":
            # Segmentation models carry a second output for mask prototypes
            output2 = _metadata_fb.TensorMetadataT()
            output2.name = "output"
            output2.description = "Mask protos"
            output2.associatedFiles = [label_file]

        # Create subgraph info
        subgraph = _metadata_fb.SubGraphMetadataT()
        subgraph.inputTensorMetadata = [input_meta]
        subgraph.outputTensorMetadata = [output1, output2] if self.model.task == "segment" else [output1]
        model_meta.subgraphMetadata = [subgraph]

        # Serialize the metadata flatbuffer
        b = flatbuffers.Builder(0)
        b.Finish(model_meta.Pack(b), _metadata.MetadataPopulator.METADATA_FILE_IDENTIFIER)
        metadata_buf = b.Output()

        # Write metadata + associated file into the model, then remove the temp file
        populator = _metadata.MetadataPopulator.with_model_file(str(file))
        populator.load_metadata_buffer(metadata_buf)
        populator.load_associated_files([str(tmp_file)])
        populator.populate()
        tmp_file.unlink()
949
+
950
    def _pipeline_coreml(self, model, weights_dir=None, prefix=colorstr("CoreML Pipeline:")):
        """
        YOLOv8 CoreML pipeline.

        Wraps a converted CoreML detection model with a CoreML NonMaximumSuppression model and
        combines both into a single Pipeline so the exported model outputs NMS-filtered
        'confidence' and 'coordinates' features directly.

        Args:
            model: Converted CoreML model (coremltools MLModel) to wrap.
            weights_dir (str | None): Weights directory for ML-Program models.

        Returns:
            (ct.models.MLModel): The assembled pipeline model.
        """
        import coremltools as ct  # noqa

        LOGGER.info(f"{prefix} starting pipeline with coremltools {ct.__version__}...")
        _, _, h, w = list(self.im.shape)  # BCHW

        # Output shapes
        spec = model.get_spec()
        out0, out1 = iter(spec.description.output)
        if MACOS:
            from PIL import Image

            img = Image.new("RGB", (w, h))  # w=192, h=320
            out = model.predict({"image": img})
            out0_shape = out[out0.name].shape  # (3780, 80)
            out1_shape = out[out1.name].shape  # (3780, 4)
        else:  # linux and windows can not run model.predict(), get sizes from PyTorch model output y
            out0_shape = self.output_shape[2], self.output_shape[1] - 4  # (3780, 80)
            out1_shape = self.output_shape[2], 4  # (3780, 4)

        # Checks
        names = self.metadata["names"]
        nx, ny = spec.description.input[0].type.imageType.width, spec.description.input[0].type.imageType.height
        _, nc = out0_shape  # number of anchors, number of classes
        # _, nc = out0.type.multiArrayType.shape
        assert len(names) == nc, f"{len(names)} names found for nc={nc}"  # check

        # Define output shapes (missing)
        out0.type.multiArrayType.shape[:] = out0_shape  # (3780, 80)
        out1.type.multiArrayType.shape[:] = out1_shape  # (3780, 4)
        # spec.neuralNetwork.preprocessing[0].featureName = '0'

        # Flexible input shapes
        # from coremltools.models.neural_network import flexible_shape_utils
        # s = []  # shapes
        # s.append(flexible_shape_utils.NeuralNetworkImageSize(320, 192))
        # s.append(flexible_shape_utils.NeuralNetworkImageSize(640, 384))  # (height, width)
        # flexible_shape_utils.add_enumerated_image_sizes(spec, feature_name='image', sizes=s)
        # r = flexible_shape_utils.NeuralNetworkImageSizeRange()  # shape ranges
        # r.add_height_range((192, 640))
        # r.add_width_range((192, 640))
        # flexible_shape_utils.update_image_size_range(spec, feature_name='image', size_range=r)

        # Print
        # print(spec.description)

        # Model from spec
        model = ct.models.MLModel(spec, weights_dir=weights_dir)

        # 3. Create NMS protobuf
        nms_spec = ct.proto.Model_pb2.Model()
        nms_spec.specificationVersion = 5
        # NMS inputs mirror the decoder outputs; copy their descriptions verbatim.
        for i in range(2):
            decoder_output = model._spec.description.output[i].SerializeToString()
            nms_spec.description.input.add()
            nms_spec.description.input[i].ParseFromString(decoder_output)
            nms_spec.description.output.add()
            nms_spec.description.output[i].ParseFromString(decoder_output)

        nms_spec.description.output[0].name = "confidence"
        nms_spec.description.output[1].name = "coordinates"

        # Flexible first dimension (number of surviving boxes), fixed second dimension (nc / 4).
        output_sizes = [nc, 4]
        for i in range(2):
            ma_type = nms_spec.description.output[i].type.multiArrayType
            ma_type.shapeRange.sizeRanges.add()
            ma_type.shapeRange.sizeRanges[0].lowerBound = 0
            ma_type.shapeRange.sizeRanges[0].upperBound = -1
            ma_type.shapeRange.sizeRanges.add()
            ma_type.shapeRange.sizeRanges[1].lowerBound = output_sizes[i]
            ma_type.shapeRange.sizeRanges[1].upperBound = output_sizes[i]
            del ma_type.shape[:]

        nms = nms_spec.nonMaximumSuppression
        nms.confidenceInputFeatureName = out0.name  # 1x507x80
        nms.coordinatesInputFeatureName = out1.name  # 1x507x4
        nms.confidenceOutputFeatureName = "confidence"
        nms.coordinatesOutputFeatureName = "coordinates"
        nms.iouThresholdInputFeatureName = "iouThreshold"
        nms.confidenceThresholdInputFeatureName = "confidenceThreshold"
        nms.iouThreshold = 0.45
        nms.confidenceThreshold = 0.25
        nms.pickTop.perClass = True
        nms.stringClassLabels.vector.extend(names.values())
        nms_model = ct.models.MLModel(nms_spec)

        # 4. Pipeline models together
        pipeline = ct.models.pipeline.Pipeline(
            input_features=[
                ("image", ct.models.datatypes.Array(3, ny, nx)),
                ("iouThreshold", ct.models.datatypes.Double()),
                ("confidenceThreshold", ct.models.datatypes.Double()),
            ],
            output_features=["confidence", "coordinates"],
        )
        pipeline.add_model(model)
        pipeline.add_model(nms_model)

        # Correct datatypes
        pipeline.spec.description.input[0].ParseFromString(model._spec.description.input[0].SerializeToString())
        pipeline.spec.description.output[0].ParseFromString(nms_model._spec.description.output[0].SerializeToString())
        pipeline.spec.description.output[1].ParseFromString(nms_model._spec.description.output[1].SerializeToString())

        # Update metadata
        pipeline.spec.specificationVersion = 5
        pipeline.spec.description.metadata.userDefined.update(
            {"IoU threshold": str(nms.iouThreshold), "Confidence threshold": str(nms.confidenceThreshold)}
        )

        # Save the model
        model = ct.models.MLModel(pipeline.spec, weights_dir=weights_dir)
        model.input_description["image"] = "Input image"
        model.input_description["iouThreshold"] = f"(optional) IOU threshold override (default: {nms.iouThreshold})"
        model.input_description[
            "confidenceThreshold"
        ] = f"(optional) Confidence threshold override (default: {nms.confidenceThreshold})"
        model.output_description["confidence"] = 'Boxes × Class confidence (see user-defined metadata "classes")'
        model.output_description["coordinates"] = "Boxes × [x, y, width, height] (relative to image size)"
        LOGGER.info(f"{prefix} pipeline success")
        return model
1071
+
1072
+ def add_callback(self, event: str, callback):
1073
+ """Appends the given callback."""
1074
+ self.callbacks[event].append(callback)
1075
+
1076
+ def run_callbacks(self, event: str):
1077
+ """Execute all callbacks for a given event."""
1078
+ for callback in self.callbacks.get(event, []):
1079
+ callback(self)
1080
+
1081
+
1082
class IOSDetectModel(torch.nn.Module):
    """Wrap an Ultralytics YOLO model so its outputs match Apple iOS CoreML export conventions."""

    def __init__(self, model, im):
        """Store the wrapped model, class count, and a size-dependent box-normalization factor."""
        super().__init__()
        batch, channels, height, width = im.shape
        self.model = model
        self.nc = len(model.names)  # number of classes
        # Square inputs only need a single scalar divisor; rectangular inputs normalize
        # each of x, y, w, h by the matching image dimension.
        if width == height:
            self.normalize = 1.0 / width
        else:
            self.normalize = torch.tensor([1.0 / width, 1.0 / height, 1.0 / width, 1.0 / height])

    def forward(self, x):
        """Run inference and return (class scores, boxes normalized to image size)."""
        preds = self.model(x)[0].transpose(0, 1)
        xywh, cls = preds.split((4, self.nc), 1)
        return cls, xywh * self.normalize
yolov8_model/ultralytics/engine/model.py ADDED
@@ -0,0 +1,772 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+
3
+ import torch
4
+ import inspect
5
+ import sys
6
+ from pathlib import Path
7
+ from typing import Union
8
+
9
+ from yolov8_model.ultralytics.cfg import TASK2DATA, get_cfg, get_save_dir
10
+ from yolov8_model.ultralytics.hub.utils import HUB_WEB_ROOT
11
+ from yolov8_model.ultralytics.nn.tasks import attempt_load_one_weight, guess_model_task, nn, yaml_model_load
12
+ from yolov8_model.ultralytics.utils import ASSETS, DEFAULT_CFG_DICT, LOGGER, RANK, SETTINGS, callbacks, checks, emojis, yaml_load
13
+
14
+
15
+ class Model(nn.Module):
16
+ """
17
+ A base class for implementing YOLO models, unifying APIs across different model types.
18
+
19
+ This class provides a common interface for various operations related to YOLO models, such as training,
20
+ validation, prediction, exporting, and benchmarking. It handles different types of models, including those
21
+ loaded from local files, Ultralytics HUB, or Triton Server. The class is designed to be flexible and
22
+ extendable for different tasks and model configurations.
23
+
24
+ Args:
25
+ model (Union[str, Path], optional): Path or name of the model to load or create. This can be a local file
26
+ path, a model name from Ultralytics HUB, or a Triton Server model. Defaults to 'yolov8n.pt'.
27
+ task (Any, optional): The task type associated with the YOLO model. This can be used to specify the model's
28
+ application domain, such as object detection, segmentation, etc. Defaults to None.
29
+ verbose (bool, optional): If True, enables verbose output during the model's operations. Defaults to False.
30
+
31
+ Attributes:
32
+ callbacks (dict): A dictionary of callback functions for various events during model operations.
33
+ predictor (BasePredictor): The predictor object used for making predictions.
34
+ model (nn.Module): The underlying PyTorch model.
35
+ trainer (BaseTrainer): The trainer object used for training the model.
36
+ ckpt (dict): The checkpoint data if the model is loaded from a *.pt file.
37
+ cfg (str): The configuration of the model if loaded from a *.yaml file.
38
+ ckpt_path (str): The path to the checkpoint file.
39
+ overrides (dict): A dictionary of overrides for model configuration.
40
+ metrics (dict): The latest training/validation metrics.
41
+ session (HUBTrainingSession): The Ultralytics HUB session, if applicable.
42
+ task (str): The type of task the model is intended for.
43
+ model_name (str): The name of the model.
44
+
45
+ Methods:
46
+ __call__: Alias for the predict method, enabling the model instance to be callable.
47
+ _new: Initializes a new model based on a configuration file.
48
+ _load: Loads a model from a checkpoint file.
49
+ _check_is_pytorch_model: Ensures that the model is a PyTorch model.
50
+ reset_weights: Resets the model's weights to their initial state.
51
+ load: Loads model weights from a specified file.
52
+ save: Saves the current state of the model to a file.
53
+ info: Logs or returns information about the model.
54
+ fuse: Fuses Conv2d and BatchNorm2d layers for optimized inference.
55
+ predict: Performs object detection predictions.
56
+ track: Performs object tracking.
57
+ val: Validates the model on a dataset.
58
+ benchmark: Benchmarks the model on various export formats.
59
+ export: Exports the model to different formats.
60
+ train: Trains the model on a dataset.
61
+ tune: Performs hyperparameter tuning.
62
+ _apply: Applies a function to the model's tensors.
63
+ add_callback: Adds a callback function for an event.
64
+ clear_callback: Clears all callbacks for an event.
65
+ reset_callbacks: Resets all callbacks to their default functions.
66
+ _get_hub_session: Retrieves or creates an Ultralytics HUB session.
67
+ is_triton_model: Checks if a model is a Triton Server model.
68
+ is_hub_model: Checks if a model is an Ultralytics HUB model.
69
+ _reset_ckpt_args: Resets checkpoint arguments when loading a PyTorch model.
70
+ _smart_load: Loads the appropriate module based on the model task.
71
+ task_map: Provides a mapping from model tasks to corresponding classes.
72
+
73
+ Raises:
74
+ FileNotFoundError: If the specified model file does not exist or is inaccessible.
75
+ ValueError: If the model file or configuration is invalid or unsupported.
76
+ ImportError: If required dependencies for specific model types (like HUB SDK) are not installed.
77
+ TypeError: If the model is not a PyTorch model when required.
78
+ AttributeError: If required attributes or methods are not implemented or available.
79
+ NotImplementedError: If a specific model task or mode is not supported.
80
+ """
81
+
82
    def __init__(self, model: Union[str, Path] = "yolov8n.pt", task=None, verbose=False) -> None:
        """
        Initializes a new instance of the YOLO model class.

        This constructor sets up the model based on the provided model path or name. It handles various types of model
        sources, including local files, Ultralytics HUB models, and Triton Server models. The method initializes several
        important attributes of the model and prepares it for operations like training, prediction, or export.

        Args:
            model (Union[str, Path], optional): The path or model file to load or create. This can be a local
                file path, a model name from Ultralytics HUB, or a Triton Server model. Defaults to 'yolov8n.pt'.
            task (Any, optional): The task type associated with the YOLO model, specifying its application domain.
                Defaults to None.
            verbose (bool, optional): If True, enables verbose output during the model's initialization and subsequent
                operations. Defaults to False.

        Raises:
            FileNotFoundError: If the specified model file does not exist or is inaccessible.
            ValueError: If the model file or configuration is invalid or unsupported.
            ImportError: If required dependencies for specific model types (like HUB SDK) are not installed.
        """
        super().__init__()
        self.callbacks = callbacks.get_default_callbacks()
        self.predictor = None  # reuse predictor
        self.model = None  # model object
        self.trainer = None  # trainer object
        self.ckpt = None  # if loaded from *.pt
        self.cfg = None  # if loaded from *.yaml
        self.ckpt_path = None  # path of the loaded checkpoint, set by _load()
        self.overrides = {}  # overrides for trainer object
        self.metrics = None  # validation/training metrics
        self.session = None  # HUB session
        self.task = task  # task type
        self.model_name = model = str(model).strip()  # strip spaces

        # Check if Ultralytics HUB model from https://hub.ultralytics.com
        if self.is_hub_model(model):
            # Fetch model from HUB; subsequent loading continues with the downloaded weights file
            checks.check_requirements("hub-sdk>0.0.2")
            self.session = self._get_hub_session(model)
            model = self.session.model_file

        # Check if Triton Server model
        elif self.is_triton_model(model):
            # Triton models are served remotely: keep the URL as-is and return early,
            # skipping local file loading entirely.
            self.model = model
            self.task = task
            return

        # Load or create new YOLO model
        model = checks.check_model_file_from_stem(model)  # add suffix, i.e. yolov8n -> yolov8n.pt
        if Path(model).suffix in (".yaml", ".yml"):
            self._new(model, task=task)  # build a fresh model from a YAML config
        else:
            self._load(model, task=task)  # load weights from a checkpoint/exported file

        self.model_name = model
138
+
139
+ def __call__(self, source=None, stream=False, **kwargs):
140
+ """
141
+ An alias for the predict method, enabling the model instance to be callable.
142
+
143
+ This method simplifies the process of making predictions by allowing the model instance to be called directly
144
+ with the required arguments for prediction.
145
+
146
+ Args:
147
+ source (str | int | PIL.Image | np.ndarray, optional): The source of the image for making predictions.
148
+ Accepts various types, including file paths, URLs, PIL images, and numpy arrays. Defaults to None.
149
+ stream (bool, optional): If True, treats the input source as a continuous stream for predictions.
150
+ Defaults to False.
151
+ **kwargs (dict): Additional keyword arguments for configuring the prediction process.
152
+
153
+ Returns:
154
+ (List[ultralytics.engine.results.Results]): A list of prediction results, encapsulated in the Results class.
155
+ """
156
+ return self.predict(source, stream, **kwargs)
157
+
158
+ @staticmethod
159
+ def _get_hub_session(model: str):
160
+ """Creates a session for Hub Training."""
161
+ from ultralytics.hub.session import HUBTrainingSession
162
+
163
+ session = HUBTrainingSession(model)
164
+ return session if session.client.authenticated else None
165
+
166
+ @staticmethod
167
+ def is_triton_model(model):
168
+ """Is model a Triton Server URL string, i.e. <scheme>://<netloc>/<endpoint>/<task_name>"""
169
+ from urllib.parse import urlsplit
170
+
171
+ url = urlsplit(model)
172
+ return url.netloc and url.path and url.scheme in {"http", "grpc"}
173
+
174
+ @staticmethod
175
+ def is_hub_model(model):
176
+ """Check if the provided model is a HUB model."""
177
+ return any(
178
+ (
179
+ model.startswith(f"{HUB_WEB_ROOT}/models/"), # i.e. https://hub.ultralytics.com/models/MODEL_ID
180
+ [len(x) for x in model.split("_")] == [42, 20], # APIKEY_MODELID
181
+ len(model) == 20 and not Path(model).exists() and all(x not in model for x in "./\\"), # MODELID
182
+ )
183
+ )
184
+
185
+ def _new(self, cfg: str, task=None, model=None, verbose=True):
186
+ """
187
+ Initializes a new model and infers the task type from the model definitions.
188
+
189
+ Args:
190
+ cfg (str): model configuration file
191
+ task (str | None): model task
192
+ model (BaseModel): Customized model.
193
+ verbose (bool): display model info on load
194
+ """
195
+ cfg_dict = yaml_model_load(cfg)
196
+ self.cfg = cfg
197
+ self.task = task or guess_model_task(cfg_dict)
198
+ self.model = (model or self._smart_load("model"))(cfg_dict, verbose=verbose and RANK == -1) # build model
199
+ self.overrides["model"] = self.cfg
200
+ self.overrides["task"] = self.task
201
+
202
+ # Below added to allow export from YAMLs
203
+ self.model.args = {**DEFAULT_CFG_DICT, **self.overrides} # combine default and model args (prefer model args)
204
+ self.model.task = self.task
205
+
206
+ def _load(self, weights: str, task=None):
207
+ """
208
+ Initializes a new model and infers the task type from the model head.
209
+
210
+ Args:
211
+ weights (str): model checkpoint to be loaded
212
+ task (str | None): model task
213
+ """
214
+ suffix = Path(weights).suffix
215
+ if suffix == ".pt":
216
+ self.model, self.ckpt = attempt_load_one_weight(weights)
217
+ self.task = self.model.args["task"]
218
+ self.overrides = self.model.args = self._reset_ckpt_args(self.model.args)
219
+ self.ckpt_path = self.model.pt_path
220
+ else:
221
+ weights = checks.check_file(weights)
222
+ self.model, self.ckpt = weights, None
223
+ self.task = task or guess_model_task(weights)
224
+ self.ckpt_path = weights
225
+ self.overrides["model"] = weights
226
+ self.overrides["task"] = self.task
227
+
228
+ def _check_is_pytorch_model(self):
229
+ """Raises TypeError is model is not a PyTorch model."""
230
+ pt_str = isinstance(self.model, (str, Path)) and Path(self.model).suffix == ".pt"
231
+ pt_module = isinstance(self.model, nn.Module)
232
+ if not (pt_module or pt_str):
233
+ raise TypeError(
234
+ f"model='{self.model}' should be a *.pt PyTorch model to run this method, but is a different format. "
235
+ f"PyTorch models can train, val, predict and export, i.e. 'model.train(data=...)', but exported "
236
+ f"formats like ONNX, TensorRT etc. only support 'predict' and 'val' modes, "
237
+ f"i.e. 'yolo predict model=yolov8n.onnx'.\nTo run CUDA or MPS inference please pass the device "
238
+ f"argument directly in your inference command, i.e. 'model.predict(source=..., device=0)'"
239
+ )
240
+
241
+ def reset_weights(self):
242
+ """
243
+ Resets the model parameters to randomly initialized values, effectively discarding all training information.
244
+
245
+ This method iterates through all modules in the model and resets their parameters if they have a
246
+ 'reset_parameters' method. It also ensures that all parameters have 'requires_grad' set to True, enabling them
247
+ to be updated during training.
248
+
249
+ Returns:
250
+ self (ultralytics.engine.model.Model): The instance of the class with reset weights.
251
+
252
+ Raises:
253
+ AssertionError: If the model is not a PyTorch model.
254
+ """
255
+ self._check_is_pytorch_model()
256
+ for m in self.model.modules():
257
+ if hasattr(m, "reset_parameters"):
258
+ m.reset_parameters()
259
+ for p in self.model.parameters():
260
+ p.requires_grad = True
261
+ return self
262
+
263
+ def load(self, weights="yolov8n.pt"):
264
+ """
265
+ Loads parameters from the specified weights file into the model.
266
+
267
+ This method supports loading weights from a file or directly from a weights object. It matches parameters by
268
+ name and shape and transfers them to the model.
269
+
270
+ Args:
271
+ weights (str | Path): Path to the weights file or a weights object. Defaults to 'yolov8n.pt'.
272
+
273
+ Returns:
274
+ self (ultralytics.engine.model.Model): The instance of the class with loaded weights.
275
+
276
+ Raises:
277
+ AssertionError: If the model is not a PyTorch model.
278
+ """
279
+ self._check_is_pytorch_model()
280
+ if isinstance(weights, (str, Path)):
281
+ weights, self.ckpt = attempt_load_one_weight(weights)
282
+ self.model.load(weights)
283
+ return self
284
+
285
+ def save(self, filename="model.pt"):
286
+ """
287
+ Saves the current model state to a file.
288
+
289
+ This method exports the model's checkpoint (ckpt) to the specified filename.
290
+
291
+ Args:
292
+ filename (str): The name of the file to save the model to. Defaults to 'model.pt'.
293
+
294
+ Raises:
295
+ AssertionError: If the model is not a PyTorch model.
296
+ """
297
+ self._check_is_pytorch_model()
298
+ import torch
299
+
300
+ torch.save(self.ckpt, filename)
301
+
302
+ def info(self, detailed=False, verbose=True):
303
+ """
304
+ Logs or returns model information.
305
+
306
+ This method provides an overview or detailed information about the model, depending on the arguments passed.
307
+ It can control the verbosity of the output.
308
+
309
+ Args:
310
+ detailed (bool): If True, shows detailed information about the model. Defaults to False.
311
+ verbose (bool): If True, prints the information. If False, returns the information. Defaults to True.
312
+
313
+ Returns:
314
+ (list): Various types of information about the model, depending on the 'detailed' and 'verbose' parameters.
315
+
316
+ Raises:
317
+ AssertionError: If the model is not a PyTorch model.
318
+ """
319
+ self._check_is_pytorch_model()
320
+ return self.model.info(detailed=detailed, verbose=verbose)
321
+
322
+ def fuse(self):
323
+ """
324
+ Fuses Conv2d and BatchNorm2d layers in the model.
325
+
326
+ This method optimizes the model by fusing Conv2d and BatchNorm2d layers, which can improve inference speed.
327
+
328
+ Raises:
329
+ AssertionError: If the model is not a PyTorch model.
330
+ """
331
+ self._check_is_pytorch_model()
332
+ self.model.fuse()
333
+
334
+ def embed(self, source=None, stream=False, **kwargs):
335
+ """
336
+ Generates image embeddings based on the provided source.
337
+
338
+ This method is a wrapper around the 'predict()' method, focusing on generating embeddings from an image source.
339
+ It allows customization of the embedding process through various keyword arguments.
340
+
341
+ Args:
342
+ source (str | int | PIL.Image | np.ndarray): The source of the image for generating embeddings.
343
+ The source can be a file path, URL, PIL image, numpy array, etc. Defaults to None.
344
+ stream (bool): If True, predictions are streamed. Defaults to False.
345
+ **kwargs (dict): Additional keyword arguments for configuring the embedding process.
346
+
347
+ Returns:
348
+ (List[torch.Tensor]): A list containing the image embeddings.
349
+
350
+ Raises:
351
+ AssertionError: If the model is not a PyTorch model.
352
+ """
353
+ if not kwargs.get("embed"):
354
+ kwargs["embed"] = [len(self.model.model) - 2] # embed second-to-last layer if no indices passed
355
+ return self.predict(source, stream, **kwargs)
356
+
357
+ def predict(self, source=None, stream=False, predictor=None, **kwargs):
358
+ """
359
+ Performs predictions on the given image source using the YOLO model.
360
+
361
+ This method facilitates the prediction process, allowing various configurations through keyword arguments.
362
+ It supports predictions with custom predictors or the default predictor method. The method handles different
363
+ types of image sources and can operate in a streaming mode. It also provides support for SAM-type models
364
+ through 'prompts'.
365
+
366
+ The method sets up a new predictor if not already present and updates its arguments with each call.
367
+ It also issues a warning and uses default assets if the 'source' is not provided. The method determines if it
368
+ is being called from the command line interface and adjusts its behavior accordingly, including setting defaults
369
+ for confidence threshold and saving behavior.
370
+
371
+ Args:
372
+ source (str | int | PIL.Image | np.ndarray, optional): The source of the image for making predictions.
373
+ Accepts various types, including file paths, URLs, PIL images, and numpy arrays. Defaults to ASSETS.
374
+ stream (bool, optional): Treats the input source as a continuous stream for predictions. Defaults to False.
375
+ predictor (BasePredictor, optional): An instance of a custom predictor class for making predictions.
376
+ If None, the method uses a default predictor. Defaults to None.
377
+ **kwargs (dict): Additional keyword arguments for configuring the prediction process. These arguments allow
378
+ for further customization of the prediction behavior.
379
+
380
+ Returns:
381
+ (List[ultralytics.engine.results.Results]): A list of prediction results, encapsulated in the Results class.
382
+
383
+ Raises:
384
+ AttributeError: If the predictor is not properly set up.
385
+ """
386
+ if source is None:
387
+ source = ASSETS
388
+ LOGGER.warning(f"WARNING ⚠️ 'source' is missing. Using 'source={source}'.")
389
+
390
+ is_cli = (sys.argv[0].endswith("yolo") or sys.argv[0].endswith("ultralytics")) and any(
391
+ x in sys.argv for x in ("predict", "track", "mode=predict", "mode=track")
392
+ )
393
+
394
+ custom = {"conf": 0.25, "save": is_cli, "mode": "predict"} # method defaults
395
+ args = {**self.overrides, **custom, **kwargs} # highest priority args on the right
396
+ prompts = args.pop("prompts", None) # for SAM-type models
397
+
398
+ if not self.predictor:
399
+ self.predictor = predictor or self._smart_load("predictor")(overrides=args, _callbacks=self.callbacks)
400
+ self.predictor.setup_model(model=self.model, verbose=is_cli)
401
+ else: # only update args if predictor is already setup
402
+ self.predictor.args = get_cfg(self.predictor.args, args)
403
+ if "project" in args or "name" in args:
404
+ self.predictor.save_dir = get_save_dir(self.predictor.args)
405
+ if prompts and hasattr(self.predictor, "set_prompts"): # for SAM-type models
406
+ self.predictor.set_prompts(prompts)
407
+ return self.predictor.predict_cli(source=source) if is_cli else self.predictor(source=source, stream=stream)
408
+
409
+ def track(self, source=None, stream=False, persist=False, **kwargs):
410
+ """
411
+ Conducts object tracking on the specified input source using the registered trackers.
412
+
413
+ This method performs object tracking using the model's predictors and optionally registered trackers. It is
414
+ capable of handling different types of input sources such as file paths or video streams. The method supports
415
+ customization of the tracking process through various keyword arguments. It registers trackers if they are not
416
+ already present and optionally persists them based on the 'persist' flag.
417
+
418
+ The method sets a default confidence threshold specifically for ByteTrack-based tracking, which requires low
419
+ confidence predictions as input. The tracking mode is explicitly set in the keyword arguments.
420
+
421
+ Args:
422
+ source (str, optional): The input source for object tracking. It can be a file path, URL, or video stream.
423
+ stream (bool, optional): Treats the input source as a continuous video stream. Defaults to False.
424
+ persist (bool, optional): Persists the trackers between different calls to this method. Defaults to False.
425
+ **kwargs (dict): Additional keyword arguments for configuring the tracking process. These arguments allow
426
+ for further customization of the tracking behavior.
427
+
428
+ Returns:
429
+ (List[ultralytics.engine.results.Results]): A list of tracking results, encapsulated in the Results class.
430
+
431
+ Raises:
432
+ AttributeError: If the predictor does not have registered trackers.
433
+ """
434
+ if not hasattr(self.predictor, "trackers"):
435
+ from ultralytics.trackers import register_tracker
436
+
437
+ register_tracker(self, persist)
438
+ kwargs["conf"] = kwargs.get("conf") or 0.1 # ByteTrack-based method needs low confidence predictions as input
439
+ kwargs["mode"] = "track"
440
+ return self.predict(source=source, stream=stream, **kwargs)
441
+
442
+ def val(self, validator=None, **kwargs):
443
+ """
444
+ Validates the model using a specified dataset and validation configuration.
445
+
446
+ This method facilitates the model validation process, allowing for a range of customization through various
447
+ settings and configurations. It supports validation with a custom validator or the default validation approach.
448
+ The method combines default configurations, method-specific defaults, and user-provided arguments to configure
449
+ the validation process. After validation, it updates the model's metrics with the results obtained from the
450
+ validator.
451
+
452
+ The method supports various arguments that allow customization of the validation process. For a comprehensive
453
+ list of all configurable options, users should refer to the 'configuration' section in the documentation.
454
+
455
+ Args:
456
+ validator (BaseValidator, optional): An instance of a custom validator class for validating the model. If
457
+ None, the method uses a default validator. Defaults to None.
458
+ **kwargs (dict): Arbitrary keyword arguments representing the validation configuration. These arguments are
459
+ used to customize various aspects of the validation process.
460
+
461
+ Returns:
462
+ (dict): Validation metrics obtained from the validation process.
463
+
464
+ Raises:
465
+ AssertionError: If the model is not a PyTorch model.
466
+ """
467
+ custom = {"rect": True} # method defaults
468
+ args = {**self.overrides, **custom, **kwargs, "mode": "val"} # highest priority args on the right
469
+
470
+ validator = (validator or self._smart_load("validator"))(args=args, _callbacks=self.callbacks)
471
+ validator(model=self.model)
472
+ self.metrics = validator.metrics
473
+ return validator.metrics
474
+
475
+ def benchmark(self, **kwargs):
476
+ """
477
+ Benchmarks the model across various export formats to evaluate performance.
478
+
479
+ This method assesses the model's performance in different export formats, such as ONNX, TorchScript, etc.
480
+ It uses the 'benchmark' function from the ultralytics.utils.benchmarks module. The benchmarking is configured
481
+ using a combination of default configuration values, model-specific arguments, method-specific defaults, and
482
+ any additional user-provided keyword arguments.
483
+
484
+ The method supports various arguments that allow customization of the benchmarking process, such as dataset
485
+ choice, image size, precision modes, device selection, and verbosity. For a comprehensive list of all
486
+ configurable options, users should refer to the 'configuration' section in the documentation.
487
+
488
+ Args:
489
+ **kwargs (dict): Arbitrary keyword arguments to customize the benchmarking process. These are combined with
490
+ default configurations, model-specific arguments, and method defaults.
491
+
492
+ Returns:
493
+ (dict): A dictionary containing the results of the benchmarking process.
494
+
495
+ Raises:
496
+ AssertionError: If the model is not a PyTorch model.
497
+ """
498
+ self._check_is_pytorch_model()
499
+ from ultralytics.utils.benchmarks import benchmark
500
+
501
+ custom = {"verbose": False} # method defaults
502
+ args = {**DEFAULT_CFG_DICT, **self.model.args, **custom, **kwargs, "mode": "benchmark"}
503
+ return benchmark(
504
+ model=self,
505
+ data=kwargs.get("data"), # if no 'data' argument passed set data=None for default datasets
506
+ imgsz=args["imgsz"],
507
+ half=args["half"],
508
+ int8=args["int8"],
509
+ device=args["device"],
510
+ verbose=kwargs.get("verbose"),
511
+ )
512
+
513
+ def export(self, **kwargs):
514
+ """
515
+ Exports the model to a different format suitable for deployment.
516
+
517
+ This method facilitates the export of the model to various formats (e.g., ONNX, TorchScript) for deployment
518
+ purposes. It uses the 'Exporter' class for the export process, combining model-specific overrides, method
519
+ defaults, and any additional arguments provided. The combined arguments are used to configure export settings.
520
+
521
+ The method supports a wide range of arguments to customize the export process. For a comprehensive list of all
522
+ possible arguments, refer to the 'configuration' section in the documentation.
523
+
524
+ Args:
525
+ **kwargs (dict): Arbitrary keyword arguments to customize the export process. These are combined with the
526
+ model's overrides and method defaults.
527
+
528
+ Returns:
529
+ (object): The exported model in the specified format, or an object related to the export process.
530
+
531
+ Raises:
532
+ AssertionError: If the model is not a PyTorch model.
533
+ """
534
+ self._check_is_pytorch_model()
535
+ from .exporter import Exporter
536
+
537
+ custom = {"imgsz": self.model.args["imgsz"], "batch": 1, "data": None, "verbose": False} # method defaults
538
+ args = {**self.overrides, **custom, **kwargs, "mode": "export"} # highest priority args on the right
539
+ return Exporter(overrides=args, _callbacks=self.callbacks)(model=self.model)
540
+
541
+ def train(self, trainer=None, **kwargs):
542
+ """
543
+ Trains the model using the specified dataset and training configuration.
544
+
545
+ This method facilitates model training with a range of customizable settings and configurations. It supports
546
+ training with a custom trainer or the default training approach defined in the method. The method handles
547
+ different scenarios, such as resuming training from a checkpoint, integrating with Ultralytics HUB, and
548
+ updating model and configuration after training.
549
+
550
+ When using Ultralytics HUB, if the session already has a loaded model, the method prioritizes HUB training
551
+ arguments and issues a warning if local arguments are provided. It checks for pip updates and combines default
552
+ configurations, method-specific defaults, and user-provided arguments to configure the training process. After
553
+ training, it updates the model and its configurations, and optionally attaches metrics.
554
+
555
+ Args:
556
+ trainer (BaseTrainer, optional): An instance of a custom trainer class for training the model. If None, the
557
+ method uses a default trainer. Defaults to None.
558
+ **kwargs (dict): Arbitrary keyword arguments representing the training configuration. These arguments are
559
+ used to customize various aspects of the training process.
560
+
561
+ Returns:
562
+ (dict | None): Training metrics if available and training is successful; otherwise, None.
563
+
564
+ Raises:
565
+ AssertionError: If the model is not a PyTorch model.
566
+ PermissionError: If there is a permission issue with the HUB session.
567
+ ModuleNotFoundError: If the HUB SDK is not installed.
568
+ """
569
+ self._check_is_pytorch_model()
570
+ if hasattr(self.session, "model") and self.session.model.id: # Ultralytics HUB session with loaded model
571
+ if any(kwargs):
572
+ LOGGER.warning("WARNING ⚠️ using HUB training arguments, ignoring local training arguments.")
573
+ kwargs = self.session.train_args # overwrite kwargs
574
+
575
+ checks.check_pip_update_available()
576
+
577
+ overrides = yaml_load(checks.check_yaml(kwargs["cfg"])) if kwargs.get("cfg") else self.overrides
578
+ custom = {"data": DEFAULT_CFG_DICT["data"] or TASK2DATA[self.task]} # method defaults
579
+ args = {**overrides, **custom, **kwargs, "mode": "train"} # highest priority args on the right
580
+ # if args.get("resume"):
581
+ # args["resume"] = self.ckpt_path
582
+
583
+ self.trainer = (trainer or self._smart_load("trainer"))(overrides=args, _callbacks=self.callbacks)
584
+ if not args.get("resume"): # manually set model only if not resuming
585
+ self.trainer.model = self.trainer.get_model(weights=self.model if self.ckpt else None, cfg=self.model.yaml)
586
+ self.model = self.trainer.model
587
+
588
+ if SETTINGS["hub"] is True and not self.session:
589
+ # Create a model in HUB
590
+ try:
591
+ self.session = self._get_hub_session(self.model_name)
592
+ if self.session:
593
+ self.session.create_model(args)
594
+ # Check model was created
595
+ if not getattr(self.session.model, "id", None):
596
+ self.session = None
597
+ except (PermissionError, ModuleNotFoundError):
598
+ # Ignore PermissionError and ModuleNotFoundError which indicates hub-sdk not installed
599
+ pass
600
+
601
+ self.trainer.hub_session = self.session # attach optional HUB session
602
+ self.trainer.train()
603
+ # Update model and cfg after training
604
+ if RANK in (-1, 0):
605
+ ckpt = self.trainer.best if self.trainer.best.exists() else self.trainer.last
606
+ self.model, _ = attempt_load_one_weight(ckpt)
607
+ self.overrides = self.model.args
608
+ self.metrics = getattr(self.trainer.validator, "metrics", None) # TODO: no metrics returned by DDP
609
+ return self.metrics
610
+
611
+ def tune(self, use_ray=False, iterations=10, *args, **kwargs):
612
+ """
613
+ Conducts hyperparameter tuning for the model, with an option to use Ray Tune.
614
+
615
+ This method supports two modes of hyperparameter tuning: using Ray Tune or a custom tuning method.
616
+ When Ray Tune is enabled, it leverages the 'run_ray_tune' function from the ultralytics.utils.tuner module.
617
+ Otherwise, it uses the internal 'Tuner' class for tuning. The method combines default, overridden, and
618
+ custom arguments to configure the tuning process.
619
+
620
+ Args:
621
+ use_ray (bool): If True, uses Ray Tune for hyperparameter tuning. Defaults to False.
622
+ iterations (int): The number of tuning iterations to perform. Defaults to 10.
623
+ *args (list): Variable length argument list for additional arguments.
624
+ **kwargs (dict): Arbitrary keyword arguments. These are combined with the model's overrides and defaults.
625
+
626
+ Returns:
627
+ (dict): A dictionary containing the results of the hyperparameter search.
628
+
629
+ Raises:
630
+ AssertionError: If the model is not a PyTorch model.
631
+ """
632
+ self._check_is_pytorch_model()
633
+ if use_ray:
634
+ from ultralytics.utils.tuner import run_ray_tune
635
+
636
+ return run_ray_tune(self, max_samples=iterations, *args, **kwargs)
637
+ else:
638
+ from .tuner import Tuner
639
+
640
+ custom = {} # method defaults
641
+ args = {**self.overrides, **custom, **kwargs, "mode": "train"} # highest priority args on the right
642
+ return Tuner(args=args, _callbacks=self.callbacks)(model=self, iterations=iterations)
643
+
644
    def _apply(self, fn):
        """
        Apply to(), cpu(), cuda(), half(), float() to model tensors that are not parameters or registered buffers.

        Args:
            fn (callable): Transform applied by nn.Module._apply to each tensor.

        Returns:
            self (ultralytics.engine.model.Model): This instance, with the transform applied.
        """
        self._check_is_pytorch_model()
        self = super()._apply(fn)  # noqa
        self.predictor = None  # reset predictor as device may have changed
        self.overrides["device"] = self.device  # was str(self.device) i.e. device(type='cuda', index=0) -> 'cuda:0'
        return self
651
+
652
+ @property
653
+ def names(self):
654
+ """
655
+ Retrieves the class names associated with the loaded model.
656
+
657
+ This property returns the class names if they are defined in the model. It checks the class names for validity
658
+ using the 'check_class_names' function from the ultralytics.nn.autobackend module.
659
+
660
+ Returns:
661
+ (list | None): The class names of the model if available, otherwise None.
662
+ """
663
+ from ultralytics.nn.autobackend import check_class_names
664
+
665
+ return check_class_names(self.model.names) if hasattr(self.model, "names") else None
666
+
667
+ @property
668
+ def device(self):
669
+ """
670
+ Retrieves the device on which the model's parameters are allocated.
671
+
672
+ This property is used to determine whether the model's parameters are on CPU or GPU. It only applies to models
673
+ that are instances of nn.Module.
674
+
675
+ Returns:
676
+ (torch.device | None): The device (CPU/GPU) of the model if it is a PyTorch model, otherwise None.
677
+ """
678
+ return next(self.model.parameters()).device if isinstance(self.model, nn.Module) else None
679
+
680
+ @property
681
+ def transforms(self):
682
+ """
683
+ Retrieves the transformations applied to the input data of the loaded model.
684
+
685
+ This property returns the transformations if they are defined in the model.
686
+
687
+ Returns:
688
+ (object | None): The transform object of the model if available, otherwise None.
689
+ """
690
+ return self.model.transforms if hasattr(self.model, "transforms") else None
691
+
692
+ def add_callback(self, event: str, func):
693
+ """
694
+ Adds a callback function for a specified event.
695
+
696
+ This method allows the user to register a custom callback function that is triggered on a specific event during
697
+ model training or inference.
698
+
699
+ Args:
700
+ event (str): The name of the event to attach the callback to.
701
+ func (callable): The callback function to be registered.
702
+
703
+ Raises:
704
+ ValueError: If the event name is not recognized.
705
+ """
706
+ self.callbacks[event].append(func)
707
+
708
+ def clear_callback(self, event: str):
709
+ """
710
+ Clears all callback functions registered for a specified event.
711
+
712
+ This method removes all custom and default callback functions associated with the given event.
713
+
714
+ Args:
715
+ event (str): The name of the event for which to clear the callbacks.
716
+
717
+ Raises:
718
+ ValueError: If the event name is not recognized.
719
+ """
720
+ self.callbacks[event] = []
721
+
722
+ def reset_callbacks(self):
723
+ """
724
+ Resets all callbacks to their default functions.
725
+
726
+ This method reinstates the default callback functions for all events, removing any custom callbacks that were
727
+ added previously.
728
+ """
729
+ for event in callbacks.default_callbacks.keys():
730
+ self.callbacks[event] = [callbacks.default_callbacks[event][0]]
731
+
732
+ @staticmethod
733
+ def _reset_ckpt_args(args):
734
+ """Reset arguments when loading a PyTorch model."""
735
+ include = {"imgsz", "data", "task", "single_cls"} # only remember these arguments when loading a PyTorch model
736
+ return {k: v for k, v in args.items() if k in include}
737
+
738
+ # def __getattr__(self, attr):
739
+ # """Raises error if object has no requested attribute."""
740
+ # name = self.__class__.__name__
741
+ # raise AttributeError(f"'{name}' object has no attribute '{attr}'. See valid attributes below.\n{self.__doc__}")
742
+
743
    def _smart_load(self, key):
        """
        Load model/trainer/validator/predictor.

        Looks up the class registered in 'task_map' for the current task; raises NotImplementedError naming the
        calling mode if the task does not support it.
        """
        try:
            return self.task_map[self.task][key]
        except Exception as e:
            name = self.__class__.__name__
            mode = inspect.stack()[1][3]  # get the function name.
            raise NotImplementedError(
                emojis(f"WARNING ⚠️ '{name}' model does not support '{mode}' mode for '{self.task}' task yet.")
            ) from e
753
+
754
+ @property
755
+ def task_map(self):
756
+ """
757
+ Map head to model, trainer, validator, and predictor classes.
758
+
759
+ Returns:
760
+ task_map (dict): The map of model task to mode classes.
761
+ """
762
+ raise NotImplementedError("Please provide task map for your model!")
763
+
764
+ def profile(self, imgsz):
765
+ if type(imgsz) is int:
766
+ inputs = torch.randn((2, 3, imgsz, imgsz))
767
+ else:
768
+ inputs = torch.randn((2, 3, imgsz[0], imgsz[1]))
769
+ if next(self.model.parameters()).device.type == 'cuda':
770
+ return self.model.predict(inputs.to(torch.device('cuda')), profile=True)
771
+ else:
772
+ self.model.predict(inputs, profile=True)
yolov8_model/ultralytics/engine/predictor.py ADDED
@@ -0,0 +1,407 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ """
3
+ Run prediction on images, videos, directories, globs, YouTube, webcam, streams, etc.
4
+
5
+ Usage - sources:
6
+ $ yolo mode=predict model=yolov8n.pt source=0 # webcam
7
+ img.jpg # image
8
+ vid.mp4 # video
9
+ screen # screenshot
10
+ path/ # directory
11
+ list.txt # list of images
12
+ list.streams # list of streams
13
+ 'path/*.jpg' # glob
14
+ 'https://youtu.be/LNwODJXcvt4' # YouTube
15
+ 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP, TCP stream
16
+
17
+ Usage - formats:
18
+ $ yolo mode=predict model=yolov8n.pt # PyTorch
19
+ yolov8n.torchscript # TorchScript
20
+ yolov8n.onnx # ONNX Runtime or OpenCV DNN with dnn=True
21
+ yolov8n_openvino_model # OpenVINO
22
+ yolov8n.engine # TensorRT
23
+ yolov8n.mlpackage # CoreML (macOS-only)
24
+ yolov8n_saved_model # TensorFlow SavedModel
25
+ yolov8n.pb # TensorFlow GraphDef
26
+ yolov8n.tflite # TensorFlow Lite
27
+ yolov8n_edgetpu.tflite # TensorFlow Edge TPU
28
+ yolov8n_paddle_model # PaddlePaddle
29
+ """
30
+ import platform
31
+ import threading
32
+ from pathlib import Path
33
+
34
+ import cv2
35
+ import numpy as np
36
+ import torch
37
+ from PIL import Image
38
+ from yolov8_model.ultralytics.cfg import get_cfg, get_save_dir
39
+ from yolov8_model.ultralytics.data import load_inference_source
40
+ from yolov8_model.ultralytics.data.augment import LetterBox, classify_transforms
41
+ from yolov8_model.ultralytics.nn.autobackend import AutoBackend
42
+ from yolov8_model.ultralytics.utils import DEFAULT_CFG, LOGGER, MACOS, WINDOWS, callbacks, colorstr, ops
43
+ from yolov8_model.ultralytics.utils.checks import check_imgsz, check_imshow
44
+ from yolov8_model.ultralytics.utils.files import increment_path
45
+ from yolov8_model.ultralytics.utils.torch_utils import select_device, smart_inference_mode
46
+
47
+ STREAM_WARNING = """
48
+ WARNING ⚠️ inference results will accumulate in RAM unless `stream=True` is passed, causing potential out-of-memory
49
+ errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.
50
+
51
+ Example:
52
+ results = model(source=..., stream=True) # generator of Results objects
53
+ for r in results:
54
+ boxes = r.boxes # Boxes object for bbox outputs
55
+ masks = r.masks # Masks object for segment masks outputs
56
+ probs = r.probs # Class probabilities for classification outputs
57
+ """
58
+
59
+
60
class BasePredictor:
    """
    A base class for creating predictors.

    Attributes:
        args (SimpleNamespace): Configuration for the predictor.
        save_dir (Path): Directory to save results.
        done_warmup (bool): Whether the predictor has finished setup.
        model (nn.Module): Model used for prediction.
        data (dict): Data configuration.
        device (torch.device): Device used for prediction.
        dataset (Dataset): Dataset used for prediction.
        vid_path (list): Per-source video paths (one slot per batch source); see setup_source().
        vid_writer (list): Per-source cv2.VideoWriter handles for saving video output.
        data_path (str): Path to data.
    """
78
+
79
    def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
        """
        Initializes the BasePredictor class.

        Args:
            cfg (str, optional): Path to a configuration file. Defaults to DEFAULT_CFG.
            overrides (dict, optional): Configuration overrides. Defaults to None.
            _callbacks (dict, optional): Callback registry; defaults to the framework's default callbacks.
        """
        self.args = get_cfg(cfg, overrides)
        self.save_dir = get_save_dir(self.args)
        if self.args.conf is None:
            self.args.conf = 0.25  # default conf=0.25
        self.done_warmup = False
        if self.args.show:
            self.args.show = check_imshow(warn=True)  # disable show if the environment has no display

        # Usable if setup is done
        self.model = None
        self.data = self.args.data  # data_dict
        self.imgsz = None
        self.device = None
        self.dataset = None
        self.vid_path, self.vid_writer, self.vid_frame = None, None, None
        self.plotted_img = None
        self.data_path = None
        self.source_type = None
        self.batch = None
        self.results = None
        self.transforms = None
        self.callbacks = _callbacks or callbacks.get_default_callbacks()
        self.txt_path = None
        self._lock = threading.Lock()  # for automatic thread-safe inference
        callbacks.add_integration_callbacks(self)
112
+
113
+ def preprocess(self, im):
114
+ """
115
+ Prepares input image before inference.
116
+
117
+ Args:
118
+ im (torch.Tensor | List(np.ndarray)): BCHW for tensor, [(HWC) x B] for list.
119
+ """
120
+ not_tensor = not isinstance(im, torch.Tensor)
121
+ if not_tensor:
122
+ im = np.stack(self.pre_transform(im))
123
+ im = im[..., ::-1].transpose((0, 3, 1, 2)) # BGR to RGB, BHWC to BCHW, (n, 3, h, w)
124
+ im = np.ascontiguousarray(im) # contiguous
125
+ im = torch.from_numpy(im)
126
+
127
+ im = im.to(self.device)
128
+ im = im.half() if self.model.fp16 else im.float() # uint8 to fp16/32
129
+ if not_tensor:
130
+ im /= 255 # 0 - 255 to 0.0 - 1.0
131
+ return im
132
+
133
+ def inference(self, im, *args, **kwargs):
134
+ """Runs inference on a given image using the specified model and arguments."""
135
+ visualize = (
136
+ increment_path(self.save_dir / Path(self.batch[0][0]).stem, mkdir=True)
137
+ if self.args.visualize and (not self.source_type.tensor)
138
+ else False
139
+ )
140
+ return self.model(im, augment=self.args.augment, visualize=visualize, embed=self.args.embed, *args, **kwargs)
141
+
142
+ def pre_transform(self, im):
143
+ """
144
+ Pre-transform input image before inference.
145
+
146
+ Args:
147
+ im (List(np.ndarray)): (N, 3, h, w) for tensor, [(h, w, 3) x N] for list.
148
+
149
+ Returns:
150
+ (list): A list of transformed images.
151
+ """
152
+ same_shapes = all(x.shape == im[0].shape for x in im)
153
+ letterbox = LetterBox(self.imgsz, auto=same_shapes and self.model.pt, stride=self.model.stride)
154
+ return [letterbox(image=x) for x in im]
155
+
156
    def write_results(self, idx, results, batch):
        """
        Write inference results to a file or directory.

        Args:
            idx (int): Index of the current image within the batch.
            results (list): Results objects for the whole batch.
            batch (tuple): (path, image, ...) tuple for the current batch.

        Returns:
            (str): Log string summarizing this image's results.
        """
        p, im, _ = batch
        log_string = ""
        if len(im.shape) == 3:
            im = im[None]  # expand for batch dim
        if self.source_type.webcam or self.source_type.from_img or self.source_type.tensor:  # batch_size >= 1
            log_string += f"{idx}: "
            frame = self.dataset.count
        else:
            frame = getattr(self.dataset, "frame", 0)
        self.data_path = p
        # Video/stream sources get a frame suffix so per-frame label files don't overwrite each other
        self.txt_path = str(self.save_dir / "labels" / p.stem) + ("" if self.dataset.mode == "image" else f"_{frame}")
        log_string += "%gx%g " % im.shape[2:]  # print string
        result = results[idx]
        log_string += result.verbose()

        if self.args.save or self.args.show:  # Add bbox to image
            plot_args = {
                "line_width": self.args.line_width,
                "boxes": self.args.show_boxes,
                "conf": self.args.show_conf,
                "labels": self.args.show_labels,
            }
            if not self.args.retina_masks:
                plot_args["im_gpu"] = im[idx]
            self.plotted_img = result.plot(**plot_args)
        # Write
        if self.args.save_txt:
            result.save_txt(f"{self.txt_path}.txt", save_conf=self.args.save_conf)
        if self.args.save_crop:
            result.save_crop(
                save_dir=self.save_dir / "crops",
                file_name=self.data_path.stem + ("" if self.dataset.mode == "image" else f"_{frame}"),
            )

        return log_string
193
+
194
+ def postprocess(self, preds, img, orig_imgs):
195
+ """Post-processes predictions for an image and returns them."""
196
+ return preds
197
+
198
+ def __call__(self, source=None, model=None, stream=False, *args, **kwargs):
199
+ """Performs inference on an image or stream."""
200
+ self.stream = stream
201
+ if stream:
202
+ return self.stream_inference(source, model, *args, **kwargs)
203
+ else:
204
+ return list(self.stream_inference(source, model, *args, **kwargs)) # merge list of Result into one
205
+
206
+ def predict_cli(self, source=None, model=None):
207
+ """
208
+ Method used for CLI prediction.
209
+
210
+ It uses always generator as outputs as not required by CLI mode.
211
+ """
212
+ gen = self.stream_inference(source, model)
213
+ for _ in gen: # noqa, running CLI inference without accumulating any outputs (do not modify)
214
+ pass
215
+
216
    def setup_source(self, source):
        """
        Set up the inference source and mode.

        Args:
            source: Any source accepted by load_inference_source (path, URL, tensor, etc.).
        """
        self.imgsz = check_imgsz(self.args.imgsz, stride=self.model.stride, min_dim=2)  # check image size
        # Classification tasks may carry their own transforms on the model; fall back to defaults otherwise
        self.transforms = (
            getattr(
                self.model.model,
                "transforms",
                classify_transforms(self.imgsz[0], crop_fraction=self.args.crop_fraction),
            )
            if self.args.task == "classify"
            else None
        )
        self.dataset = load_inference_source(
            source=source, vid_stride=self.args.vid_stride, buffer=self.args.stream_buffer
        )
        self.source_type = self.dataset.source_type
        # Warn when a non-streaming call would accumulate a very large number of Results in RAM
        if not getattr(self, "stream", True) and (
            self.dataset.mode == "stream"  # streams
            or len(self.dataset) > 1000  # images
            or any(getattr(self.dataset, "video_flag", [False]))
        ):  # videos
            LOGGER.warning(STREAM_WARNING)
        # Per-source video-writer state, one slot per batch element
        self.vid_path = [None] * self.dataset.bs
        self.vid_writer = [None] * self.dataset.bs
        self.vid_frame = [None] * self.dataset.bs
241
+
242
    @smart_inference_mode()
    def stream_inference(self, source=None, model=None, *args, **kwargs):
        """
        Stream real-time inference on a source and save results to file.

        Yields:
            Result objects one at a time (or embedding tensors when ``self.args.embed`` is set).

        Note:
            This is a generator; the trailing ``return all_results`` only surfaces as
            ``StopIteration.value`` and is not seen by normal ``for`` iteration.
        """
        if self.args.verbose:
            LOGGER.info("")

        # Setup model
        if not self.model:
            self.setup_model(model)

        with self._lock:  # for thread-safe inference
            # Setup source every time predict is called
            self.setup_source(source if source is not None else self.args.source)

            # Check if save_dir/ label file exists
            if self.args.save or self.args.save_txt:
                (self.save_dir / "labels" if self.args.save_txt else self.save_dir).mkdir(parents=True, exist_ok=True)

            # Warmup model
            if not self.done_warmup:
                self.model.warmup(imgsz=(1 if self.model.pt or self.model.triton else self.dataset.bs, 3, *self.imgsz))
                self.done_warmup = True

            self.seen, self.windows, self.batch = 0, [], None
            # One profiler each for preprocess / inference / postprocess timing
            profilers = (
                ops.Profile(device=self.device),
                ops.Profile(device=self.device),
                ops.Profile(device=self.device),
            )
            self.run_callbacks("on_predict_start")
            all_results = []

            for batch in self.dataset:
                self.run_callbacks("on_predict_batch_start")
                self.batch = batch
                path, im0s, vid_cap, s = batch

                # Preprocess
                with profilers[0]:
                    im = self.preprocess(im0s)

                # Inference
                with profilers[1]:
                    preds = self.inference(im, *args, **kwargs)
                    if self.args.embed:
                        # Embedding mode short-circuits: yield raw tensors, skip postprocess/saving
                        yield from [preds] if isinstance(preds, torch.Tensor) else preds  # yield embedding tensors
                        continue

                # Postprocess
                with profilers[2]:
                    self.results = self.postprocess(preds, im, im0s)

                self.run_callbacks("on_predict_postprocess_end")
                # Visualize, save, write results
                n = len(im0s)
                for i in range(n):
                    self.seen += 1
                    # Per-image speeds: batch time split evenly across the n images
                    self.results[i].speed = {
                        "preprocess": profilers[0].dt * 1e3 / n,
                        "inference": profilers[1].dt * 1e3 / n,
                        "postprocess": profilers[2].dt * 1e3 / n,
                    }
                    p, im0 = path[i], None if self.source_type.tensor else im0s[i].copy()
                    p = Path(p)

                    if self.args.verbose or self.args.save or self.args.save_txt or self.args.show:
                        s += self.write_results(i, self.results, (p, im, im0))
                    if self.args.save or self.args.save_txt:
                        self.results[i].save_dir = self.save_dir.__str__()
                    if self.args.show and self.plotted_img is not None:
                        self.show(p)
                    if self.args.save and self.plotted_img is not None:
                        self.save_preds(vid_cap, i, str(self.save_dir / p.name))

                self.run_callbacks("on_predict_batch_end")
                yield from self.results
                all_results.extend(self.results)
                # Print time (inference-only)
                if self.args.verbose:
                    LOGGER.info(f"{s}{profilers[1].dt * 1E3:.1f}ms")

        # Release assets
        if isinstance(self.vid_writer[-1], cv2.VideoWriter):
            self.vid_writer[-1].release()  # release final video writer

        # Print results
        if self.args.verbose and self.seen:
            t = tuple(x.t / self.seen * 1e3 for x in profilers)  # speeds per image
            LOGGER.info(
                f"Speed: %.1fms preprocess, %.1fms inference, %.1fms postprocess per image at shape "
                f"{(1, 3, *im.shape[2:])}" % t
            )
        if self.args.save or self.args.save_txt or self.args.save_crop:
            nl = len(list(self.save_dir.glob("labels/*.txt")))  # number of labels
            s = f"\n{nl} label{'s' * (nl > 1)} saved to {self.save_dir / 'labels'}" if self.args.save_txt else ""
            LOGGER.info(f"Results saved to {colorstr('bold', self.save_dir)}{s}")

        self.run_callbacks("on_predict_end")
        return all_results
341
    def setup_model(self, model, verbose=True):
        """
        Initialize the model with given parameters and set it to evaluation mode.

        Args:
            model: Model weights path/object; falls back to ``self.args.model`` when falsy.
            verbose (bool): Whether device selection and backend loading log details.
        """
        self.model = AutoBackend(
            model or self.args.model,
            device=select_device(self.args.device, verbose=verbose),
            dnn=self.args.dnn,
            data=self.args.data,
            fp16=self.args.half,
            fuse=True,
            verbose=verbose,
        )

        self.device = self.model.device  # update device
        self.args.half = self.model.fp16  # update half
        self.model.eval()
356
+
357
    def show(self, p):
        """
        Display the most recently plotted image in a window using OpenCV imshow().

        Args:
            p: Source path used as the window title (one window per distinct source).
        """
        im0 = self.plotted_img
        if platform.system() == "Linux" and p not in self.windows:
            self.windows.append(p)
            cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO)  # allow window resize (Linux)
            cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0])
        cv2.imshow(str(p), im0)
        # Hold 500 ms for still images so they are visible; 1 ms for video/stream frames
        cv2.waitKey(500 if self.batch[3].startswith("image") else 1)  # 1 millisecond
366
+
367
    def save_preds(self, vid_cap, idx, save_path):
        """
        Save the plotted prediction image, appending to a video file for video/stream sources.

        Args:
            vid_cap: cv2.VideoCapture for video sources, falsy for streams.
            idx (int): Batch-slot index selecting the per-source writer state.
            save_path (str): Destination path for the image or video.
        """
        im0 = self.plotted_img
        # Save imgs
        if self.dataset.mode == "image":
            cv2.imwrite(save_path, im0)
        else:  # 'video' or 'stream'
            # Frames directory name is derived from save_path up to its first "."
            frames_path = f'{save_path.split(".", 1)[0]}_frames/'
            if self.vid_path[idx] != save_path:  # new video
                self.vid_path[idx] = save_path
                if self.args.save_frames:
                    Path(frames_path).mkdir(parents=True, exist_ok=True)
                    self.vid_frame[idx] = 0
                if isinstance(self.vid_writer[idx], cv2.VideoWriter):
                    self.vid_writer[idx].release()  # release previous video writer
                if vid_cap:  # video
                    fps = int(vid_cap.get(cv2.CAP_PROP_FPS))  # integer required, floats produce error in MP4 codec
                    w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                    h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                else:  # stream
                    fps, w, h = 30, im0.shape[1], im0.shape[0]
                # Codec/container choice is platform-dependent
                suffix, fourcc = (".mp4", "avc1") if MACOS else (".avi", "WMV2") if WINDOWS else (".avi", "MJPG")
                self.vid_writer[idx] = cv2.VideoWriter(
                    str(Path(save_path).with_suffix(suffix)), cv2.VideoWriter_fourcc(*fourcc), fps, (w, h)
                )
            # Write video
            self.vid_writer[idx].write(im0)

            # Write frame
            if self.args.save_frames:
                cv2.imwrite(f"{frames_path}{self.vid_frame[idx]}.jpg", im0)
                self.vid_frame[idx] += 1
399
+
400
+ def run_callbacks(self, event: str):
401
+ """Runs all registered callbacks for a specific event."""
402
+ for callback in self.callbacks.get(event, []):
403
+ callback(self)
404
+
405
+ def add_callback(self, event: str, func):
406
+ """Add callback."""
407
+ self.callbacks[event].append(func)
yolov8_model/ultralytics/engine/results.py ADDED
@@ -0,0 +1,680 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ """
3
+ Ultralytics Results, Boxes and Masks classes for handling inference results.
4
+
5
+ Usage: See https://docs.ultralytics.com/modes/predict/
6
+ """
7
+
8
+ from copy import deepcopy
9
+ from functools import lru_cache
10
+ from pathlib import Path
11
+
12
+ import numpy as np
13
+ import torch
14
+
15
+ from yolov8_model.ultralytics.data.augment import LetterBox
16
+ from yolov8_model.ultralytics.utils import LOGGER, SimpleClass, ops
17
+ from yolov8_model.ultralytics.utils.plotting import Annotator, colors, save_one_box
18
+ from yolov8_model.ultralytics.utils.torch_utils import smart_inference_mode
19
+
20
+
21
class BaseTensor(SimpleClass):
    """Base tensor class with additional methods for easy manipulation and device handling."""

    def __init__(self, data, orig_shape) -> None:
        """
        Initialize BaseTensor with prediction data and the original image shape.

        Args:
            data (torch.Tensor | np.ndarray): Predictions, such as bboxes, masks and keypoints.
            orig_shape (tuple): Original shape of image.
        """
        assert isinstance(data, (torch.Tensor, np.ndarray))
        self.data = data
        self.orig_shape = orig_shape

    @property
    def shape(self):
        """Shape of the underlying data tensor."""
        return self.data.shape

    def cpu(self):
        """Return a copy on CPU memory; numpy-backed instances are returned unchanged."""
        if isinstance(self.data, np.ndarray):
            return self
        return self.__class__(self.data.cpu(), self.orig_shape)

    def numpy(self):
        """Return a copy backed by a numpy array; numpy-backed instances are returned unchanged."""
        if isinstance(self.data, np.ndarray):
            return self
        return self.__class__(self.data.numpy(), self.orig_shape)

    def cuda(self):
        """Return a copy on GPU memory (numpy data is first converted to a tensor)."""
        return self.__class__(torch.as_tensor(self.data).cuda(), self.orig_shape)

    def to(self, *args, **kwargs):
        """Return a copy moved to the specified device and/or dtype."""
        return self.__class__(torch.as_tensor(self.data).to(*args, **kwargs), self.orig_shape)

    def __len__(self):  # override len(results)
        """Number of entries in the data tensor."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return a new instance of this class holding the indexed slice of the data."""
        return self.__class__(self.data[idx], self.orig_shape)
65
+
66
class Results(SimpleClass):
    """
    A class for storing and manipulating inference results.

    Args:
        orig_img (numpy.ndarray): The original image as a numpy array.
        path (str): The path to the image file.
        names (dict): A dictionary of class names.
        boxes (torch.tensor, optional): A 2D tensor of bounding box coordinates for each detection.
        masks (torch.tensor, optional): A 3D tensor of detection masks, where each mask is a binary image.
        probs (torch.tensor, optional): A 1D tensor of probabilities of each class for classification task.
        keypoints (List[List[float]], optional): A list of detected keypoints for each object.

    Attributes:
        orig_img (numpy.ndarray): The original image as a numpy array.
        orig_shape (tuple): The original image shape in (height, width) format.
        boxes (Boxes, optional): A Boxes object containing the detection bounding boxes.
        masks (Masks, optional): A Masks object containing the detection masks.
        probs (Probs, optional): A Probs object containing probabilities of each class for classification task.
        keypoints (Keypoints, optional): A Keypoints object containing detected keypoints for each object.
        speed (dict): A dictionary of preprocess, inference, and postprocess speeds in milliseconds per image.
        names (dict): A dictionary of class names.
        path (str): The path to the image file.
        _keys (tuple): A tuple of attribute names for non-empty attributes.
    """

    def __init__(self, orig_img, path, names, boxes=None, masks=None, probs=None, keypoints=None, obb=None) -> None:
        """Initialize the Results class."""
        self.orig_img = orig_img
        self.orig_shape = orig_img.shape[:2]
        self.boxes = Boxes(boxes, self.orig_shape) if boxes is not None else None  # native size boxes
        self.masks = Masks(masks, self.orig_shape) if masks is not None else None  # native size or imgsz masks
        self.probs = Probs(probs) if probs is not None else None
        self.keypoints = Keypoints(keypoints, self.orig_shape) if keypoints is not None else None
        self.obb = OBB(obb, self.orig_shape) if obb is not None else None
        self.speed = {"preprocess": None, "inference": None, "postprocess": None}  # milliseconds per image
        self.names = names
        self.path = path
        self.save_dir = None
        self._keys = "boxes", "masks", "probs", "keypoints", "obb"

    def __getitem__(self, idx):
        """Return a Results object for the specified index."""
        return self._apply("__getitem__", idx)

    def __len__(self):
        """Return the number of detections in the Results object."""
        # NOTE(review): implicitly returns None when every attribute is None — callers
        # appear to rely on at least one attribute being set; confirm before changing.
        for k in self._keys:
            v = getattr(self, k)
            if v is not None:
                return len(v)

    def update(self, boxes=None, masks=None, probs=None, obb=None):
        """Update the boxes, masks, and probs attributes of the Results object."""
        if boxes is not None:
            self.boxes = Boxes(ops.clip_boxes(boxes, self.orig_shape), self.orig_shape)
        if masks is not None:
            self.masks = Masks(masks, self.orig_shape)
        if probs is not None:
            # NOTE(review): probs is stored raw here, unlike __init__ which wraps it in Probs.
            self.probs = probs
        if obb is not None:
            self.obb = OBB(obb, self.orig_shape)

    def _apply(self, fn, *args, **kwargs):
        """
        Applies a function to all non-empty attributes and returns a new Results object with modified attributes. This
        function is internally called by methods like .to(), .cuda(), .cpu(), etc.

        Args:
            fn (str): The name of the function to apply.
            *args: Variable length argument list to pass to the function.
            **kwargs: Arbitrary keyword arguments to pass to the function.

        Returns:
            Results: A new Results object with attributes modified by the applied function.
        """
        r = self.new()
        for k in self._keys:
            v = getattr(self, k)
            if v is not None:
                setattr(r, k, getattr(v, fn)(*args, **kwargs))
        return r

    def cpu(self):
        """Return a copy of the Results object with all tensors on CPU memory."""
        return self._apply("cpu")

    def numpy(self):
        """Return a copy of the Results object with all tensors as numpy arrays."""
        return self._apply("numpy")

    def cuda(self):
        """Return a copy of the Results object with all tensors on GPU memory."""
        return self._apply("cuda")

    def to(self, *args, **kwargs):
        """Return a copy of the Results object with tensors on the specified device and dtype."""
        return self._apply("to", *args, **kwargs)

    def new(self):
        """Return a new Results object with the same image, path, and names (no predictions)."""
        return Results(orig_img=self.orig_img, path=self.path, names=self.names)

    def plot(
        self,
        conf=True,
        line_width=None,
        font_size=None,
        font="Arial.ttf",
        pil=False,
        img=None,
        im_gpu=None,
        kpt_radius=5,
        kpt_line=True,
        labels=True,
        boxes=True,
        masks=True,
        probs=True,
    ):
        """
        Plots the detection results on an input RGB image. Accepts a numpy array (cv2) or a PIL Image.

        Args:
            conf (bool): Whether to plot the detection confidence score.
            line_width (float, optional): The line width of the bounding boxes. If None, it is scaled to the image size.
            font_size (float, optional): The font size of the text. If None, it is scaled to the image size.
            font (str): The font to use for the text.
            pil (bool): Whether to return the image as a PIL Image.
            img (numpy.ndarray): Plot to another image. if not, plot to original image.
            im_gpu (torch.Tensor): Normalized image in gpu with shape (1, 3, 640, 640), for faster mask plotting.
            kpt_radius (int, optional): Radius of the drawn keypoints. Default is 5.
            kpt_line (bool): Whether to draw lines connecting keypoints.
            labels (bool): Whether to plot the label of bounding boxes.
            boxes (bool): Whether to plot the bounding boxes.
            masks (bool): Whether to plot the masks.
            probs (bool): Whether to plot classification probability

        Returns:
            (numpy.ndarray): A numpy array of the annotated image.

        Example:
            ```python
            from PIL import Image
            from ultralytics import YOLO

            model = YOLO('yolov8n.pt')
            results = model('bus.jpg')  # results list
            for r in results:
                im_array = r.plot()  # plot a BGR numpy array of predictions
                im = Image.fromarray(im_array[..., ::-1])  # RGB PIL image
                im.show()  # show image
                im.save('results.jpg')  # save image
            ```
        """
        if img is None and isinstance(self.orig_img, torch.Tensor):
            # Tensor sources: convert the first CHW image to a HWC uint8 numpy array for drawing
            img = (self.orig_img[0].detach().permute(1, 2, 0).contiguous() * 255).to(torch.uint8).cpu().numpy()

        names = self.names
        is_obb = self.obb is not None
        pred_boxes, show_boxes = self.obb if is_obb else self.boxes, boxes
        pred_masks, show_masks = self.masks, masks
        pred_probs, show_probs = self.probs, probs
        annotator = Annotator(
            deepcopy(self.orig_img if img is None else img),
            line_width,
            font_size,
            font,
            pil or (pred_probs is not None and show_probs),  # Classify tasks default to pil=True
            example=names,
        )

        # Plot Segment results
        if pred_masks and show_masks:
            if im_gpu is None:
                # Build a normalized CHW BGR->RGB GPU image matching the mask resolution
                img = LetterBox(pred_masks.shape[1:])(image=annotator.result())
                im_gpu = (
                    torch.as_tensor(img, dtype=torch.float16, device=pred_masks.data.device)
                    .permute(2, 0, 1)
                    .flip(0)
                    .contiguous()
                    / 255
                )
            idx = pred_boxes.cls if pred_boxes else range(len(pred_masks))
            annotator.masks(pred_masks.data, colors=[colors(x, True) for x in idx], im_gpu=im_gpu)

        # Plot Detect results
        if pred_boxes is not None and show_boxes:
            for d in reversed(pred_boxes):
                # NOTE: `conf` is rebound from the bool flag to the per-box float inside this loop
                c, conf, id = int(d.cls), float(d.conf) if conf else None, None if d.id is None else int(d.id.item())
                name = ("" if id is None else f"id:{id} ") + names[c]
                label = (f"{name} {conf:.2f}" if conf else name) if labels else None
                box = d.xyxyxyxy.reshape(-1, 4, 2).squeeze() if is_obb else d.xyxy.squeeze()
                annotator.box_label(box, label, color=colors(c, True), rotated=is_obb)

        # Plot Classify results
        if pred_probs is not None and show_probs:
            text = ",\n".join(f"{names[j] if names else j} {pred_probs.data[j]:.2f}" for j in pred_probs.top5)
            x = round(self.orig_shape[0] * 0.03)
            annotator.text([x, x], text, txt_color=(255, 255, 255))  # TODO: allow setting colors

        # Plot Pose results
        if self.keypoints is not None:
            for k in reversed(self.keypoints.data):
                annotator.kpts(k, self.orig_shape, radius=kpt_radius, kpt_line=kpt_line)

        return annotator.result()

    def verbose(self):
        """Return log string for each task."""
        log_string = ""
        probs = self.probs
        boxes = self.boxes
        if len(self) == 0:
            return log_string if probs is not None else f"{log_string}(no detections), "
        if probs is not None:
            log_string += f"{', '.join(f'{self.names[j]} {probs.data[j]:.2f}' for j in probs.top5)}, "
        if boxes:
            for c in boxes.cls.unique():
                n = (boxes.cls == c).sum()  # detections per class
                log_string += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "
        return log_string

    def save_txt(self, txt_file, save_conf=False):
        """
        Save predictions into txt file.

        Args:
            txt_file (str): txt file path.
            save_conf (bool): save confidence score or not.
        """
        is_obb = self.obb is not None
        boxes = self.obb if is_obb else self.boxes
        masks = self.masks
        probs = self.probs
        kpts = self.keypoints
        texts = []
        if probs is not None:
            # Classify (comprehension used for its append side effect)
            [texts.append(f"{probs.data[j]:.2f} {self.names[j]}") for j in probs.top5]
        elif boxes:
            # Detect/segment/pose
            for j, d in enumerate(boxes):
                c, conf, id = int(d.cls), float(d.conf), None if d.id is None else int(d.id.item())
                line = (c, *(d.xyxyxyxyn.view(-1) if is_obb else d.xywhn.view(-1)))
                if masks:
                    # Segment lines replace the box coords with the polygon coords
                    seg = masks[j].xyn[0].copy().reshape(-1)  # reversed mask.xyn, (n,2) to (n*2)
                    line = (c, *seg)
                if kpts is not None:
                    kpt = torch.cat((kpts[j].xyn, kpts[j].conf[..., None]), 2) if kpts[j].has_visible else kpts[j].xyn
                    line += (*kpt.reshape(-1).tolist(),)
                line += (conf,) * save_conf + (() if id is None else (id,))
                texts.append(("%g " * len(line)).rstrip() % line)

        if texts:
            Path(txt_file).parent.mkdir(parents=True, exist_ok=True)  # make directory
            with open(txt_file, "a") as f:
                f.writelines(text + "\n" for text in texts)

    def save_crop(self, save_dir, file_name=Path("im.jpg")):
        """
        Save cropped predictions to `save_dir/cls/file_name.jpg`.

        Args:
            save_dir (str | pathlib.Path): Save path.
            file_name (str | pathlib.Path): File name.
        """
        if self.probs is not None:
            LOGGER.warning("WARNING ⚠️ Classify task do not support `save_crop`.")
            return
        if self.obb is not None:
            LOGGER.warning("WARNING ⚠️ OBB task do not support `save_crop`.")
            return
        for d in self.boxes:
            save_one_box(
                d.xyxy,
                self.orig_img.copy(),
                file=Path(save_dir) / self.names[int(d.cls)] / f"{Path(file_name)}.jpg",
                BGR=True,
            )

    def tojson(self, normalize=False):
        """Convert the object to JSON format."""
        if self.probs is not None:
            LOGGER.warning("Warning: Classify task do not support `tojson` yet.")
            return

        import json

        # Create list of detection dictionaries
        results = []
        data = self.boxes.data.cpu().tolist()
        # Normalizing divides by (h, w); (1, 1) leaves pixel coordinates unchanged
        h, w = self.orig_shape if normalize else (1, 1)
        for i, row in enumerate(data):  # xyxy, track_id if tracking, conf, class_id
            box = {"x1": row[0] / w, "y1": row[1] / h, "x2": row[2] / w, "y2": row[3] / h}
            conf = row[-2]
            class_id = int(row[-1])
            name = self.names[class_id]
            result = {"name": name, "class": class_id, "confidence": conf, "box": box}
            if self.boxes.is_track:
                result["track_id"] = int(row[-3])  # track ID
            if self.masks:
                x, y = self.masks.xy[i][:, 0], self.masks.xy[i][:, 1]  # numpy array
                result["segments"] = {"x": (x / w).tolist(), "y": (y / h).tolist()}
            if self.keypoints is not None:
                x, y, visible = self.keypoints[i].data[0].cpu().unbind(dim=1)  # torch Tensor
                result["keypoints"] = {"x": (x / w).tolist(), "y": (y / h).tolist(), "visible": visible.tolist()}
            results.append(result)

        # Convert detections to JSON
        return json.dumps(results, indent=2)
376
+
377
+
378
class Boxes(BaseTensor):
    """
    A class for storing and manipulating detection boxes.

    Args:
        boxes (torch.Tensor | numpy.ndarray): A tensor or numpy array containing the detection boxes,
            with shape (num_boxes, 6) or (num_boxes, 7). The last two columns contain confidence and class values.
            If present, the third last column contains track IDs.
        orig_shape (tuple): Original image size, in the format (height, width).

    Attributes:
        xyxy (torch.Tensor | numpy.ndarray): The boxes in xyxy format.
        conf (torch.Tensor | numpy.ndarray): The confidence values of the boxes.
        cls (torch.Tensor | numpy.ndarray): The class values of the boxes.
        id (torch.Tensor | numpy.ndarray): The track IDs of the boxes (if available).
        xywh (torch.Tensor | numpy.ndarray): The boxes in xywh format.
        xyxyn (torch.Tensor | numpy.ndarray): The boxes in xyxy format normalized by original image size.
        xywhn (torch.Tensor | numpy.ndarray): The boxes in xywh format normalized by original image size.
        data (torch.Tensor): The raw bboxes tensor (alias for `boxes`).

    Methods:
        cpu(): Move the object to CPU memory.
        numpy(): Convert the object to a numpy array.
        cuda(): Move the object to CUDA memory.
        to(*args, **kwargs): Move the object to the specified device.
    """

    def __init__(self, boxes, orig_shape) -> None:
        """Initialize the Boxes class."""
        if boxes.ndim == 1:
            boxes = boxes[None, :]
        n = boxes.shape[-1]
        assert n in (6, 7), f"expected 6 or 7 values but got {n}"  # xyxy, track_id, conf, cls
        super().__init__(boxes, orig_shape)
        self.is_track = n == 7
        self.orig_shape = orig_shape

    @property
    def xyxy(self):
        """Return the boxes in xyxy format."""
        return self.data[:, :4]

    @property
    def conf(self):
        """Return the confidence values of the boxes."""
        # Indexed from the end so layouts with/without the track-id column both work
        return self.data[:, -2]

    @property
    def cls(self):
        """Return the class values of the boxes."""
        return self.data[:, -1]

    @property
    def id(self):
        """Return the track IDs of the boxes (if available)."""
        return self.data[:, -3] if self.is_track else None

    @property
    @lru_cache(maxsize=2)  # maxsize 1 should suffice
    def xywh(self):
        """Return the boxes in xywh format."""
        return ops.xyxy2xywh(self.xyxy)

    @property
    @lru_cache(maxsize=2)
    def xyxyn(self):
        """Return the boxes in xyxy format normalized by original image size."""
        # Copy first so the cached normalized view never mutates self.data
        xyxy = self.xyxy.clone() if isinstance(self.xyxy, torch.Tensor) else np.copy(self.xyxy)
        xyxy[..., [0, 2]] /= self.orig_shape[1]
        xyxy[..., [1, 3]] /= self.orig_shape[0]
        return xyxy

    @property
    @lru_cache(maxsize=2)
    def xywhn(self):
        """Return the boxes in xywh format normalized by original image size."""
        xywh = ops.xyxy2xywh(self.xyxy)
        xywh[..., [0, 2]] /= self.orig_shape[1]
        xywh[..., [1, 3]] /= self.orig_shape[0]
        return xywh
458
+
459
+
460
class Masks(BaseTensor):
    """
    A class for storing and manipulating detection masks.

    Attributes:
        xy (list): A list of segments in pixel coordinates.
        xyn (list): A list of normalized segments.

    Methods:
        cpu(): Returns the masks tensor on CPU memory.
        numpy(): Returns the masks tensor as a numpy array.
        cuda(): Returns the masks tensor on GPU memory.
        to(device, dtype): Returns the masks tensor with the specified device and dtype.
    """

    def __init__(self, masks, orig_shape) -> None:
        """Initialize the Masks class with the given masks tensor and original image shape."""
        if masks.ndim == 2:
            masks = masks[None, :]  # promote a single (h, w) mask to (1, h, w)
        super().__init__(masks, orig_shape)

    @property
    @lru_cache(maxsize=1)
    def xyn(self):
        """Segments scaled to the original image and normalized to [0, 1]."""
        mask_shape = self.data.shape[1:]
        segments = ops.masks2segments(self.data)
        return [ops.scale_coords(mask_shape, segment, self.orig_shape, normalize=True) for segment in segments]

    @property
    @lru_cache(maxsize=1)
    def xy(self):
        """Segments scaled to the original image, in pixel coordinates."""
        mask_shape = self.data.shape[1:]
        segments = ops.masks2segments(self.data)
        return [ops.scale_coords(mask_shape, segment, self.orig_shape, normalize=False) for segment in segments]
498
+
499
+
500
class Keypoints(BaseTensor):
    """
    A class for storing and manipulating detection keypoints.

    Attributes:
        xy (torch.Tensor): A collection of keypoints containing x, y coordinates for each detection.
        xyn (torch.Tensor): A normalized version of xy with coordinates in the range [0, 1].
        conf (torch.Tensor): Confidence values associated with keypoints if available, otherwise None.

    Methods:
        cpu(): Returns a copy of the keypoints tensor on CPU memory.
        numpy(): Returns a copy of the keypoints tensor as a numpy array.
        cuda(): Returns a copy of the keypoints tensor on GPU memory.
        to(device, dtype): Returns a copy of the keypoints tensor with the specified device and dtype.
    """

    @smart_inference_mode()  # avoid keypoints < conf in-place error
    def __init__(self, keypoints, orig_shape) -> None:
        """Initializes the Keypoints object with detection keypoints and original image size."""
        if keypoints.ndim == 2:
            keypoints = keypoints[None, :]
        if keypoints.shape[2] == 3:  # x, y, conf
            # NOTE(review): this zeroing mutates the caller's tensor in place — confirm intended
            mask = keypoints[..., 2] < 0.5  # points with conf < 0.5 (not visible)
            keypoints[..., :2][mask] = 0
        super().__init__(keypoints, orig_shape)
        self.has_visible = self.data.shape[-1] == 3

    @property
    @lru_cache(maxsize=1)
    def xy(self):
        """Returns x, y coordinates of keypoints."""
        return self.data[..., :2]

    @property
    @lru_cache(maxsize=1)
    def xyn(self):
        """Returns normalized x, y coordinates of keypoints."""
        # Copy first so the cached normalized view never mutates self.data
        xy = self.xy.clone() if isinstance(self.xy, torch.Tensor) else np.copy(self.xy)
        xy[..., 0] /= self.orig_shape[1]
        xy[..., 1] /= self.orig_shape[0]
        return xy

    @property
    @lru_cache(maxsize=1)
    def conf(self):
        """Returns confidence values of keypoints if available, else None."""
        return self.data[..., 2] if self.has_visible else None
547
+
548
+
549
class Probs(BaseTensor):
    """
    A class for storing and manipulating classification predictions.

    Attributes:
        top1 (int): Index of the top 1 class.
        top5 (list[int]): Indices of the top 5 classes.
        top1conf (torch.Tensor): Confidence of the top 1 class.
        top5conf (torch.Tensor): Confidences of the top 5 classes.

    Methods:
        cpu(): Returns a copy of the probs tensor on CPU memory.
        numpy(): Returns a copy of the probs tensor as a numpy array.
        cuda(): Returns a copy of the probs tensor on GPU memory.
        to(): Returns a copy of the probs tensor with the specified device and dtype.
    """

    def __init__(self, probs, orig_shape=None) -> None:
        """Initialize the Probs class with classification probabilities and optional original shape of the image."""
        # orig_shape is unused for classification but kept for BaseTensor API parity
        super().__init__(probs, orig_shape)

    @property
    @lru_cache(maxsize=1)
    def top1(self):
        """Return the index of top 1."""
        return int(self.data.argmax())

    @property
    @lru_cache(maxsize=1)
    def top5(self):
        """Return the indices of top 5."""
        return (-self.data).argsort(0)[:5].tolist()  # this way works with both torch and numpy.

    @property
    @lru_cache(maxsize=1)
    def top1conf(self):
        """Return the confidence of top 1."""
        return self.data[self.top1]

    @property
    @lru_cache(maxsize=1)
    def top5conf(self):
        """Return the confidences of top 5."""
        return self.data[self.top5]
593
+
594
+
595
class OBB(BaseTensor):
    """
    A class for storing and manipulating Oriented Bounding Boxes (OBB).

    Args:
        boxes (torch.Tensor | numpy.ndarray): Detection boxes with shape (num_boxes, 7) or (num_boxes, 8).
            Columns are [x_center, y_center, width, height, rotation, (track_id), conf, cls]; the track-id
            column is only present in the 8-column layout.
        orig_shape (tuple): Original image size, in the format (height, width).

    Attributes:
        xywhr (torch.Tensor | numpy.ndarray): The boxes in [x_center, y_center, width, height, rotation] format.
        conf (torch.Tensor | numpy.ndarray): The confidence values of the boxes.
        cls (torch.Tensor | numpy.ndarray): The class values of the boxes.
        id (torch.Tensor | numpy.ndarray): The track IDs of the boxes (if available).
        xyxyxyxy (torch.Tensor | numpy.ndarray): The rotated boxes as 4 corner points, shape (N, 4, 2).
        xyxyxyxyn (torch.Tensor | numpy.ndarray): The corner points normalized by original image size.
        xyxy (torch.Tensor | numpy.ndarray): The axis-aligned bounding boxes of the rotated boxes, shape (N, 4).
        data (torch.Tensor): The raw OBB tensor (alias for `boxes`).

    Methods:
        cpu(): Move the object to CPU memory.
        numpy(): Convert the object to a numpy array.
        cuda(): Move the object to CUDA memory.
        to(*args, **kwargs): Move the object to the specified device.
    """

    def __init__(self, boxes, orig_shape) -> None:
        """Initialize the OBB class with (N, 7) or (N, 8) boxes and the original image shape (height, width)."""
        if boxes.ndim == 1:
            boxes = boxes[None, :]
        n = boxes.shape[-1]
        assert n in (7, 8), f"expected 7 or 8 values but got {n}"  # xywh, rotation, track_id, conf, cls
        super().__init__(boxes, orig_shape)
        self.is_track = n == 8  # an extra column means per-box track IDs are present
        self.orig_shape = orig_shape

    @property
    def xywhr(self):
        """Return the rotated boxes in [x_center, y_center, width, height, rotation] format, (N, 5)."""
        return self.data[:, :5]

    @property
    def conf(self):
        """Return the confidence values of the boxes."""
        return self.data[:, -2]

    @property
    def cls(self):
        """Return the class values of the boxes."""
        return self.data[:, -1]

    @property
    def id(self):
        """Return the track IDs of the boxes, or None when tracking is not active."""
        return self.data[:, -3] if self.is_track else None

    @property
    @lru_cache(maxsize=2)
    def xyxyxyxy(self):
        """Return the boxes as 4 corner points in xyxyxyxy format, (N, 4, 2)."""
        return ops.xywhr2xyxyxyxy(self.xywhr)

    @property
    @lru_cache(maxsize=2)
    def xyxyxyxyn(self):
        """Return the corner points normalized by the original image size, (N, 4, 2)."""
        xyxyxyxyn = self.xyxyxyxy.clone() if isinstance(self.xyxyxyxy, torch.Tensor) else np.copy(self.xyxyxyxy)
        xyxyxyxyn[..., 0] /= self.orig_shape[1]  # x / width
        xyxyxyxyn[..., 1] /= self.orig_shape[0]  # y / height (fix: was incorrectly divided by width)
        return xyxyxyxyn

    @property
    @lru_cache(maxsize=2)
    def xyxy(self):
        """
        Return the axis-aligned bounding boxes of the rotated boxes in xyxy format, (N, 4).

        Accepts both torch and numpy boxes.
        """
        x = self.xyxyxyxy[..., 0]
        y = self.xyxyxyxy[..., 1]
        # Fix: numpy's ndarray.min(axis)/max(axis) return plain arrays with no `.values`
        # attribute, so the torch and numpy paths must reduce differently.
        if isinstance(x, np.ndarray):
            return np.stack([x.min(1), y.min(1), x.max(1), y.max(1)], axis=-1)
        return torch.stack([x.amin(1), y.amin(1), x.amax(1), y.amax(1)], dim=-1)
yolov8_model/ultralytics/engine/trainer.py ADDED
@@ -0,0 +1,755 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ """
3
+ Train a model on a dataset.
4
+
5
+ Usage:
6
+ $ yolo mode=train model=yolov8n.pt data=coco128.yaml imgsz=640 epochs=100 batch=16
7
+ """
8
+
9
+ import math
10
+ import os
11
+ import subprocess
12
+ import time
13
+ import warnings
14
+ from copy import deepcopy
15
+ from datetime import datetime, timedelta
16
+ from pathlib import Path
17
+
18
+ import numpy as np
19
+ import torch
20
+ from torch import distributed as dist
21
+ from torch import nn, optim
22
+
23
+ from yolov8_model.ultralytics.cfg import get_cfg, get_save_dir
24
+ from yolov8_model.ultralytics.data.utils import check_cls_dataset, check_det_dataset
25
+ from yolov8_model.ultralytics.nn.tasks import attempt_load_one_weight, attempt_load_weights
26
+ from yolov8_model.ultralytics.utils import (
27
+ DEFAULT_CFG,
28
+ LOGGER,
29
+ RANK,
30
+ TQDM,
31
+ __version__,
32
+ callbacks,
33
+ clean_url,
34
+ colorstr,
35
+ emojis,
36
+ yaml_save,
37
+ )
38
+ from yolov8_model.ultralytics.utils.autobatch import check_train_batch_size
39
+ from yolov8_model.ultralytics.utils.checks import check_amp, check_file, check_imgsz, check_model_file_from_stem, print_args
40
+ from yolov8_model.ultralytics.utils.dist import ddp_cleanup, generate_ddp_command
41
+ from yolov8_model.ultralytics.utils.files import get_latest_run
42
+ from yolov8_model.ultralytics.utils.torch_utils import (
43
+ EarlyStopping,
44
+ ModelEMA,
45
+ de_parallel,
46
+ init_seeds,
47
+ one_cycle,
48
+ select_device,
49
+ strip_optimizer,
50
+ )
51
+ from yolov8_model.ultralytics.nn.extra_modules.kernel_warehouse import get_temperature
52
+
53
+
54
+ class BaseTrainer:
55
+ """
56
+ BaseTrainer.
57
+
58
+ A base class for creating trainers.
59
+
60
+ Attributes:
61
+ args (SimpleNamespace): Configuration for the trainer.
62
+ validator (BaseValidator): Validator instance.
63
+ model (nn.Module): Model instance.
64
+ callbacks (defaultdict): Dictionary of callbacks.
65
+ save_dir (Path): Directory to save results.
66
+ wdir (Path): Directory to save weights.
67
+ last (Path): Path to the last checkpoint.
68
+ best (Path): Path to the best checkpoint.
69
+ save_period (int): Save checkpoint every x epochs (disabled if < 1).
70
+ batch_size (int): Batch size for training.
71
+ epochs (int): Number of epochs to train for.
72
+ start_epoch (int): Starting epoch for training.
73
+ device (torch.device): Device to use for training.
74
+ amp (bool): Flag to enable AMP (Automatic Mixed Precision).
75
+ scaler (amp.GradScaler): Gradient scaler for AMP.
76
+ data (str): Path to data.
77
+ trainset (torch.utils.data.Dataset): Training dataset.
78
+ testset (torch.utils.data.Dataset): Testing dataset.
79
+ ema (nn.Module): EMA (Exponential Moving Average) of the model.
80
+ resume (bool): Resume training from a checkpoint.
81
+ lf (nn.Module): Loss function.
82
+ scheduler (torch.optim.lr_scheduler._LRScheduler): Learning rate scheduler.
83
+ best_fitness (float): The best fitness value achieved.
84
+ fitness (float): Current fitness value.
85
+ loss (float): Current loss value.
86
+ tloss (float): Total loss value.
87
+ loss_names (list): List of loss names.
88
+ csv (Path): Path to results CSV file.
89
+ """
90
+
91
def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
    """
    Initializes the BaseTrainer class.

    Args:
        cfg (str, optional): Path to a configuration file. Defaults to DEFAULT_CFG.
        overrides (dict, optional): Configuration overrides. Defaults to None.
        _callbacks (dict, optional): Pre-built callback mapping; defaults to the library's standard callbacks.
    """
    self.args = get_cfg(cfg, overrides)
    self.check_resume(overrides)  # may replace self.args entirely when resuming from a checkpoint
    self.device = select_device(self.args.device, self.args.batch)
    self.validator = None
    self.metrics = None
    self.plots = {}
    init_seeds(self.args.seed + 1 + RANK, deterministic=self.args.deterministic)

    # Dirs
    self.save_dir = get_save_dir(self.args)
    self.args.name = self.save_dir.name  # update name for loggers
    self.wdir = self.save_dir / "weights"  # weights dir
    if RANK in (-1, 0):  # only the main process creates directories and persists run args
        self.wdir.mkdir(parents=True, exist_ok=True)  # make dir
        self.args.save_dir = str(self.save_dir)
        yaml_save(self.save_dir / "args.yaml", vars(self.args))  # save run args
    self.last, self.best = self.wdir / "last.pt", self.wdir / "best.pt"  # checkpoint paths
    self.save_period = self.args.save_period

    self.batch_size = self.args.batch
    self.epochs = self.args.epochs
    self.start_epoch = 0
    if RANK == -1:
        print_args(vars(self.args))

    # Device
    if self.device.type in ("cpu", "mps"):
        self.args.workers = 0  # faster CPU training as time dominated by inference, not dataloading

    # Model and Dataset
    self.model = check_model_file_from_stem(self.args.model)  # add suffix, i.e. yolov8n -> yolov8n.pt
    try:
        if self.args.task == "classify":
            self.data = check_cls_dataset(self.args.data)
        elif self.args.data.split(".")[-1] in ("yaml", "yml") or self.args.task in ("detect", "segment", "pose"):
            self.data = check_det_dataset(self.args.data)
            if "yaml_file" in self.data:
                self.args.data = self.data["yaml_file"]  # for validating 'yolo train data=url.zip' usage
    except Exception as e:
        raise RuntimeError(emojis(f"Dataset '{clean_url(self.args.data)}' error ❌ {e}")) from e

    self.trainset, self.testset = self.get_dataset(self.data)
    self.ema = None

    # Optimization utils init (populated later by _setup_scheduler / _setup_train)
    self.lf = None
    self.scheduler = None

    # Epoch level metrics
    self.best_fitness = None
    self.fitness = None
    self.loss = None
    self.tloss = None
    self.loss_names = ["Loss"]
    self.csv = self.save_dir / "results.csv"
    self.plot_idx = [0, 1, 2]

    # Callbacks
    self.callbacks = _callbacks or callbacks.get_default_callbacks()
    if RANK in (-1, 0):
        callbacks.add_integration_callbacks(self)
160
+
161
def add_callback(self, event: str, callback):
    """Register an additional handler to run for `event`, keeping any existing handlers."""
    handlers = self.callbacks[event]
    handlers.append(callback)
164
+
165
def set_callback(self, event: str, callback):
    """Replace all handlers for `event` with the single given callback."""
    replacement = [callback]
    self.callbacks[event] = replacement
168
+
169
def run_callbacks(self, event: str):
    """Invoke every handler registered for `event`, passing the trainer itself as the sole argument."""
    for handler in self.callbacks.get(event, []):
        handler(self)
173
+
174
def train(self):
    """Entry point: resolve the world size from the device spec and run training (spawning a DDP subprocess when needed)."""
    # Allow device='', device=None on Multi-GPU systems to default to device=0
    if isinstance(self.args.device, str) and len(self.args.device):  # i.e. device='0' or device='0,1,2,3'
        world_size = len(self.args.device.split(","))
    elif isinstance(self.args.device, (tuple, list)):  # i.e. device=[0, 1, 2, 3] (multi-GPU from CLI is list)
        world_size = len(self.args.device)
    elif torch.cuda.is_available():  # i.e. device=None or device='' or device=number
        world_size = 1  # default to device 0
    else:  # i.e. device='cpu' or 'mps'
        world_size = 0

    # Run subprocess if DDP training, else train normally
    if world_size > 1 and "LOCAL_RANK" not in os.environ:
        # Argument checks
        if self.args.rect:
            LOGGER.warning("WARNING ⚠️ 'rect=True' is incompatible with Multi-GPU training, setting 'rect=False'")
            self.args.rect = False
        if self.args.batch == -1:
            LOGGER.warning(
                "WARNING ⚠️ 'batch=-1' for AutoBatch is incompatible with Multi-GPU training, setting "
                "default 'batch=16'"
            )
            self.args.batch = 16

        # Command
        cmd, file = generate_ddp_command(world_size, self)
        try:
            LOGGER.info(f'{colorstr("DDP:")} debug command {" ".join(cmd)}')
            subprocess.run(cmd, check=True)
        except Exception as e:
            raise e
        finally:
            ddp_cleanup(self, str(file))  # always remove the temporary DDP launch file

    else:
        self._do_train(world_size)
210
+
211
+ def _setup_scheduler(self):
212
+ """Initialize training learning rate scheduler."""
213
+ if self.args.cos_lr:
214
+ self.lf = one_cycle(1, self.args.lrf, self.epochs) # cosine 1->hyp['lrf']
215
+ else:
216
+ self.lf = lambda x: max(1 - x / self.epochs, 0) * (1.0 - self.args.lrf) + self.args.lrf # linear
217
+ self.scheduler = optim.lr_scheduler.LambdaLR(self.optimizer, lr_lambda=self.lf)
218
+
219
def _setup_ddp(self, world_size):
    """Initializes and sets the DistributedDataParallel parameters for training (per-rank device + process group)."""
    torch.cuda.set_device(RANK)
    self.device = torch.device("cuda", RANK)
    # LOGGER.info(f'DDP info: RANK {RANK}, WORLD_SIZE {world_size}, DEVICE {self.device}')
    os.environ["NCCL_BLOCKING_WAIT"] = "1"  # set to enforce timeout
    dist.init_process_group(
        "nccl" if dist.is_nccl_available() else "gloo",  # fall back to gloo when NCCL is unavailable
        timeout=timedelta(seconds=10800),  # 3 hours
        rank=RANK,
        world_size=world_size,
    )
231
+
232
def _setup_train(self, world_size):
    """Builds model, dataloaders, optimizer, and scheduler on the correct rank process before training starts."""

    # Model
    self.run_callbacks("on_pretrain_routine_start")
    ckpt = self.setup_model()  # returns the loaded checkpoint dict (or None) for resume_training below
    self.model = self.model.to(self.device)
    self.set_model_attributes()

    # Freeze layers: accept a list of indices, an int count, or nothing
    freeze_list = (
        self.args.freeze
        if isinstance(self.args.freeze, list)
        else range(self.args.freeze)
        if isinstance(self.args.freeze, int)
        else []
    )
    always_freeze_names = [".dfl"]  # always freeze these layers
    freeze_layer_names = [f"model.{x}." for x in freeze_list] + always_freeze_names
    for k, v in self.model.named_parameters():
        # v.register_hook(lambda x: torch.nan_to_num(x))  # NaN to 0 (commented for erratic training results)
        if any(x in k for x in freeze_layer_names):
            LOGGER.info(f"Freezing layer '{k}'")
            v.requires_grad = False
        elif not v.requires_grad:
            LOGGER.info(
                f"WARNING ⚠️ setting 'requires_grad=True' for frozen layer '{k}'. "
                "See ultralytics.engine.trainer for customization of frozen layers."
            )
            v.requires_grad = True

    # Check AMP
    self.amp = torch.tensor(self.args.amp).to(self.device)  # True or False
    if self.amp and RANK in (-1, 0):  # Single-GPU and DDP
        callbacks_backup = callbacks.default_callbacks.copy()  # backup callbacks as check_amp() resets them
        self.amp = torch.tensor(check_amp(self.model), device=self.device)
        callbacks.default_callbacks = callbacks_backup  # restore callbacks
    if RANK > -1 and world_size > 1:  # DDP
        dist.broadcast(self.amp, src=0)  # broadcast the tensor from rank 0 to all other ranks (returns None)
    self.amp = bool(self.amp)  # as boolean
    self.scaler = torch.cuda.amp.GradScaler(enabled=self.amp)
    if world_size > 1:
        self.model = nn.parallel.DistributedDataParallel(self.model, device_ids=[RANK])

    # Check imgsz
    gs = max(int(self.model.stride.max() if hasattr(self.model, "stride") else 32), 32)  # grid size (max stride)
    self.args.imgsz = check_imgsz(self.args.imgsz, stride=gs, floor=gs, max_dim=1)
    self.stride = gs  # for multi-scale training

    # Batch size
    if self.batch_size == -1 and RANK == -1:  # single-GPU only, estimate best batch size
        self.args.batch = self.batch_size = check_train_batch_size(self.model, self.args.imgsz, self.amp)

    # Dataloaders: each rank gets an equal share of the global batch
    batch_size = self.batch_size // max(world_size, 1)
    self.train_loader = self.get_dataloader(self.trainset, batch_size=batch_size, rank=RANK, mode="train")
    if RANK in (-1, 0):
        # NOTE: When training DOTA dataset, double batch size could get OOM cause some images got more than 2000 objects.
        self.test_loader = self.get_dataloader(
            self.testset, batch_size=batch_size if self.args.task == "obb" else batch_size * 2, rank=-1, mode="val"
        )
        self.validator = self.get_validator()
        metric_keys = self.validator.metrics.keys + self.label_loss_items(prefix="val")
        self.metrics = dict(zip(metric_keys, [0] * len(metric_keys)))
        self.ema = ModelEMA(self.model)
        if self.args.plots:
            self.plot_training_labels()

    # Optimizer
    self.accumulate = max(round(self.args.nbs / self.batch_size), 1)  # accumulate loss before optimizing
    weight_decay = self.args.weight_decay * self.batch_size * self.accumulate / self.args.nbs  # scale weight_decay
    iterations = math.ceil(len(self.train_loader.dataset) / max(self.batch_size, self.args.nbs)) * self.epochs
    self.optimizer = self.build_optimizer(
        model=self.model,
        name=self.args.optimizer,
        lr=self.args.lr0,
        momentum=self.args.momentum,
        decay=weight_decay,
        iterations=iterations,
    )
    # Scheduler
    self._setup_scheduler()
    self.stopper, self.stop = EarlyStopping(patience=self.args.patience), False
    self.resume_training(ckpt)
    self.scheduler.last_epoch = self.start_epoch - 1  # do not move
    self.run_callbacks("on_pretrain_routine_end")
318
+
319
def _do_train(self, world_size=1):
    """Run the full training loop: warmup, AMP forward/backward, optimizer stepping, validation, and checkpointing."""
    if world_size > 1:
        self._setup_ddp(world_size)
    self._setup_train(world_size)

    nb = len(self.train_loader)  # number of batches
    nw = max(round(self.args.warmup_epochs * nb), 100) if self.args.warmup_epochs > 0 else -1  # warmup iterations
    last_opt_step = -1
    self.epoch_time = None
    self.epoch_time_start = time.time()
    self.train_time_start = time.time()
    self.run_callbacks("on_train_start")
    LOGGER.info(
        f'Image sizes {self.args.imgsz} train, {self.args.imgsz} val\n'
        f'Using {self.train_loader.num_workers * (world_size or 1)} dataloader workers\n'
        f"Logging results to {colorstr('bold', self.save_dir)}\n"
        f'Starting training for ' + (f"{self.args.time} hours..." if self.args.time else f"{self.epochs} epochs...")
    )
    if self.args.close_mosaic:
        base_idx = (self.epochs - self.args.close_mosaic) * nb
        self.plot_idx.extend([base_idx, base_idx + 1, base_idx + 2])
    epoch = self.epochs  # predefine for resume fully trained model edge cases
    for epoch in range(self.start_epoch, self.epochs):
        self.epoch = epoch
        self.run_callbacks("on_train_epoch_start")
        self.model.train()
        if RANK != -1:
            self.train_loader.sampler.set_epoch(epoch)
        pbar = enumerate(self.train_loader)
        # Update dataloader attributes (optional)
        if epoch == (self.epochs - self.args.close_mosaic):
            self._close_dataloader_mosaic()
            self.train_loader.reset()

        if RANK in (-1, 0):
            LOGGER.info(self.progress_string())
            pbar = TQDM(enumerate(self.train_loader), total=nb)
        self.tloss = None
        self.optimizer.zero_grad()
        for i, batch in pbar:
            self.run_callbacks("on_train_batch_start")
            # Warmup: linearly ramp lr (and momentum) over the first nw iterations
            ni = i + nb * epoch
            if ni <= nw:
                xi = [0, nw]  # x interp
                self.accumulate = max(1, int(np.interp(ni, xi, [1, self.args.nbs / self.batch_size]).round()))
                for j, x in enumerate(self.optimizer.param_groups):
                    # Bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                    x["lr"] = np.interp(
                        ni, xi, [self.args.warmup_bias_lr if j == 0 else 0.0, x["initial_lr"] * self.lf(epoch)]
                    )
                    if "momentum" in x:
                        x["momentum"] = np.interp(ni, xi, [self.args.warmup_momentum, self.args.momentum])

            # Kernel-warehouse temperature schedule (only for models exposing net_update_temperature)
            if hasattr(self.model, 'net_update_temperature'):
                temp = get_temperature(i + 1, epoch, len(self.train_loader), temp_epoch=20, temp_init_value=1.0)
                self.model.net_update_temperature(temp)

            # Forward
            with torch.cuda.amp.autocast(self.amp):
                batch = self.preprocess_batch(batch)
                self.loss, self.loss_items = self.model(batch)
                if RANK != -1:
                    self.loss *= world_size  # DDP gradient averaging compensation
                self.tloss = (
                    (self.tloss * i + self.loss_items) / (i + 1) if self.tloss is not None else self.loss_items
                )

            # Backward
            self.scaler.scale(self.loss).backward()

            # Optimize - https://pytorch.org/docs/master/notes/amp_examples.html
            if ni - last_opt_step >= self.accumulate:
                self.optimizer_step()
                last_opt_step = ni

                # Timed stopping
                if self.args.time:
                    self.stop = (time.time() - self.train_time_start) > (self.args.time * 3600)
                    if RANK != -1:  # if DDP training
                        broadcast_list = [self.stop if RANK == 0 else None]
                        dist.broadcast_object_list(broadcast_list, 0)  # broadcast 'stop' to all ranks
                        self.stop = broadcast_list[0]
                    if self.stop:  # training time exceeded
                        break

            # Log
            mem = f"{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G"  # (GB)
            loss_len = self.tloss.shape[0] if len(self.tloss.shape) else 1
            losses = self.tloss if loss_len > 1 else torch.unsqueeze(self.tloss, 0)
            if RANK in (-1, 0):
                pbar.set_description(
                    ("%11s" * 2 + "%11.4g" * (2 + loss_len))
                    % (f"{epoch + 1}/{self.epochs}", mem, *losses, batch["cls"].shape[0], batch["img"].shape[-1])
                )
                self.run_callbacks("on_batch_end")
                if self.args.plots and ni in self.plot_idx:
                    self.plot_training_samples(batch, ni)

            self.run_callbacks("on_train_batch_end")

        self.lr = {f"lr/pg{ir}": x["lr"] for ir, x in enumerate(self.optimizer.param_groups)}  # for loggers
        self.run_callbacks("on_train_epoch_end")
        if RANK in (-1, 0):
            final_epoch = epoch + 1 == self.epochs
            self.ema.update_attr(self.model, include=["yaml", "nc", "args", "names", "stride", "class_weights"])

            # Validation
            if self.args.val or final_epoch or self.stopper.possible_stop or self.stop:
                self.metrics, self.fitness = self.validate()
            self.save_metrics(metrics={**self.label_loss_items(self.tloss), **self.metrics, **self.lr})
            self.stop |= self.stopper(epoch + 1, self.fitness)
            if self.args.time:
                self.stop |= (time.time() - self.train_time_start) > (self.args.time * 3600)

            # Save model
            if self.args.save or final_epoch:
                self.save_model()
                self.run_callbacks("on_model_save")

        # Scheduler
        t = time.time()
        self.epoch_time = t - self.epoch_time_start
        self.epoch_time_start = t
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")  # suppress 'Detected lr_scheduler.step() before optimizer.step()'
            if self.args.time:
                # Re-estimate total epochs from elapsed time and rebuild the scheduler to match
                mean_epoch_time = (t - self.train_time_start) / (epoch - self.start_epoch + 1)
                self.epochs = self.args.epochs = math.ceil(self.args.time * 3600 / mean_epoch_time)
                self._setup_scheduler()
                self.scheduler.last_epoch = self.epoch  # do not move
                self.stop |= epoch >= self.epochs  # stop if exceeded epochs
            self.scheduler.step()
        self.run_callbacks("on_fit_epoch_end")
        torch.cuda.empty_cache()  # clear GPU memory at end of epoch, may help reduce CUDA out of memory errors

        # Early Stopping
        if RANK != -1:  # if DDP training
            broadcast_list = [self.stop if RANK == 0 else None]
            dist.broadcast_object_list(broadcast_list, 0)  # broadcast 'stop' to all ranks
            self.stop = broadcast_list[0]
        if self.stop:
            break  # must break all DDP ranks

    if RANK in (-1, 0):
        # Do final val with best.pt
        LOGGER.info(
            f"\n{epoch - self.start_epoch + 1} epochs completed in "
            f"{(time.time() - self.train_time_start) / 3600:.3f} hours."
        )
        self.final_eval()
        if self.args.plots:
            self.plot_metrics()
        self.run_callbacks("on_train_end")
    torch.cuda.empty_cache()
    self.run_callbacks("teardown")
476
+
477
def save_model(self):
    """Save model training checkpoints (last/best/periodic) with additional metadata."""
    import pandas as pd  # scope for faster startup

    metrics = {**self.metrics, **{"fitness": self.fitness}}
    results = {k.strip(): v for k, v in pd.read_csv(self.csv).to_dict(orient="list").items()}
    ckpt = {
        "epoch": self.epoch,
        "best_fitness": self.best_fitness,
        "model": deepcopy(de_parallel(self.model)).half(),  # half precision shrinks the checkpoint
        "ema": deepcopy(self.ema.ema).half(),
        "updates": self.ema.updates,
        "optimizer": self.optimizer.state_dict(),
        "train_args": vars(self.args),  # save as dict
        "train_metrics": metrics,
        "train_results": results,
        "date": datetime.now().isoformat(),
        "version": __version__,
    }

    # Save last and best
    torch.save(ckpt, self.last)
    if self.best_fitness == self.fitness:  # current epoch is the best so far
        torch.save(ckpt, self.best)
    if (self.save_period > 0) and (self.epoch > 0) and (self.epoch % self.save_period == 0):
        torch.save(ckpt, self.wdir / f"epoch{self.epoch}.pt")
503
+
504
@staticmethod
def get_dataset(data):
    """
    Extract the train and validation split paths from a dataset dict.

    Falls back to the 'test' split when no 'val' split is defined; the second
    element is None when the dict defines neither.
    """
    val = data.get("val") or data.get("test")
    return data["train"], val
512
+
513
def setup_model(self):
    """Load/create/download model for any task; returns the checkpoint dict when loading from a .pt file."""
    if isinstance(self.model, torch.nn.Module):  # if model is loaded beforehand. No setup needed
        return

    model, weights = self.model, None
    ckpt = None
    if str(model).endswith(".pt"):
        weights, ckpt = attempt_load_one_weight(model)
        cfg = ckpt["model"].yaml  # reuse the architecture yaml stored inside the checkpoint
    else:
        cfg = model  # a yaml path / config was passed directly
    self.model = self.get_model(cfg=cfg, weights=weights, verbose=RANK == -1)  # calls Model(cfg, weights)
    return ckpt
527
+
528
def optimizer_step(self):
    """Perform a single optimizer step with AMP gradient unscaling, clipping, and an EMA update (order matters)."""
    self.scaler.unscale_(self.optimizer)  # unscale gradients before clipping so the norm is measured correctly
    torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=10.0)  # clip gradients
    self.scaler.step(self.optimizer)
    self.scaler.update()
    self.optimizer.zero_grad()
    if self.ema:
        self.ema.update(self.model)
537
+
538
def preprocess_batch(self, batch):
    """Hook for task-specific preprocessing of model inputs and ground truths; the base implementation is a no-op."""
    return batch
541
+
542
def validate(self):
    """
    Run validation on the test set using self.validator.

    Returns:
        (dict, float): Validation metrics (with the "fitness" key removed) and the fitness value.
        When the validator reports no "fitness" key, the negated training loss is used instead.
    """
    metrics = self.validator(self)
    fitness = metrics.pop("fitness", -self.loss.detach().cpu().numpy())  # use loss as fitness measure if not found
    # Fix: compare against None explicitly — `not self.best_fitness` treated a legitimate
    # best fitness of 0.0 as missing, letting a worse (negative) fitness overwrite it.
    if self.best_fitness is None or self.best_fitness < fitness:
        self.best_fitness = fitness
    return metrics, fitness
553
+
554
def get_model(self, cfg=None, weights=None, verbose=True):
    """Stub for subclasses; building a model from a cfg file is task-specific."""
    message = "This task trainer doesn't support loading cfg files"
    raise NotImplementedError(message)
557
+
558
def get_validator(self):
    """Stub for subclasses; each task trainer supplies its own validator."""
    message = "get_validator function not implemented in trainer"
    raise NotImplementedError(message)
561
+
562
def get_dataloader(self, dataset_path, batch_size=16, rank=0, mode="train"):
    """Stub for subclasses; each task trainer builds its own torch dataloader."""
    message = "get_dataloader function not implemented in trainer"
    raise NotImplementedError(message)
565
+
566
def build_dataset(self, img_path, mode="train", batch=None):
    """Stub for subclasses; each task trainer constructs its own dataset."""
    message = "build_dataset function not implemented in trainer"
    raise NotImplementedError(message)
569
+
570
def label_loss_items(self, loss_items=None, prefix="train"):
    """
    Return a loss dict with labelled training loss items tensor.

    Note:
        This is not needed for classification but necessary for segmentation & detection.
    """
    if loss_items is None:
        return ["loss"]
    return {"loss": loss_items}
578
+
579
def set_model_attributes(self):
    """Attach the dataset's class-name mapping to the model before training."""
    names = self.data["names"]
    self.model.names = names
582
+
583
def build_targets(self, preds, targets):
    """No-op hook; subclasses may build target tensors for YOLO training here."""
    return None
586
+
587
def progress_string(self):
    """Return a string describing training progress; the base implementation reports nothing."""
    return ""
590
+
591
# TODO: may need to put these following functions into callback
def plot_training_samples(self, batch, ni):
    """No-op hook for plotting training samples; overridden by task trainers."""
    return None
595
+
596
def plot_training_labels(self):
    """No-op hook for plotting training labels; overridden by task trainers."""
    return None
599
+
600
def save_metrics(self, metrics):
    """Append the current epoch's metrics as one row of the results CSV, writing a header on first use."""
    names = list(metrics.keys())
    values = list(metrics.values())
    if self.csv.exists():
        header = ""
    else:
        # Right-justified 23-char columns to match the row format below.
        header = ",".join(f"{col:>23}" for col in ["epoch"] + names) + "\n"
    row = ",".join(f"{v:>23.5g}" for v in [self.epoch + 1] + values) + "\n"
    with open(self.csv, "a") as f:
        f.write(header + row)
607
+
608
def plot_metrics(self):
    """No-op hook; subclasses plot metric curves here."""
    return None
611
+
612
def on_plot(self, name, data=None):
    """Record a plot (keyed by its Path) with a timestamp so callbacks can consume it later."""
    key = Path(name)
    self.plots[key] = {"data": data, "timestamp": time.time()}
616
+
617
def final_eval(self):
    """Strip optimizer state from the saved checkpoints and run a final validation on best.pt."""
    for f in self.last, self.best:
        if f.exists():
            strip_optimizer(f)  # strip optimizers
            if f is self.best:
                LOGGER.info(f"\nValidating {f}...")
                self.validator.args.plots = self.args.plots
                self.metrics = self.validator(model=f)
                self.metrics.pop("fitness", None)
                self.run_callbacks("on_fit_epoch_end")
628
+
629
def check_resume(self, overrides):
    """
    Check if a resume checkpoint exists and replace the run arguments with the checkpoint's saved args.

    Args:
        overrides (dict): User-supplied arguments for this run; only 'imgsz' and 'batch' are
            carried over on top of the checkpoint args (e.g. to reduce memory after a CUDA OOM crash).

    Raises:
        FileNotFoundError: If resume was requested but no valid checkpoint could be located or loaded.
    """
    resume = self.args.resume
    if resume:
        try:
            # Resolve the checkpoint: use the given path if it exists, else the most recent run
            exists = isinstance(resume, (str, Path)) and Path(resume).exists()
            last = Path(check_file(resume) if exists else get_latest_run())

            # Check that resume data YAML exists, otherwise strip to force re-download of dataset
            ckpt_args = attempt_load_weights(last).args
            if not Path(ckpt_args["data"]).exists():
                ckpt_args["data"] = self.args.data

            resume = True
            # Checkpoint args fully replace current args so training continues with the original config
            self.args = get_cfg(ckpt_args)
            self.args.model = str(last)  # reinstate model
            for k in "imgsz", "batch":  # allow arg updates to reduce memory on resume if crashed due to CUDA OOM
                if k in overrides:
                    setattr(self.args, k, overrides[k])

        except Exception as e:
            # Any failure (bad path, unreadable weights, ...) is surfaced uniformly as a resume error
            raise FileNotFoundError(
                "Resume checkpoint not found. Please pass a valid checkpoint to resume from, "
                "i.e. 'yolo train resume model=path/to/last.pt'"
            ) from e
    self.resume = resume
655
+
656
def resume_training(self, ckpt):
    """
    Resume YOLO training from a checkpoint's epoch and best fitness.

    Restores optimizer and EMA state from `ckpt`, validates that there is actually something left
    to resume, and extends `self.epochs` when the checkpoint was already fully trained (fine-tuning).

    Args:
        ckpt (dict | None): Loaded checkpoint dictionary; a no-op if None.
    """
    if ckpt is None:
        return
    best_fitness = 0.0
    start_epoch = ckpt["epoch"] + 1  # checkpoint stores the last completed epoch
    if ckpt["optimizer"] is not None:
        self.optimizer.load_state_dict(ckpt["optimizer"])  # optimizer
        best_fitness = ckpt["best_fitness"]
    if self.ema and ckpt.get("ema"):
        self.ema.ema.load_state_dict(ckpt["ema"].float().state_dict())  # EMA
        self.ema.updates = ckpt["updates"]
    if self.resume:
        # NOTE(review): ckpt["epoch"] is presumably set to -1 when training has finished,
        # making start_epoch == 0 — confirm against the checkpoint-saving code.
        assert start_epoch > 0, (
            f"{self.args.model} training to {self.epochs} epochs is finished, nothing to resume.\n"
            f"Start a new training without resuming, i.e. 'yolo train model={self.args.model}'"
        )
        LOGGER.info(
            f"Resuming training from {self.args.model} from epoch {start_epoch + 1} to {self.epochs} total epochs"
        )
    if self.epochs < start_epoch:
        # Requested epoch count is already exceeded: interpret self.epochs as extra fine-tune epochs
        LOGGER.info(
            f"{self.model} has been trained for {ckpt['epoch']} epochs. Fine-tuning for {self.epochs} more epochs."
        )
        self.epochs += ckpt["epoch"]  # finetune additional epochs
    self.best_fitness = best_fitness
    self.start_epoch = start_epoch
    if start_epoch > (self.epochs - self.args.close_mosaic):
        # Already inside the final no-mosaic window, so disable mosaic immediately
        self._close_dataloader_mosaic()
685
+
686
+ def _close_dataloader_mosaic(self):
687
+ """Update dataloaders to stop using mosaic augmentation."""
688
+ if hasattr(self.train_loader.dataset, "mosaic"):
689
+ self.train_loader.dataset.mosaic = False
690
+ if hasattr(self.train_loader.dataset, "close_mosaic"):
691
+ LOGGER.info("Closing dataloader mosaic")
692
+ self.train_loader.dataset.close_mosaic(hyp=self.args)
693
+
694
def build_optimizer(self, model, name="auto", lr=0.001, momentum=0.9, decay=1e-5, iterations=1e5):
    """
    Constructs an optimizer for the given model, based on the specified optimizer name, learning rate, momentum,
    weight decay, and number of iterations.

    Parameters are split into three groups: weights with decay, normalization-layer weights
    without decay, and biases without decay — applying weight decay to norm weights and biases
    is the reason for the split.

    Args:
        model (torch.nn.Module): The model for which to build an optimizer.
        name (str, optional): The name of the optimizer to use. If 'auto', the optimizer is selected
            based on the number of iterations. Default: 'auto'.
        lr (float, optional): The learning rate for the optimizer. Default: 0.001.
        momentum (float, optional): The momentum factor for the optimizer. Default: 0.9.
        decay (float, optional): The weight decay for the optimizer. Default: 1e-5.
        iterations (float, optional): The number of iterations, which determines the optimizer if
            name is 'auto'. Default: 1e5.

    Returns:
        (torch.optim.Optimizer): The constructed optimizer.

    Raises:
        NotImplementedError: If `name` is not one of the supported optimizer names.
    """

    g = [], [], []  # optimizer parameter groups: g[0] decayed weights, g[1] norm weights, g[2] biases
    bn = tuple(v for k, v in nn.__dict__.items() if "Norm" in k)  # normalization layers, i.e. BatchNorm2d()
    if name == "auto":
        LOGGER.info(
            f"{colorstr('optimizer:')} 'optimizer=auto' found, "
            f"ignoring 'lr0={self.args.lr0}' and 'momentum={self.args.momentum}' and "
            f"determining best 'optimizer', 'lr0' and 'momentum' automatically... "
        )
        nc = getattr(model, "nc", 10)  # number of classes
        lr_fit = round(0.002 * 5 / (4 + nc), 6)  # lr0 fit equation to 6 decimal places
        # Long schedules use SGD; short ones use AdamW with a class-count-scaled lr
        name, lr, momentum = ("SGD", 0.01, 0.9) if iterations > 10000 else ("AdamW", lr_fit, 0.9)
        self.args.warmup_bias_lr = 0.0  # no higher than 0.01 for Adam

    # recurse=False so each parameter is classified by its immediately-owning module
    for module_name, module in model.named_modules():
        for param_name, param in module.named_parameters(recurse=False):
            fullname = f"{module_name}.{param_name}" if module_name else param_name
            if "bias" in fullname:  # bias (no decay)
                g[2].append(param)
            elif isinstance(module, bn):  # weight (no decay)
                g[1].append(param)
            else:  # weight (with decay)
                g[0].append(param)

    # Optimizer is created with the bias group; the other groups are appended below
    if name in ("Adam", "Adamax", "AdamW", "NAdam", "RAdam"):
        optimizer = getattr(optim, name, optim.Adam)(g[2], lr=lr, betas=(momentum, 0.999), weight_decay=0.0)
    elif name == "RMSProp":
        optimizer = optim.RMSprop(g[2], lr=lr, momentum=momentum)
    elif name == "SGD":
        optimizer = optim.SGD(g[2], lr=lr, momentum=momentum, nesterov=True)
    else:
        raise NotImplementedError(
            f"Optimizer '{name}' not found in list of available optimizers "
            f"[Adam, AdamW, NAdam, RAdam, RMSProp, SGD, auto]."
            "To request support for addition optimizers please visit https://github.com/ultralytics/ultralytics."
        )

    optimizer.add_param_group({"params": g[0], "weight_decay": decay})  # add g0 with weight_decay
    optimizer.add_param_group({"params": g[1], "weight_decay": 0.0})  # add g1 (BatchNorm2d weights)
    LOGGER.info(
        f"{colorstr('optimizer:')} {type(optimizer).__name__}(lr={lr}, momentum={momentum}) with parameter groups "
        f'{len(g[1])} weight(decay=0.0), {len(g[0])} weight(decay={decay}), {len(g[2])} bias(decay=0.0)'
    )
    return optimizer
yolov8_model/ultralytics/engine/tuner.py ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ """
3
+ This module provides functionalities for hyperparameter tuning of the Ultralytics YOLO models for object detection,
4
+ instance segmentation, image classification, pose estimation, and multi-object tracking.
5
+
6
+ Hyperparameter tuning is the process of systematically searching for the optimal set of hyperparameters
7
+ that yield the best model performance. This is particularly crucial in deep learning models like YOLO,
8
+ where small changes in hyperparameters can lead to significant differences in model accuracy and efficiency.
9
+
10
+ Example:
11
+ Tune hyperparameters for YOLOv8n on COCO8 at imgsz=640 and epochs=30 for 300 tuning iterations.
12
+ ```python
13
+ from ultralytics import YOLO
14
+
15
+ model = YOLO('yolov8n.pt')
16
+ model.tune(data='coco8.yaml', epochs=10, iterations=300, optimizer='AdamW', plots=False, save=False, val=False)
17
+ ```
18
+ """
19
+ import random
20
+ import shutil
21
+ import subprocess
22
+ import time
23
+
24
+ import numpy as np
25
+ import torch
26
+
27
+ from yolov8_model.ultralytics.cfg import get_cfg, get_save_dir
28
+ from yolov8_model.ultralytics.utils import DEFAULT_CFG, LOGGER, callbacks, colorstr, remove_colorstr, yaml_print, yaml_save
29
+ from yolov8_model.ultralytics.utils.plotting import plot_tune_results
30
+
31
+
32
class Tuner:
    """
    Class responsible for hyperparameter tuning of YOLO models.

    The class evolves YOLO model hyperparameters over a given number of iterations
    by mutating them according to the search space and retraining the model to evaluate their performance.

    Attributes:
        space (dict): Hyperparameter search space containing bounds and scaling factors for mutation.
        tune_dir (Path): Directory where evolution logs and results will be saved.
        tune_csv (Path): Path to the CSV file where evolution logs are saved.

    Methods:
        _mutate(hyp: dict) -> dict:
            Mutates the given hyperparameters within the bounds specified in `self.space`.

        __call__():
            Executes the hyperparameter evolution across multiple iterations.

    Example:
        Tune hyperparameters for YOLOv8n on COCO8 at imgsz=640 and epochs=30 for 300 tuning iterations.
        ```python
        from ultralytics import YOLO

        model = YOLO('yolov8n.pt')
        model.tune(data='coco8.yaml', epochs=10, iterations=300, optimizer='AdamW', plots=False, save=False, val=False)
        ```

        Tune with custom search space.
        ```python
        from ultralytics import YOLO

        model = YOLO('yolov8n.pt')
        model.tune(space={key1: val1, key2: val2})  # custom search space dictionary
        ```
    """

    def __init__(self, args=DEFAULT_CFG, _callbacks=None):
        """
        Initialize the Tuner with configurations.

        Args:
            args (dict, optional): Configuration for hyperparameter evolution.
            _callbacks (list, optional): Callback functions to register; defaults to the standard set.
        """
        # Each entry is (min, max) or (min, max, gain), where gain scales the mutation magnitude
        self.space = args.pop("space", None) or {  # key: (min, max, gain(optional))
            # 'optimizer': tune.choice(['SGD', 'Adam', 'AdamW', 'NAdam', 'RAdam', 'RMSProp']),
            "lr0": (1e-5, 1e-1),  # initial learning rate (i.e. SGD=1E-2, Adam=1E-3)
            "lrf": (0.0001, 0.1),  # final OneCycleLR learning rate (lr0 * lrf)
            "momentum": (0.7, 0.98, 0.3),  # SGD momentum/Adam beta1
            "weight_decay": (0.0, 0.001),  # optimizer weight decay 5e-4
            "warmup_epochs": (0.0, 5.0),  # warmup epochs (fractions ok)
            "warmup_momentum": (0.0, 0.95),  # warmup initial momentum
            "box": (1.0, 20.0),  # box loss gain
            "cls": (0.2, 4.0),  # cls loss gain (scale with pixels)
            "dfl": (0.4, 6.0),  # dfl loss gain
            "hsv_h": (0.0, 0.1),  # image HSV-Hue augmentation (fraction)
            "hsv_s": (0.0, 0.9),  # image HSV-Saturation augmentation (fraction)
            "hsv_v": (0.0, 0.9),  # image HSV-Value augmentation (fraction)
            "degrees": (0.0, 45.0),  # image rotation (+/- deg)
            "translate": (0.0, 0.9),  # image translation (+/- fraction)
            "scale": (0.0, 0.95),  # image scale (+/- gain)
            "shear": (0.0, 10.0),  # image shear (+/- deg)
            "perspective": (0.0, 0.001),  # image perspective (+/- fraction), range 0-0.001
            "flipud": (0.0, 1.0),  # image flip up-down (probability)
            "fliplr": (0.0, 1.0),  # image flip left-right (probability)
            "mosaic": (0.0, 1.0),  # image mosaic (probability)
            "mixup": (0.0, 1.0),  # image mixup (probability)
            "copy_paste": (0.0, 1.0),  # segment copy-paste (probability)
        }
        self.args = get_cfg(overrides=args)
        self.tune_dir = get_save_dir(self.args, name="tune")
        self.tune_csv = self.tune_dir / "tune_results.csv"
        self.callbacks = _callbacks or callbacks.get_default_callbacks()
        self.prefix = colorstr("Tuner: ")
        callbacks.add_integration_callbacks(self)
        LOGGER.info(
            f"{self.prefix}Initialized Tuner instance with 'tune_dir={self.tune_dir}'\n"
            f"{self.prefix}💡 Learn about tuning at https://docs.ultralytics.com/guides/hyperparameter-tuning"
        )

    def _mutate(self, parent="single", n=5, mutation=0.8, sigma=0.2):
        """
        Mutates the hyperparameters based on bounds and scaling factors specified in `self.space`.

        Args:
            parent (str): Parent selection method: 'single' or 'weighted'.
            n (int): Number of parents to consider.
            mutation (float): Probability of a parameter mutation in any given iteration.
            sigma (float): Standard deviation for Gaussian random number generator.

        Returns:
            (dict): A dictionary containing mutated hyperparameters.
        """
        if self.tune_csv.exists():  # if CSV file exists: select best hyps and mutate
            # Select parent(s) — CSV rows are [fitness, hyp1, hyp2, ...]
            x = np.loadtxt(self.tune_csv, ndmin=2, delimiter=",", skiprows=1)
            fitness = x[:, 0]  # first column
            n = min(n, len(x))  # number of previous results to consider
            x = x[np.argsort(-fitness)][:n]  # top n mutations
            w = x[:, 0] - x[:, 0].min() + 1e-6  # weights (sum > 0)
            if parent == "single" or len(x) == 1:
                # x = x[random.randint(0, n - 1)]  # random selection
                x = x[random.choices(range(n), weights=w)[0]]  # weighted selection
            elif parent == "weighted":
                x = (x * w.reshape(n, 1)).sum(0) / w.sum()  # weighted combination

            # Mutate: multiply each hyp by a random factor clipped to [0.3, 3.0]
            r = np.random  # method
            r.seed(int(time.time()))
            g = np.array([v[2] if len(v) == 3 else 1.0 for k, v in self.space.items()])  # gains 0-1
            ng = len(self.space)
            v = np.ones(ng)
            while all(v == 1):  # mutate until a change occurs (prevent duplicates)
                v = (g * (r.random(ng) < mutation) * r.randn(ng) * r.random() * sigma + 1).clip(0.3, 3.0)
            hyp = {k: float(x[i + 1] * v[i]) for i, k in enumerate(self.space.keys())}  # +1 skips fitness column
        else:
            # First iteration: start from the current configuration
            hyp = {k: getattr(self.args, k) for k in self.space.keys()}

        # Constrain to limits
        for k, v in self.space.items():
            hyp[k] = max(hyp[k], v[0])  # lower limit
            hyp[k] = min(hyp[k], v[1])  # upper limit
            hyp[k] = round(hyp[k], 5)  # significant digits

        return hyp

    def __call__(self, model=None, iterations=10, cleanup=True):
        """
        Executes the hyperparameter evolution process when the Tuner instance is called.

        This method iterates through the number of iterations, performing the following steps in each iteration:
        1. Load the existing hyperparameters or initialize new ones.
        2. Mutate the hyperparameters using the `mutate` method.
        3. Train a YOLO model with the mutated hyperparameters.
        4. Log the fitness score and mutated hyperparameters to a CSV file.

        Args:
            model (Model): A pre-initialized YOLO model to be used for training.
            iterations (int): The number of generations to run the evolution for.
            cleanup (bool): Whether to delete iteration weights to reduce storage space used during tuning.

        Note:
            The method utilizes the `self.tune_csv` Path object to read and log hyperparameters and fitness scores.
            Ensure this path is set correctly in the Tuner instance.
        """

        t0 = time.time()
        best_save_dir, best_metrics = None, None
        (self.tune_dir / "weights").mkdir(parents=True, exist_ok=True)
        for i in range(iterations):
            # Mutate hyperparameters
            mutated_hyp = self._mutate()
            LOGGER.info(f"{self.prefix}Starting iteration {i + 1}/{iterations} with hyperparameters: {mutated_hyp}")

            metrics = {}
            train_args = {**vars(self.args), **mutated_hyp}
            save_dir = get_save_dir(get_cfg(train_args))
            weights_dir = save_dir / "weights"
            # NOTE(review): best.pt/last.pt existence is checked before training runs, so on the
            # first use of a fresh save_dir this resolves to last.pt — confirm this is intended.
            ckpt_file = weights_dir / ("best.pt" if (weights_dir / "best.pt").exists() else "last.pt")
            try:
                # Train YOLO model with mutated hyperparameters (run in subprocess to avoid dataloader hang)
                cmd = ["yolo", "train", *(f"{k}={v}" for k, v in train_args.items())]
                return_code = subprocess.run(cmd, check=True).returncode
                metrics = torch.load(ckpt_file)["train_metrics"]
                assert return_code == 0, "training failed"

            except Exception as e:
                # Failed iterations are logged with fitness 0.0 rather than aborting the evolution
                LOGGER.warning(f"WARNING ❌️ training failure for hyperparameter tuning iteration {i + 1}\n{e}")

            # Save results and mutated_hyp to CSV
            fitness = metrics.get("fitness", 0.0)
            log_row = [round(fitness, 5)] + [mutated_hyp[k] for k in self.space.keys()]
            headers = "" if self.tune_csv.exists() else (",".join(["fitness"] + list(self.space.keys())) + "\n")
            with open(self.tune_csv, "a") as f:
                f.write(headers + ",".join(map(str, log_row)) + "\n")

            # Get best results
            x = np.loadtxt(self.tune_csv, ndmin=2, delimiter=",", skiprows=1)
            fitness = x[:, 0]  # first column
            best_idx = fitness.argmax()
            best_is_current = best_idx == i
            if best_is_current:
                best_save_dir = save_dir
                best_metrics = {k: round(v, 5) for k, v in metrics.items()}
                for ckpt in weights_dir.glob("*.pt"):
                    shutil.copy2(ckpt, self.tune_dir / "weights")
            elif cleanup:
                shutil.rmtree(ckpt_file.parent)  # remove iteration weights/ dir to reduce storage space

            # Plot tune results
            plot_tune_results(self.tune_csv)

            # Save and print tune results
            header = (
                f'{self.prefix}{i + 1}/{iterations} iterations complete ✅ ({time.time() - t0:.2f}s)\n'
                f'{self.prefix}Results saved to {colorstr("bold", self.tune_dir)}\n'
                f'{self.prefix}Best fitness={fitness[best_idx]} observed at iteration {best_idx + 1}\n'
                f'{self.prefix}Best fitness metrics are {best_metrics}\n'
                f'{self.prefix}Best fitness model is {best_save_dir}\n'
                f'{self.prefix}Best fitness hyperparameters are printed below.\n'
            )
            LOGGER.info("\n" + header)
            data = {k: float(x[best_idx, i + 1]) for i, k in enumerate(self.space.keys())}
            yaml_save(
                self.tune_dir / "best_hyperparameters.yaml",
                data=data,
                header=remove_colorstr(header.replace(self.prefix, "# ")) + "\n",
            )
            yaml_print(self.tune_dir / "best_hyperparameters.yaml")
yolov8_model/ultralytics/engine/validator.py ADDED
@@ -0,0 +1,336 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ """
3
+ Check a model's accuracy on a test or val split of a dataset.
4
+
5
+ Usage:
6
+ $ yolo mode=val model=yolov8n.pt data=coco128.yaml imgsz=640
7
+
8
+ Usage - formats:
9
+ $ yolo mode=val model=yolov8n.pt # PyTorch
10
+ yolov8n.torchscript # TorchScript
11
+ yolov8n.onnx # ONNX Runtime or OpenCV DNN with dnn=True
12
+ yolov8n_openvino_model # OpenVINO
13
+ yolov8n.engine # TensorRT
14
+ yolov8n.mlpackage # CoreML (macOS-only)
15
+ yolov8n_saved_model # TensorFlow SavedModel
16
+ yolov8n.pb # TensorFlow GraphDef
17
+ yolov8n.tflite # TensorFlow Lite
18
+ yolov8n_edgetpu.tflite # TensorFlow Edge TPU
19
+ yolov8n_paddle_model # PaddlePaddle
20
+ """
21
+ import json
22
+ import time
23
+ from pathlib import Path
24
+
25
+ import numpy as np
26
+ import torch
27
+
28
+ from yolov8_model.ultralytics.cfg import get_cfg, get_save_dir
29
+ from yolov8_model.ultralytics.data.utils import check_cls_dataset, check_det_dataset
30
+ from yolov8_model.ultralytics.nn.autobackend import AutoBackend
31
+ from yolov8_model.ultralytics.utils import LOGGER, TQDM, callbacks, colorstr, emojis
32
+ from yolov8_model.ultralytics.utils.checks import check_imgsz
33
+ from yolov8_model.ultralytics.utils.ops import Profile
34
+ from yolov8_model.ultralytics.utils.torch_utils import de_parallel, select_device, smart_inference_mode
35
+
36
+
37
class BaseValidator:
    """
    BaseValidator.

    A base class for creating validators.

    Attributes:
        args (SimpleNamespace): Configuration for the validator.
        dataloader (DataLoader): Dataloader to use for validation.
        pbar (tqdm): Progress bar to update during validation.
        model (nn.Module): Model to validate.
        data (dict): Data dictionary.
        device (torch.device): Device to use for validation.
        batch_i (int): Current batch index.
        training (bool): Whether the model is in training mode.
        names (dict): Class names.
        seen: Records the number of images seen so far during validation.
        stats: Placeholder for statistics during validation.
        confusion_matrix: Placeholder for a confusion matrix.
        nc: Number of classes.
        iouv: (torch.Tensor): IoU thresholds from 0.50 to 0.95 in spaces of 0.05.
        jdict (dict): Dictionary to store JSON validation results.
        speed (dict): Dictionary with keys 'preprocess', 'inference', 'loss', 'postprocess' and their respective
                      batch processing times in milliseconds.
        save_dir (Path): Directory to save results.
        plots (dict): Dictionary to store plots for visualization.
        callbacks (dict): Dictionary to store various callback functions.
    """

    def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None, _callbacks=None):
        """
        Initializes a BaseValidator instance.

        Args:
            dataloader (torch.utils.data.DataLoader): Dataloader to be used for validation.
            save_dir (Path, optional): Directory to save results.
            pbar (tqdm.tqdm): Progress bar for displaying progress.
            args (SimpleNamespace): Configuration for the validator.
            _callbacks (dict): Dictionary to store various callback functions.
        """
        self.args = get_cfg(overrides=args)
        self.dataloader = dataloader
        self.pbar = pbar
        # Most state is populated lazily in __call__ / init_metrics, not here
        self.stride = None
        self.data = None
        self.device = None
        self.batch_i = None
        self.training = True
        self.names = None
        self.seen = None
        self.stats = None
        self.confusion_matrix = None
        self.nc = None
        self.iouv = None
        self.jdict = None
        self.speed = {"preprocess": 0.0, "inference": 0.0, "loss": 0.0, "postprocess": 0.0}

        self.save_dir = save_dir or get_save_dir(self.args)
        (self.save_dir / "labels" if self.args.save_txt else self.save_dir).mkdir(parents=True, exist_ok=True)
        if self.args.conf is None:
            self.args.conf = 0.001  # default conf=0.001
        self.args.imgsz = check_imgsz(self.args.imgsz, max_dim=1)

        self.plots = {}
        self.callbacks = _callbacks or callbacks.get_default_callbacks()

    @smart_inference_mode()
    def __call__(self, trainer=None, model=None):
        """Supports validation of a pre-trained model if passed or a model being trained if trainer is passed (trainer
        gets priority).
        """
        self.training = trainer is not None
        augment = self.args.augment and (not self.training)
        if self.training:
            # Validate the in-progress model (EMA if available) on the trainer's device
            self.device = trainer.device
            self.data = trainer.data
            self.args.half = self.device.type != "cpu"  # force FP16 val during training
            model = trainer.ema.ema or trainer.model
            model = model.half() if self.args.half else model.float()
            # self.model = model
            self.loss = torch.zeros_like(trainer.loss_items, device=trainer.device)
            # Only produce plots on the final epoch or when early stopping may trigger
            self.args.plots &= trainer.stopper.possible_stop or (trainer.epoch == trainer.epochs - 1)
            model.eval()
        else:
            # Standalone validation: wrap weights in AutoBackend to support exported formats
            callbacks.add_integration_callbacks(self)
            model = AutoBackend(
                model or self.args.model,
                device=select_device(self.args.device, self.args.batch),
                dnn=self.args.dnn,
                data=self.args.data,
                fp16=self.args.half,
            )
            # self.model = model
            self.device = model.device  # update device
            self.args.half = model.fp16  # update half
            stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine
            imgsz = check_imgsz(self.args.imgsz, stride=stride)
            if engine:
                self.args.batch = model.batch_size
            elif not pt and not jit:
                self.args.batch = 1  # export.py models default to batch-size 1
                LOGGER.info(f"Forcing batch=1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models")

            if str(self.args.data).split(".")[-1] in ("yaml", "yml"):
                self.data = check_det_dataset(self.args.data)
            elif self.args.task == "classify":
                self.data = check_cls_dataset(self.args.data, split=self.args.split)
            else:
                raise FileNotFoundError(emojis(f"Dataset '{self.args.data}' for task={self.args.task} not found ❌"))

            if self.device.type in ("cpu", "mps"):
                self.args.workers = 0  # faster CPU val as time dominated by inference, not dataloading
            if not pt:
                self.args.rect = False
            self.stride = model.stride  # used in get_dataloader() for padding
            self.dataloader = self.dataloader or self.get_dataloader(self.data.get(self.args.split), self.args.batch)

            model.eval()
            model.warmup(imgsz=(1 if pt else self.args.batch, 3, imgsz, imgsz))  # warmup

        self.run_callbacks("on_val_start")
        # One profiler per pipeline stage: preprocess, inference, loss, postprocess
        dt = (
            Profile(device=self.device),
            Profile(device=self.device),
            Profile(device=self.device),
            Profile(device=self.device),
        )
        bar = TQDM(self.dataloader, desc=self.get_desc(), total=len(self.dataloader))
        self.init_metrics(de_parallel(model))
        self.jdict = []  # empty before each val
        for batch_i, batch in enumerate(bar):
            self.run_callbacks("on_val_batch_start")
            self.batch_i = batch_i
            # Preprocess
            with dt[0]:
                batch = self.preprocess(batch)

            # Inference
            with dt[1]:
                preds = model(batch["img"], augment=augment)

            # Loss
            with dt[2]:
                if self.training:
                    self.loss += model.loss(batch, preds)[1]

            # Postprocess
            with dt[3]:
                preds = self.postprocess(preds)

            self.update_metrics(preds, batch)
            if self.args.plots and batch_i < 3:
                self.plot_val_samples(batch, batch_i)
                self.plot_predictions(batch, preds, batch_i)

            self.run_callbacks("on_val_batch_end")
        stats = self.get_stats()
        self.check_stats(stats)
        # Convert accumulated per-stage time to milliseconds per image
        self.speed = dict(zip(self.speed.keys(), (x.t / len(self.dataloader.dataset) * 1e3 for x in dt)))
        self.finalize_metrics()
        self.print_results()
        self.run_callbacks("on_val_end")
        if self.training:
            model.float()
            results = {**stats, **trainer.label_loss_items(self.loss.cpu() / len(self.dataloader), prefix="val")}
            return {k: round(float(v), 5) for k, v in results.items()}  # return results as 5 decimal place floats
        else:
            LOGGER.info(
                "Speed: %.1fms preprocess, %.1fms inference, %.1fms loss, %.1fms postprocess per image"
                % tuple(self.speed.values())
            )
            if self.args.save_json and self.jdict:
                with open(str(self.save_dir / "predictions.json"), "w") as f:
                    LOGGER.info(f"Saving {f.name}...")
                    json.dump(self.jdict, f)  # flatten and save
                stats = self.eval_json(stats)  # update stats
            if self.args.plots or self.args.save_json:
                LOGGER.info(f"Results saved to {colorstr('bold', self.save_dir)}")
            return stats

    def match_predictions(self, pred_classes, true_classes, iou, use_scipy=False):
        """
        Matches predictions to ground truth objects (pred_classes, true_classes) using IoU.

        Args:
            pred_classes (torch.Tensor): Predicted class indices of shape(N,).
            true_classes (torch.Tensor): Target class indices of shape(M,).
            iou (torch.Tensor): An NxM tensor containing the pairwise IoU values for predictions and ground of truth
            use_scipy (bool): Whether to use scipy for matching (more precise).

        Returns:
            (torch.Tensor): Correct tensor of shape(N,10) for 10 IoU thresholds.
        """
        # Dx10 matrix, where D - detections, 10 - IoU thresholds
        correct = np.zeros((pred_classes.shape[0], self.iouv.shape[0])).astype(bool)
        # LxD matrix where L - labels (rows), D - detections (columns)
        correct_class = true_classes[:, None] == pred_classes
        iou = iou * correct_class  # zero out the wrong classes
        iou = iou.cpu().numpy()
        for i, threshold in enumerate(self.iouv.cpu().tolist()):
            if use_scipy:
                # Optimal assignment via Hungarian algorithm instead of the greedy dedup below
                # WARNING: known issue that reduces mAP in https://github.com/ultralytics/ultralytics/pull/4708
                import scipy  # scope import to avoid importing for all commands

                cost_matrix = iou * (iou >= threshold)
                if cost_matrix.any():
                    labels_idx, detections_idx = scipy.optimize.linear_sum_assignment(cost_matrix, maximize=True)
                    valid = cost_matrix[labels_idx, detections_idx] > 0
                    if valid.any():
                        correct[detections_idx[valid], i] = True
            else:
                matches = np.nonzero(iou >= threshold)  # IoU > threshold and classes match
                matches = np.array(matches).T  # columns: [label_idx, detection_idx]
                if matches.shape[0]:
                    if matches.shape[0] > 1:
                        # Greedy: sort by IoU descending, then keep each detection and label at most once
                        matches = matches[iou[matches[:, 0], matches[:, 1]].argsort()[::-1]]
                        matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
                        # matches = matches[matches[:, 2].argsort()[::-1]]
                        matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
                    correct[matches[:, 1].astype(int), i] = True
        return torch.tensor(correct, dtype=torch.bool, device=pred_classes.device)

    def add_callback(self, event: str, callback):
        """Appends the given callback."""
        self.callbacks[event].append(callback)

    def run_callbacks(self, event: str):
        """Runs all callbacks associated with a specified event."""
        for callback in self.callbacks.get(event, []):
            callback(self)

    def get_dataloader(self, dataset_path, batch_size):
        """Get data loader from dataset path and batch size."""
        raise NotImplementedError("get_dataloader function not implemented for this validator")

    def build_dataset(self, img_path):
        """Build dataset."""
        raise NotImplementedError("build_dataset function not implemented in validator")

    def preprocess(self, batch):
        """Preprocesses an input batch; identity in the base class, overridden by task validators."""
        return batch

    def postprocess(self, preds):
        """Post-processes raw predictions; identity in the base class, overridden by task validators."""
        return preds

    def init_metrics(self, model):
        """Initialize performance metrics for the YOLO model."""
        pass

    def update_metrics(self, preds, batch):
        """Updates metrics based on predictions and batch."""
        pass

    def finalize_metrics(self, *args, **kwargs):
        """Finalizes and returns all metrics."""
        pass

    def get_stats(self):
        """Returns statistics about the model's performance."""
        return {}

    def check_stats(self, stats):
        """Checks statistics."""
        pass

    def print_results(self):
        """Prints the results of the model's predictions."""
        pass

    def get_desc(self):
        """Get description of the YOLO model."""
        pass

    @property
    def metric_keys(self):
        """Returns the metric keys used in YOLO training/validation."""
        return []

    def on_plot(self, name, data=None):
        """Registers plots (e.g. to be consumed in callbacks)"""
        self.plots[Path(name)] = {"data": data, "timestamp": time.time()}

    # TODO: may need to put these following functions into callback
    def plot_val_samples(self, batch, ni):
        """Plots validation samples during training."""
        pass

    def plot_predictions(self, batch, preds, ni):
        """Plots YOLO model predictions on batch images."""
        pass

    def pred_to_json(self, preds, batch):
        """Convert predictions to JSON format."""
        pass

    def eval_json(self, stats):
        """Evaluate and return JSON format of prediction statistics."""
        pass
yolov8_model/ultralytics/hub/__init__.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+
3
+ import requests
4
+
5
+ from yolov8_model.ultralytics.data.utils import HUBDatasetStats
6
+ from yolov8_model.ultralytics.hub.auth import Auth
7
+ from yolov8_model.ultralytics.hub.utils import HUB_API_ROOT, HUB_WEB_ROOT, PREFIX
8
+ from yolov8_model.ultralytics.utils import LOGGER, SETTINGS, checks
9
+
10
+
11
def login(api_key: str = None, save=True) -> bool:
    """
    Log in to the Ultralytics HUB API using the provided API key.

    The session is not stored; a new session is created when needed using the saved SETTINGS or the HUB_API_KEY
    environment variable if successfully authenticated.

    Args:
        api_key (str, optional): API key to use for authentication.
            If not provided, it will be retrieved from SETTINGS or HUB_API_KEY environment variable.
        save (bool, optional): Whether to save the API key to SETTINGS if authentication is successful.

    Returns:
        (bool): True if authentication is successful, False otherwise.
    """
    checks.check_requirements("hub-sdk>=0.0.2")
    from hub_sdk import HUBClient

    api_key_url = f"{HUB_WEB_ROOT}/settings?tab=api+keys"  # page shown to the user on failure
    saved_key = SETTINGS.get("api_key")
    active_key = api_key or saved_key
    credentials = {"api_key": active_key} if active_key and active_key != "" else None

    client = HUBClient(credentials)  # authenticate with whatever credentials are available

    if not client.authenticated:
        # Authentication failed: point the user at the key-management page
        LOGGER.info(f"{PREFIX}Retrieve API key from {api_key_url}")
        return False

    # Persist the validated key when requested and it differs from what is stored
    if save and client.api_key != saved_key:
        SETTINGS.update({"api_key": client.api_key})

    # A directly supplied key (or no stored credentials) counts as a *new* authentication
    is_new = client.api_key == api_key or not credentials
    LOGGER.info(f"{PREFIX}{'New authentication successful ✅' if is_new else 'Authenticated ✅'}")
    return True
53
+
54
+
55
def logout():
    """
    Log out of Ultralytics HUB by clearing the API key stored in the settings file.

    To log in again, use 'yolo hub login'.

    Example:
        ```python
        from ultralytics import hub

        hub.logout()
        ```
    """
    SETTINGS["api_key"] = ""  # wipe the stored credential
    SETTINGS.save()  # persist the change to disk
    LOGGER.info(f"{PREFIX}logged out ✅. To log in again, use 'yolo hub login'.")
69
+
70
+
71
def reset_model(model_id=""):
    """Reset a trained HUB model (by id) back to an untrained state."""
    response = requests.post(
        f"{HUB_API_ROOT}/model-reset", json={"modelId": model_id}, headers={"x-api-key": Auth().api_key}
    )
    if response.status_code != 200:
        LOGGER.warning(f"{PREFIX}Model reset failure {response.status_code} {response.reason}")
        return
    LOGGER.info(f"{PREFIX}Model reset successfully")
78
+
79
+
80
def export_fmts_hub():
    """Returns a list of HUB-supported export formats."""
    # Use the package-local import path for consistency with every other import in this vendored module;
    # the bare 'ultralytics' package may not be importable in this relocated 'yolov8_model' layout.
    from yolov8_model.ultralytics.engine.exporter import export_formats

    return list(export_formats()["Argument"][1:]) + ["ultralytics_tflite", "ultralytics_coreml"]
85
+
86
+
87
def export_model(model_id="", format="torchscript"):
    """Start a HUB-side export of the given model to the requested format."""
    supported = export_fmts_hub()
    assert format in supported, f"Unsupported export format '{format}', valid formats are {supported}"
    response = requests.post(
        f"{HUB_API_ROOT}/v1/models/{model_id}/export", json={"format": format}, headers={"x-api-key": Auth().api_key}
    )
    assert response.status_code == 200, f"{PREFIX}{format} export failure {response.status_code} {response.reason}"
    LOGGER.info(f"{PREFIX}{format} export started ✅")
95
+
96
+
97
def get_export(model_id="", format="torchscript"):
    """Fetch the exported-model dictionary (including its download URL) for a HUB model."""
    supported = export_fmts_hub()
    assert format in supported, f"Unsupported export format '{format}', valid formats are {supported}"
    response = requests.post(
        f"{HUB_API_ROOT}/get-export",
        json={"apiKey": Auth().api_key, "modelId": model_id, "format": format},
        headers={"x-api-key": Auth().api_key},
    )
    assert response.status_code == 200, f"{PREFIX}{format} get_export failure {response.status_code} {response.reason}"
    return response.json()
107
+
108
+
109
def check_dataset(path="", task="detect"):
    """
    Error-check a HUB dataset Zip file before upload, logging where to upload it on success.

    Args:
        path (str, optional): Path to data.zip (with data.yaml inside data.zip). Defaults to ''.
        task (str, optional): Dataset task. Options are 'detect', 'segment', 'pose', 'classify'. Defaults to 'detect'.

    Example:
        ```python
        from ultralytics.hub import check_dataset

        check_dataset('path/to/coco8.zip', task='detect')  # detect dataset
        check_dataset('path/to/coco8-seg.zip', task='segment')  # segment dataset
        check_dataset('path/to/coco8-pose.zip', task='pose')  # pose dataset
        ```
    """
    stats = HUBDatasetStats(path=path, task=task)
    stats.get_json()  # performs the actual validation work
    LOGGER.info(f"Checks completed correctly ✅. Upload this dataset to {HUB_WEB_ROOT}/datasets/.")
yolov8_model/ultralytics/hub/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (5.02 kB). View file
 
yolov8_model/ultralytics/hub/__pycache__/auth.cpython-310.pyc ADDED
Binary file (4.32 kB). View file
 
yolov8_model/ultralytics/hub/__pycache__/utils.cpython-310.pyc ADDED
Binary file (8.53 kB). View file
 
yolov8_model/ultralytics/hub/auth.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+
3
+ import requests
4
+
5
+ from yolov8_model.ultralytics.hub.utils import HUB_API_ROOT, HUB_WEB_ROOT, PREFIX, request_with_credentials
6
+ from yolov8_model.ultralytics.utils import LOGGER, SETTINGS, emojis, is_colab
7
+
8
API_KEY_URL = f"{HUB_WEB_ROOT}/settings?tab=api+keys"  # HUB page where users create/copy their API key
9
+
10
+
11
class Auth:
    """
    Manages authentication processes including API key handling, cookie-based authentication, and header generation.

    The class supports different methods of authentication:
    1. Directly using an API key.
    2. Authenticating using browser cookies (specifically in Google Colab).
    3. Prompting the user to enter an API key.

    Attributes:
        id_token (str or bool): Token used for identity verification, initialized as False.
        api_key (str or bool): API key for authentication, initialized as False.
        model_key (bool): Placeholder for model key, initialized as False.
    """

    # Class-level defaults; instances overwrite api_key/id_token during authentication
    id_token = api_key = model_key = False

    def __init__(self, api_key="", verbose=False):
        """
        Initialize the Auth class with an optional API key.

        Args:
            api_key (str, optional): May be an API key or a combination API key and model ID, i.e. key_id
            verbose (bool, optional): Whether to log authentication progress messages.
        """
        # Split the input API key in case it contains a combined key_model and keep only the API key part
        api_key = api_key.split("_")[0]

        # Set API key attribute as value passed or SETTINGS API key if none passed
        self.api_key = api_key or SETTINGS.get("api_key", "")

        # If an API key is provided
        if self.api_key:
            # If the provided API key matches the API key in the SETTINGS
            if self.api_key == SETTINGS.get("api_key"):
                # Log that the user is already logged in
                if verbose:
                    LOGGER.info(f"{PREFIX}Authenticated ✅")
                return
            else:
                # Attempt to authenticate with the provided API key
                success = self.authenticate()
        # If the API key is not provided and the environment is a Google Colab notebook
        elif is_colab():
            # Attempt to authenticate using browser cookies
            success = self.auth_with_cookies()
        else:
            # Request an API key interactively
            success = self.request_api_key()

        # Update SETTINGS with the new API key after successful authentication
        if success:
            SETTINGS.update({"api_key": self.api_key})
            # Log that the new login was successful
            if verbose:
                LOGGER.info(f"{PREFIX}New authentication successful ✅")
        elif verbose:
            LOGGER.info(f"{PREFIX}Retrieve API key from {API_KEY_URL}")

    def request_api_key(self, max_attempts=3) -> bool:
        """
        Prompt the user to input their API key, retrying up to `max_attempts` times.

        Returns:
            (bool): True if an entered key authenticates successfully.

        Raises:
            ConnectionError: If all attempts fail.
        """
        import getpass

        for attempts in range(max_attempts):
            LOGGER.info(f"{PREFIX}Login. Attempt {attempts + 1} of {max_attempts}")
            input_key = getpass.getpass(f"Enter API key from {API_KEY_URL} ")
            self.api_key = input_key.split("_")[0]  # remove model id if present
            if self.authenticate():
                return True
        raise ConnectionError(emojis(f"{PREFIX}Failed to authenticate ❌"))

    def authenticate(self) -> bool:
        """
        Attempt to authenticate with the server using either id_token or API key.

        Returns:
            (bool): True if authentication is successful, False otherwise.
        """
        try:
            if header := self.get_auth_header():
                r = requests.post(f"{HUB_API_ROOT}/v1/auth", headers=header)
                if not r.json().get("success", False):
                    raise ConnectionError("Unable to authenticate.")
                return True
            raise ConnectionError("User has not authenticated locally.")
        except ConnectionError:
            self.id_token = self.api_key = False  # reset invalid credentials
            LOGGER.warning(f"{PREFIX}Invalid API key ⚠️")
            return False

    def auth_with_cookies(self) -> bool:
        """
        Attempt to fetch authentication via cookies and set id_token. User must be logged in to HUB and running in a
        supported browser.

        Returns:
            (bool): True if authentication is successful, False otherwise.
        """
        if not is_colab():
            return False  # Currently only works with Colab
        try:
            authn = request_with_credentials(f"{HUB_API_ROOT}/v1/auth/auto")
            if authn.get("success", False):
                self.id_token = authn.get("data", {}).get("idToken", None)
                self.authenticate()  # validate the freshly obtained id_token
                return True
            raise ConnectionError("Unable to fetch browser authentication details.")
        except ConnectionError:
            self.id_token = False  # reset invalid token
            return False

    def get_auth_header(self):
        """
        Get the authentication header for making API requests.

        Returns:
            (dict): The authentication header if id_token or API key is set, None otherwise.
        """
        if self.id_token:
            return {"authorization": f"Bearer {self.id_token}"}
        elif self.api_key:
            return {"x-api-key": self.api_key}
        # else returns None (implicit) — caller treats a falsy header as "not authenticated locally"
yolov8_model/ultralytics/hub/session.py ADDED
@@ -0,0 +1,348 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+
3
+ import threading
4
+ import time
5
+ from http import HTTPStatus
6
+ from pathlib import Path
7
+
8
+ import requests
9
+
10
+ from yolov8_model.ultralytics.hub.utils import HUB_WEB_ROOT, HELP_MSG, PREFIX, TQDM
11
+ from yolov8_model.ultralytics.utils import LOGGER, SETTINGS, __version__, checks, emojis, is_colab
12
+ from yolov8_model.ultralytics.utils.errors import HUBModelError
13
+
14
AGENT_NAME = f"python-{__version__}-colab" if is_colab() else f"python-{__version__}-local"  # agent string reported to HUB, distinguishing Colab from local runs
15
+
16
+
17
class HUBTrainingSession:
    """
    HUB training session for Ultralytics HUB YOLO models. Handles model initialization, heartbeats, and checkpointing.

    Attributes:
        agent_id (str): Identifier for the instance communicating with the server.
        model_id (str): Identifier for the YOLO model being trained.
        model_url (str): URL for the model in Ultralytics HUB.
        api_url (str): API URL for the model in Ultralytics HUB.
        auth_header (dict): Authentication header for the Ultralytics HUB API requests.
        rate_limits (dict): Rate limits for different API calls (in seconds).
        timers (dict): Timers for rate limiting.
        metrics_queue (dict): Queue for the model's metrics.
        model (dict): Model data fetched from Ultralytics HUB.
        alive (bool): Indicates if the heartbeat loop is active.
    """

    def __init__(self, identifier):
        """
        Initialize the HUBTrainingSession with the provided model identifier.

        Args:
            identifier (str): Model identifier used to initialize the HUB training session.
                It can be a URL string or a model key with specific format.

        Raises:
            ValueError: If the provided model identifier is invalid.
            ConnectionError: If connecting with global API key is not supported.
            ModuleNotFoundError: If hub-sdk package is not installed.
        """
        from hub_sdk import HUBClient

        self.rate_limits = {
            "metrics": 3.0,
            "ckpt": 900.0,
            "heartbeat": 300.0,
        }  # rate limits (seconds)
        self.metrics_queue = {}  # holds metrics for each epoch until upload
        self.timers = {}  # holds timers in ultralytics/utils/callbacks/hub.py

        # Parse input
        api_key, model_id, self.filename = self._parse_identifier(identifier)

        # Get credentials
        active_key = api_key or SETTINGS.get("api_key")
        credentials = {"api_key": active_key} if active_key else None  # set credentials

        # Initialize client
        self.client = HUBClient(credentials)

        if model_id:
            self.load_model(model_id)  # load existing model
        else:
            self.model = self.client.model()  # load empty model

    def load_model(self, model_id):
        """Loads an existing model from Ultralytics HUB using the provided model identifier."""
        self.model = self.client.model(model_id)
        if not self.model.data:  # then model does not exist
            raise ValueError(emojis("❌ The specified HUB model does not exist"))  # TODO: improve error handling

        self.model_url = f"{HUB_WEB_ROOT}/models/{self.model.id}"

        self._set_train_args()

        # Start heartbeats for HUB to monitor agent
        self.model.start_heartbeat(self.rate_limits["heartbeat"])
        LOGGER.info(f"{PREFIX}View model at {self.model_url} 🚀")

    def create_model(self, model_args):
        """Initializes a HUB training session with the specified model identifier."""
        payload = {
            "config": {
                "batchSize": model_args.get("batch", -1),
                "epochs": model_args.get("epochs", 300),
                "imageSize": model_args.get("imgsz", 640),
                "patience": model_args.get("patience", 100),
                "device": model_args.get("device", ""),
                "cache": model_args.get("cache", "ram"),
            },
            "dataset": {"name": model_args.get("data")},
            "lineage": {
                "architecture": {
                    "name": self.filename.replace(".pt", "").replace(".yaml", ""),
                },
                "parent": {},
            },
            "meta": {"name": self.filename},
        }

        if self.filename.endswith(".pt"):
            payload["lineage"]["parent"]["name"] = self.filename  # pretrained weights become the lineage parent

        self.model.create_model(payload)

        # Model could not be created
        # TODO: improve error handling
        if not self.model.id:
            return

        self.model_url = f"{HUB_WEB_ROOT}/models/{self.model.id}"

        # Start heartbeats for HUB to monitor agent
        self.model.start_heartbeat(self.rate_limits["heartbeat"])

        LOGGER.info(f"{PREFIX}View model at {self.model_url} 🚀")

    def _parse_identifier(self, identifier):
        """
        Parses the given identifier to determine the type of identifier and extract relevant components.

        The method supports different identifier formats:
        - A HUB URL, which starts with HUB_WEB_ROOT followed by '/models/'
        - An identifier containing an API key and a model ID separated by an underscore
        - An identifier that is solely a model ID of a fixed length
        - A local filename that ends with '.pt' or '.yaml'

        Args:
            identifier (str): The identifier string to be parsed.

        Returns:
            (tuple): A tuple containing the API key, model ID, and filename as applicable.

        Raises:
            HUBModelError: If the identifier format is not recognized.
        """

        # Initialize variables
        api_key, model_id, filename = None, None, None

        # Check if identifier is a HUB URL
        if identifier.startswith(f"{HUB_WEB_ROOT}/models/"):
            # Extract the model_id after the HUB_WEB_ROOT URL
            model_id = identifier.split(f"{HUB_WEB_ROOT}/models/")[-1]
        else:
            # Split the identifier based on underscores only if it's not a HUB URL
            parts = identifier.split("_")

            # Check if identifier is in the format of API key and model ID (42-char key, 20-char id)
            if len(parts) == 2 and len(parts[0]) == 42 and len(parts[1]) == 20:
                api_key, model_id = parts
            # Check if identifier is a single model ID
            elif len(parts) == 1 and len(parts[0]) == 20:
                model_id = parts[0]
            # Check if identifier is a local filename
            elif identifier.endswith(".pt") or identifier.endswith(".yaml"):
                filename = identifier
            else:
                raise HUBModelError(
                    f"model='{identifier}' could not be parsed. Check format is correct. "
                    f"Supported formats are Ultralytics HUB URL, apiKey_modelId, modelId, local pt or yaml file."
                )

        return api_key, model_id, filename

    def _set_train_args(self, **kwargs):
        """Initializes training arguments and creates a model entry on the Ultralytics HUB."""
        if self.model.is_trained():
            # Model is already trained
            raise ValueError(emojis(f"Model is already trained and uploaded to {self.model_url} 🚀"))

        if self.model.is_resumable():
            # Model has saved weights
            self.train_args = {"data": self.model.get_dataset_url(), "resume": True}
            self.model_file = self.model.get_weights_url("last")
        else:
            # Model has no saved weights
            def get_train_args(config):
                """Build the training-argument dict from the HUB model's saved config."""
                return {
                    "batch": config["batchSize"],
                    "epochs": config["epochs"],
                    "imgsz": config["imageSize"],
                    "patience": config["patience"],
                    "device": config["device"],
                    "cache": config["cache"],
                    "data": self.model.get_dataset_url(),
                }

            self.train_args = get_train_args(self.model.data.get("config"))
            # Set the model file as either a *.pt or *.yaml file
            self.model_file = (
                self.model.get_weights_url("parent") if self.model.is_pretrained() else self.model.get_architecture()
            )

        if not self.train_args.get("data"):
            raise ValueError("Dataset may still be processing. Please wait a minute and try again.")  # RF fix

        self.model_file = checks.check_yolov5u_filename(self.model_file, verbose=False)  # YOLOv5->YOLOv5u
        self.model_id = self.model.id

    def request_queue(
        self,
        request_func,
        retry=3,
        timeout=30,
        thread=True,
        verbose=True,
        progress_total=None,
        *args,
        **kwargs,
    ):
        """
        Call `request_func` with exponential-backoff retries, a total timeout, and optional threading.

        Args:
            request_func (callable): Function performing the HTTP request; must return a Response or None.
            retry (int): Number of retries to attempt.
            timeout (int): Total seconds allowed across all attempts.
            thread (bool): If True, run the retry loop in a daemon thread and return None immediately.
            verbose (bool): Whether to log failure messages.
            progress_total (int, optional): If set, display an upload progress bar of this total size.
            *args: Positional arguments forwarded to `request_func`.
            **kwargs: Keyword arguments forwarded to `request_func`.

        Returns:
            (requests.Response | None): Final response when run synchronously, else None when threaded.
        """

        def retry_request():
            """Attempts to call `request_func` with retries and a total timeout."""
            t0 = time.time()  # Record the start time for the timeout
            response = None  # ensure a defined return value even if the timeout fires before the first attempt
            for i in range(retry + 1):
                if (time.time() - t0) > timeout:
                    LOGGER.warning(f"{PREFIX}Timeout for request reached. {HELP_MSG}")
                    break  # Timeout reached, exit loop

                response = request_func(*args, **kwargs)
                if response is None:
                    LOGGER.warning(f"{PREFIX}Received no response from the request. {HELP_MSG}")
                    time.sleep(2**i)  # Exponential backoff before retrying
                    continue  # Skip further processing and retry

                if progress_total:
                    self._show_upload_progress(progress_total, response)

                if HTTPStatus.OK <= response.status_code < HTTPStatus.MULTIPLE_CHOICES:
                    return response  # Success, no need to retry

                if i == 0:
                    # Initial attempt, check status code and provide messages
                    message = self._get_failure_message(response, retry, timeout)

                    if verbose:
                        LOGGER.warning(f"{PREFIX}{message} {HELP_MSG} ({response.status_code})")

                if not self._should_retry(response.status_code):
                    # Fixed: message previously had an unbalanced '(' around the status code
                    LOGGER.warning(f"{PREFIX}Request failed. {HELP_MSG} ({response.status_code})")
                    break  # Not an error that should be retried, exit loop

                time.sleep(2**i)  # Exponential backoff for retries

            return response

        if thread:
            # Start a new thread to run the retry_request function
            threading.Thread(target=retry_request, daemon=True).start()
        else:
            # If running in the main thread, call retry_request directly
            return retry_request()

    def _should_retry(self, status_code):
        """Determines if a request should be retried based on the HTTP status code."""
        retry_codes = {
            HTTPStatus.REQUEST_TIMEOUT,
            HTTPStatus.BAD_GATEWAY,
            HTTPStatus.GATEWAY_TIMEOUT,
        }
        return status_code in retry_codes

    def _get_failure_message(self, response: requests.Response, retry: int, timeout: int):
        """
        Generate a retry message based on the response status code.

        Args:
            response: The HTTP response object.
            retry: The number of retry attempts allowed.
            timeout: The maximum timeout duration.

        Returns:
            (str): The retry message.
        """
        if self._should_retry(response.status_code):
            return f"Retrying {retry}x for {timeout}s." if retry else ""
        elif response.status_code == HTTPStatus.TOO_MANY_REQUESTS:  # rate limit
            headers = response.headers
            return (
                f"Rate limit reached ({headers['X-RateLimit-Remaining']}/{headers['X-RateLimit-Limit']}). "
                f"Please retry after {headers['Retry-After']}s."
            )
        else:
            try:
                return response.json().get("message", "No JSON message.")
            except AttributeError:
                return "Unable to read JSON."

    def upload_metrics(self):
        """Upload model metrics to Ultralytics HUB."""
        return self.request_queue(self.model.upload_metrics, metrics=self.metrics_queue.copy(), thread=True)

    def upload_model(
        self,
        epoch: int,
        weights: str,
        is_best: bool = False,
        map: float = 0.0,
        final: bool = False,
    ) -> None:
        """
        Upload a model checkpoint to Ultralytics HUB.

        Args:
            epoch (int): The current training epoch.
            weights (str): Path to the model weights file.
            is_best (bool): Indicates if the current model is the best one so far.
            map (float): Mean average precision of the model.
            final (bool): Indicates if the model is the final model after training.
        """
        if Path(weights).is_file():
            progress_total = Path(weights).stat().st_size if final else None  # Only show progress if final
            self.request_queue(
                self.model.upload_model,
                epoch=epoch,
                weights=weights,
                is_best=is_best,
                map=map,
                final=final,
                retry=10,
                timeout=3600,
                thread=not final,  # final upload is synchronous so training doesn't exit before it completes
                progress_total=progress_total,
            )
        else:
            LOGGER.warning(f"{PREFIX}WARNING ⚠️ Model upload issue. Missing model {weights}.")

    def _show_upload_progress(self, content_length: int, response: requests.Response) -> None:
        """
        Display a progress bar to track the upload progress of a file download.

        Args:
            content_length (int): The total size of the content to be downloaded in bytes.
            response (requests.Response): The response object from the file download request.

        Returns:
            None
        """
        with TQDM(total=content_length, unit="B", unit_scale=True, unit_divisor=1024) as pbar:
            for data in response.iter_content(chunk_size=1024):
                pbar.update(len(data))
yolov8_model/ultralytics/hub/utils.py ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+
3
+ import os
4
+ import platform
5
+ import random
6
+ import sys
7
+ import threading
8
+ import time
9
+ from pathlib import Path
10
+
11
+ import requests
12
+
13
+ from yolov8_model.ultralytics.utils import (
14
+ ENVIRONMENT,
15
+ LOGGER,
16
+ ONLINE,
17
+ RANK,
18
+ SETTINGS,
19
+ TESTS_RUNNING,
20
+ TQDM,
21
+ TryExcept,
22
+ __version__,
23
+ colorstr,
24
+ get_git_origin_url,
25
+ is_colab,
26
+ is_git_dir,
27
+ is_pip_package,
28
+ )
29
+ from yolov8_model.ultralytics.utils.downloads import GITHUB_ASSETS_NAMES
30
+
31
# HUB endpoints; both can be overridden via environment variables (useful for staging/testing)
HUB_API_ROOT = os.environ.get("ULTRALYTICS_HUB_API", "https://api.ultralytics.com")
HUB_WEB_ROOT = os.environ.get("ULTRALYTICS_HUB_WEB", "https://hub.ultralytics.com")

PREFIX = colorstr("Ultralytics HUB: ")  # prefix for all HUB log messages
HELP_MSG = "If this issue persists please visit https://github.com/ultralytics/hub/issues for assistance."
36
+
37
+
38
def request_with_credentials(url: str) -> any:  # NOTE(review): '-> any' is the builtin function; likely meant typing.Any — confirm
    """
    Make an AJAX request with cookies attached in a Google Colab environment.

    Args:
        url (str): The URL to make the request to.

    Returns:
        (any): The response data from the AJAX request.

    Raises:
        OSError: If the function is not run in a Google Colab environment.
    """
    if not is_colab():
        raise OSError("request_with_credentials() must run in a Colab environment")
    from google.colab import output  # noqa
    from IPython import display  # noqa

    # Inject JS that POSTs with browser cookies ('credentials: include') and stores the promise in window._hub_tmp
    display.display(
        display.Javascript(
            """
            window._hub_tmp = new Promise((resolve, reject) => {
                const timeout = setTimeout(() => reject("Failed authenticating existing browser session"), 5000)
                fetch("%s", {
                    method: 'POST',
                    credentials: 'include'
                })
                    .then((response) => resolve(response.json()))
                    .then((json) => {
                        clearTimeout(timeout);
                    }).catch((err) => {
                        clearTimeout(timeout);
                        reject(err);
                    });
            });
            """
            % url
        )
    )
    # Block until the browser-side promise settles and return its value to Python
    return output.eval_js("_hub_tmp")
78
+
79
+
80
def requests_with_progress(method, url, **kwargs):
    """
    Make an HTTP request using the specified method and URL, with an optional progress bar.

    Args:
        method (str): The HTTP method to use (e.g. 'GET', 'POST').
        url (str): The URL to send the request to.
        **kwargs (dict): Additional keyword arguments to pass to the underlying `requests.request` function.

    Returns:
        (requests.Response): The response object from the HTTP request.

    Note:
        - If 'progress' is set to True, the progress bar will display the download progress for responses with a known
          content length.
        - If 'progress' is a number then progress bar will display assuming content length = progress.
    """
    progress = kwargs.pop("progress", False)
    if not progress:
        return requests.request(method, url, **kwargs)
    response = requests.request(method, url, stream=True, **kwargs)
    total = int(response.headers.get("content-length", 0) if isinstance(progress, bool) else progress)  # total size
    pbar = TQDM(total=total, unit="B", unit_scale=True, unit_divisor=1024)
    try:
        for data in response.iter_content(chunk_size=1024):
            pbar.update(len(data))
    except requests.exceptions.ChunkedEncodingError:  # avoid 'Connection broken: IncompleteRead' warnings
        response.close()
    finally:
        pbar.close()  # always close the bar (original leaked it when the stream broke mid-download)
    return response
110
+
111
+
112
def smart_request(method, url, retry=3, timeout=30, thread=True, code=-1, verbose=True, progress=False, **kwargs):
    """
    Makes an HTTP request using the 'requests' library, with exponential backoff retries up to a specified timeout.

    Args:
        method (str): The HTTP method to use for the request. Choices are 'post' and 'get'.
        url (str): The URL to make the request to.
        retry (int, optional): Number of retries to attempt before giving up. Default is 3.
        timeout (int, optional): Timeout in seconds after which the function will give up retrying. Default is 30.
        thread (bool, optional): Whether to execute the request in a separate daemon thread. Default is True.
        code (int, optional): An identifier for the request, used for logging purposes. Default is -1.
        verbose (bool, optional): A flag to determine whether to print out to console or not. Default is True.
        progress (bool, optional): Whether to show a progress bar during the request. Default is False.
        **kwargs (dict): Keyword arguments to be passed to the requests function specified in method.

    Returns:
        (requests.Response): The HTTP response object. If the request is executed in a separate thread, returns None.
    """
    retry_codes = (408, 500)  # retry only these codes

    @TryExcept(verbose=verbose)
    def func(func_method, func_url, **func_kwargs):
        """Make HTTP requests with retries and timeouts, with optional progress tracking."""
        r = None  # response
        t0 = time.time()  # initial time for timer
        for i in range(retry + 1):
            if (time.time() - t0) > timeout:
                break  # total time budget exhausted, give up
            r = requests_with_progress(func_method, func_url, **func_kwargs)  # i.e. get(url, data, json, files)
            if r.status_code < 300:  # return codes in the 2xx range are generally considered "good" or "successful"
                break
            try:
                m = r.json().get("message", "No JSON message.")
            except AttributeError:
                m = "Unable to read JSON."
            if i == 0:  # only log a detailed message on the first failure
                if r.status_code in retry_codes:
                    m += f" Retrying {retry}x for {timeout}s." if retry else ""
                elif r.status_code == 429:  # rate limit
                    h = r.headers  # response headers
                    m = (
                        f"Rate limit reached ({h['X-RateLimit-Remaining']}/{h['X-RateLimit-Limit']}). "
                        f"Please retry after {h['Retry-After']}s."
                    )
                if verbose:
                    LOGGER.warning(f"{PREFIX}{m} {HELP_MSG} ({r.status_code} #{code})")
                if r.status_code not in retry_codes:
                    return r  # non-retryable failure, return immediately
            time.sleep(2**i)  # exponential standoff
        return r

    args = method, url  # positional args forwarded to func either via Thread or directly
    kwargs["progress"] = progress
    if thread:
        # Fire-and-forget: response is discarded in threaded mode
        threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True).start()
    else:
        return func(*args, **kwargs)
169
+
170
+
171
class Events:
    """
    A class for collecting anonymous event analytics. Event analytics are enabled when sync=True in settings and
    disabled when sync=False. Run 'yolo settings' to see and update settings YAML file.

    Attributes:
        url (str): The URL to send anonymous events.
        rate_limit (float): The rate limit in seconds for sending events.
        metadata (dict): A dictionary containing metadata about the environment.
        enabled (bool): A flag to enable or disable Events based on certain conditions.
    """

    # Google Analytics 4 Measurement Protocol endpoint (measurement id + api secret in the query string)
    url = "https://www.google-analytics.com/mp/collect?measurement_id=G-X8NCJYTQXM&api_secret=QLQrATrNSwGRFRLE-cbHJw"

    def __init__(self):
        """Initializes the Events object with default values for events, rate_limit, and metadata."""
        self.events = []  # events list
        self.rate_limit = 60.0  # rate limit (seconds)
        self.t = 0.0  # rate limit timer (seconds)
        self.metadata = {
            "cli": Path(sys.argv[0]).name == "yolo",
            "install": "git" if is_git_dir() else "pip" if is_pip_package() else "other",
            "python": ".".join(platform.python_version_tuple()[:2]),  # i.e. 3.10
            "version": __version__,
            "env": ENVIRONMENT,
            "session_id": round(random.random() * 1e15),
            "engagement_time_msec": 1000,
        }
        # Only report from rank 0 (or single-process), online, non-test runs of an official install
        self.enabled = (
            SETTINGS["sync"]
            and RANK in (-1, 0)
            and not TESTS_RUNNING
            and ONLINE
            and (is_pip_package() or get_git_origin_url() == "https://github.com/ultralytics/ultralytics.git")
        )

    def __call__(self, cfg):
        """
        Attempts to add a new event to the events list and send events if the rate limit is reached.

        Args:
            cfg (IterableSimpleNamespace): The configuration object containing mode and task information.
        """
        if not self.enabled:
            # Events disabled, do nothing
            return

        # Attempt to add to events
        if len(self.events) < 25:  # Events list limited to 25 events (drop any events past this)
            params = {
                **self.metadata,
                "task": cfg.task,
                "model": cfg.model if cfg.model in GITHUB_ASSETS_NAMES else "custom",
            }
            if cfg.mode == "export":
                params["format"] = cfg.format
            self.events.append({"name": cfg.mode, "params": params})

        # Check rate limit
        t = time.time()
        if (t - self.t) < self.rate_limit:
            # Time is under rate limiter, wait to send
            return

        # Time is over rate limiter, send now
        data = {"client_id": SETTINGS["uuid"], "events": self.events}  # SHA-256 anonymized UUID hash and events list

        # POST equivalent to requests.post(self.url, json=data); threaded + no retries so it never blocks training
        smart_request("post", self.url, json=data, retry=0, verbose=False)

        # Reset events and rate limit timer
        self.events = []
        self.t = t
244
+
245
+
246
# Run below code on hub/utils init -------------------------------------------------------------------------------------
events = Events()  # module-level singleton used by callbacks to report anonymous usage events
yolov8_model/ultralytics/models/__init__.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+
3
+ from .rtdetr import RTDETR
4
+ from .sam import SAM
5
+ from .yolo import YOLO
6
+
7
__all__ = "YOLO", "RTDETR", "SAM"  # allow simpler import; tuple of this package's public model classes
yolov8_model/ultralytics/models/fastsam/__init__.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+
3
+ from .model import FastSAM
4
+ from .predict import FastSAMPredictor
5
+ from .prompt import FastSAMPrompt
6
+ from .val import FastSAMValidator
7
+
8
__all__ = "FastSAMPredictor", "FastSAM", "FastSAMPrompt", "FastSAMValidator"  # public API of the fastsam subpackage
yolov8_model/ultralytics/models/fastsam/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (390 Bytes). View file