diff --git a/Prism/Dream/Dream_Prism/eval_instruct/lm_eval/api/__init__.py b/Prism/Dream/Dream_Prism/eval_instruct/lm_eval/api/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Prism/Dream/Dream_Prism/eval_instruct/lm_eval/api/instance.py b/Prism/Dream/Dream_Prism/eval_instruct/lm_eval/api/instance.py new file mode 100644 index 0000000000000000000000000000000000000000..d3c6afa0644e729ba441728c72a2469fdad07b8f --- /dev/null +++ b/Prism/Dream/Dream_Prism/eval_instruct/lm_eval/api/instance.py @@ -0,0 +1,38 @@ +from dataclasses import dataclass, field +from typing import Literal, Optional, Tuple + + +OutputType = Literal[ + "loglikelihood", "loglikelihood_rolling", "generate_until", "multiple_choice" +] + + +@dataclass +class Instance: + request_type: OutputType + doc: dict + arguments: tuple + idx: int + metadata: Tuple[Optional[str], Optional[int], Optional[int]] = field( + default_factory=lambda: (None, None, None) + ) + resps: list = field(default_factory=list) + filtered_resps: dict = field(default_factory=dict) + + # initialized after init + task_name: Optional[str] = None + doc_id: Optional[int] = None + repeats: Optional[int] = None + + def __post_init__(self) -> None: + # unpack metadata field + self.task_name, self.doc_id, self.repeats = self.metadata + + @property + def args(self): + """ + Returns (string,) where `string` is the string to calculate loglikelihood over + """ + return ( + self.arguments if isinstance(self.arguments, tuple) else (self.arguments,) + ) diff --git a/Prism/Dream/Dream_Prism/eval_instruct/lm_eval/api/metrics.py b/Prism/Dream/Dream_Prism/eval_instruct/lm_eval/api/metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..61fca5e19d376502f3b75aa5328045cee6ee5454 --- /dev/null +++ b/Prism/Dream/Dream_Prism/eval_instruct/lm_eval/api/metrics.py @@ -0,0 +1,578 @@ +import logging +import math +import random +import re +import string 
+from collections.abc import Iterable +from typing import List + +import numpy as np +import sacrebleu + +from lm_eval.api.registry import register_aggregation, register_metric + + +eval_logger = logging.getLogger(__name__) + + +# Register Aggregations First +@register_aggregation("bypass") +def bypass_agg(arr): + return 999 + + +@register_aggregation("nanmean") +def nanmean(arr): + if len(arr) == 0 or all(np.isnan(arr)): + return np.nan + return np.nanmean(arr) + + +@register_aggregation("mean") +def mean(arr): + return sum(arr) / len(arr) + + +@register_aggregation("median") +def median(arr): + return arr[len(arr) // 2] + + +# Certain metrics must be calculated across all documents in a benchmark. +# We use them as aggregation metrics, paired with no-op passthrough metric fns. +@register_aggregation("perplexity") +def perplexity(items): + return math.exp(-mean(items)) + + +@register_aggregation("weighted_perplexity") +def weighted_perplexity(items): + return math.exp(-weighted_mean(items)) + + +@register_aggregation("bits_per_byte") +def bits_per_byte(items): + return -weighted_mean(items) / math.log(2) + + +@register_aggregation("f1") +def f1_score(items): + from sklearn.metrics import f1_score + + unzipped_list = list(zip(*items)) + golds = unzipped_list[0] + preds = unzipped_list[1] + fscore = f1_score(golds, preds) + + return np.max(fscore) + + +@register_aggregation("matthews_corrcoef") +def matthews_corrcoef(items): + from sklearn.metrics import matthews_corrcoef + + unzipped_list = list(zip(*items)) + golds = unzipped_list[0] + preds = unzipped_list[1] + return matthews_corrcoef(golds, preds) + + +@register_aggregation("bleu") +def bleu(items): + """The Bilingual Evaluation Understudy Score, or BLEU for short, is a metric + for evaluating a generated sentence to a reference sentence. 
It counts matching + n-grams in the candidate translation to n-grams in the reference text, where + 1-gram or unigram would be each token and a bigram comparison would be each + word pair. The comparison is made regardless of word order + Source: https://machinelearningmastery.com/calculate-bleu-score-for-text-python/ + Paper: https://www.aclweb.org/anthology/P02-1040/ + + Higher is better + """ + refs = list(zip(*items))[0] + preds = list(zip(*items))[1] + refs, preds = _sacreformat(refs, preds) + return sacrebleu.corpus_bleu(preds, refs).score + + +@register_aggregation("chrf") +def chrf(items): + """chrF++ is a tool for automatic evaluation of machine translation output + based on character n-gram precision and recall enhanced with word n-grams. + Source: https://github.com/m-popovic/chrF + Paper: https://www.aclweb.org/anthology/W15-3049.pdf + + Higher is better # TODO I think + """ + refs = list(zip(*items))[0] + preds = list(zip(*items))[1] + refs, preds = _sacreformat(refs, preds) + return sacrebleu.corpus_chrf(preds, refs).score + + +@register_aggregation("ter") +def ter(items): + """Translation Error Rate is an error metric for machine translation that + measures the number of edits required to change a system output into one + of the references + Source: http://www.cs.umd.edu/~snover/tercom/ + Paper: http://mt-archive.info/AMTA-2006-Snover.pdf + + Lower is better + """ + refs = list(zip(*items))[0] + preds = list(zip(*items))[1] + refs, preds = _sacreformat(refs, preds) + return sacrebleu.corpus_ter(preds, refs).score + + +@register_aggregation("brier_score") +def brier_score(items): # This is a passthrough function + gold, predictions = list(zip(*items)) + bs, num_class = np.array(predictions).shape + + gold = list(gold) + gold_one_hot = np.eye(num_class)[gold] + return np.mean(np.sum((predictions - gold_one_hot) ** 2, axis=1)) + + +@register_metric( + metric="brier_score", + higher_is_better=False, + output_type=["multiple_choice"], + 
aggregation="brier_score", +) +def brier_score_fn(items): # This is a passthrough function + return items + + +@register_metric( + metric="acc", + higher_is_better=True, + output_type=["loglikelihood", "multiple_choice"], + aggregation="mean", +) +def acc_fn(items): # This is a passthrough function + return items + + +@register_metric( + metric="acc_norm", + higher_is_better=True, + output_type=["loglikelihood", "multiple_choice"], + aggregation="mean", +) +def acc_norm_fn(items): # This is a passthrough function + return items + + +@register_metric( + metric="acc_mutual_info", + higher_is_better=True, + output_type="multiple_choice", + aggregation="mean", +) +def acc_mutual_info_fn(items): # This is a passthrough function + return items + + +### the code used in the `exact_match_hf_evaluate` function is ported from +### https://github.com/huggingface/evaluate/blob/main/metrics/exact_match/exact_match.py +### which is under the apache license. + +# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+def exact_match_hf_evaluate( + predictions, + references, + regexes_to_ignore=None, + ignore_case=False, + ignore_punctuation=False, + ignore_numbers=False, +): + if regexes_to_ignore is not None: + for s in regexes_to_ignore: + predictions = np.array([re.sub(s, "", x) for x in predictions]) + references = np.array([re.sub(s, "", x) for x in references]) + else: + predictions = np.asarray(predictions) + references = np.asarray(references) + + if ignore_case: + predictions = np.char.lower(predictions) + references = np.char.lower(references) + + if ignore_punctuation: + repl_table = string.punctuation.maketrans("", "", string.punctuation) + predictions = np.char.translate(predictions, table=repl_table) + references = np.char.translate(references, table=repl_table) + + if ignore_numbers: + repl_table = string.digits.maketrans("", "", string.digits) + predictions = np.char.translate(predictions, table=repl_table) + references = np.char.translate(references, table=repl_table) + + score_list = predictions == references + + return {"exact_match": np.mean(score_list)} + + +### + + +@register_metric( + metric="exact_match", + higher_is_better=True, + output_type="generate_until", + aggregation="mean", +) +def exact_match_fn(**kwargs): + return exact_match_hf_evaluate(**kwargs) + + +@register_metric( + metric="perplexity", + higher_is_better=False, + output_type="loglikelihood", + aggregation="perplexity", +) +def perplexity_fn(items): # This is a passthrough function + return items + + +@register_metric( + metric="word_perplexity", + higher_is_better=False, + output_type="loglikelihood_rolling", + aggregation="weighted_perplexity", +) +def word_perplexity_fn(items): # This is a passthrough function + return items + + +@register_metric( + metric="byte_perplexity", + higher_is_better=False, + output_type="loglikelihood_rolling", + aggregation="weighted_perplexity", +) +def byte_perplexity_fn(items): # This is a passthrough function + return items + + +@register_metric( + 
metric="bits_per_byte", + higher_is_better=False, + output_type="loglikelihood_rolling", + aggregation="bits_per_byte", +) +def bits_per_byte_fn(items): # This is a passthrough function + return items + + +def pop_stddev(arr): + mu = mean(arr) + return math.sqrt(sum([(x - mu) ** 2 for x in arr]) / len(arr)) + + +def sample_stddev(arr): + mu = mean(arr) + return math.sqrt(sum([(x - mu) ** 2 for x in arr]) / (len(arr) - 1)) + + +def mean_stderr(arr): + return sample_stddev(arr) / math.sqrt(len(arr)) + + +@register_metric( + metric="bypass", + higher_is_better=True, + output_type=["loglikelihood", "multiple_choice", "generate_until"], + aggregation="bypass", +) +def bypass(items): + return None + + +@register_metric( + metric="mcc", + higher_is_better=True, + output_type="multiple_choice", + aggregation="matthews_corrcoef", +) +def mcc_fn(items): # This is a passthrough function + return items + + +@register_metric( + metric="f1", + higher_is_better=True, + output_type="multiple_choice", + aggregation="f1", +) +def f1_fn(items): # This is a passthrough function + return items + + +@register_metric( + metric="bleu", + higher_is_better=True, + output_type="generate_until", + aggregation="bleu", +) +def bleu_fn(items): # This is a passthrough function + return items + + +@register_metric( + metric="chrf", + higher_is_better=True, + output_type="generate_until", + aggregation="chrf", +) +def chrf_fn(items): # This is a passthrough function + return items + + +@register_metric( + metric="ter", + higher_is_better=True, + output_type="generate_until", + aggregation="ter", +) +def ter_fn(items): # This is a passthrough function + return items + + +@register_metric( + metric="acc_all", + higher_is_better=True, + output_type="loglikelihood", + aggregation="mean", +) +def acc_all(items): + # Only count as correct if all answers are labeled correctly for each question + question_scoring_dict = {} + preds = list(zip(*items))[0] + docs = list(zip(*items))[1] + + for doc, pred in 
zip(docs, preds): + paragraph_id = doc["idx"]["paragraph"] + question_id = doc["idx"]["question"] + if (paragraph_id, question_id) not in question_scoring_dict: + question_scoring_dict[(paragraph_id, question_id)] = [] + + gold_label = doc["label"] == 1 + + question_scoring_dict[(paragraph_id, question_id)].append(gold_label == pred) + acc = np.mean([int(all(x)) for x in question_scoring_dict.values()]) + return acc + + +def acc_all_stderr(items): + # Only count as correct if all answers are labeled correctly for each question + question_scoring_dict = {} + preds = list(zip(*items))[0] + docs = list(zip(*items))[1] + + for doc, pred in zip(docs, preds): + question_id = doc["idx"]["question"] + if question_id not in question_scoring_dict: + question_scoring_dict[question_id] = [] + + gold_label = doc["label"] == 1 + question_scoring_dict[question_id].append(gold_label == pred) + + acc = mean_stderr([int(all(x)) for x in question_scoring_dict.values()]) + return acc + + +def metric_max_over_ground_truths(metric_fn, prediction, ground_truths): + """Compute max metric between prediction and each ground truth.""" + scores_for_ground_truths = [] + for ground_truth in ground_truths: + score = metric_fn(prediction, ground_truth) + scores_for_ground_truths.append(score) + return max(scores_for_ground_truths) + + +def weighted_mean(items): + a, b = zip(*items) + return sum(a) / sum(b) + + +def is_non_str_iterable(obj): + return isinstance(obj, Iterable) and not isinstance(obj, str) + + +def _sacreformat(refs, preds): + """Format refs and preds for sacrebleu corpus calculation. It is very particular""" + # Sacrebleu expects (List[str], List[List[str]) + # e.g. sacrebleu.corpus_bleu([pred_t], [[ref1_stream], [ref2_stream], ...]) + + # Note [ref1_stream] is the first reference for each pred. 
+ # So lists are size N and (M, N) for N preds and M possible refs for each pred + # This is a different order of dimensions that I would expect + + # We expect refs to be List[str] or List[List[str]], the outer list corresponding to preds + # Must become List[List[str]] with the inner list corresponding to preds + if not is_non_str_iterable(refs): + refs = list(refs) + if not is_non_str_iterable(refs[0]): + refs = [[ref] for ref in refs] + refs = list(zip(*refs)) + # Note the number of refs in each ref list much match the number of preds + + # We expect preds to be List[str] or List[List[str]]. Must become List[str] + if not is_non_str_iterable(preds): + preds = list(preds) + if is_non_str_iterable(preds[0]): + assert len(preds[0]) == 1, f"Pred must be a str, was {preds[0]}" + preds = [pred[0] for pred in preds] + + return refs, preds + + +# stderr stuff + + +class _bootstrap_internal: + def __init__(self, f, n) -> None: + self.f = f + self.n = n + + def __call__(self, v): + i, xs = v + rnd = random.Random() + rnd.seed(i) + res = [] + for _ in range(self.n): + res.append(self.f(rnd.choices(xs, k=len(xs)))) + return res + + +def bootstrap_stderr(f, xs, iters): + import multiprocessing as mp + + pool = mp.Pool(mp.cpu_count()) + # this gives a biased estimate of the stderr (i.e w/ the mean, it gives something + # equivalent to stderr calculated without Bessel's correction in the stddev. 
+ # Unfortunately, I haven't been able to figure out what the right correction is + # to make the bootstrap unbiased - i considered multiplying by sqrt(n/(n-1)) but + # that would be ad-hoc and I can't prove that that would actually be an unbiased estimator) + # Thankfully, shouldn't matter because our samples are pretty big usually anyways + res = [] + chunk_size = min(1000, iters) + from tqdm import tqdm + + print("bootstrapping for stddev:", f.__name__) + for bootstrap in tqdm( + pool.imap( + _bootstrap_internal(f, chunk_size), + [(i, xs) for i in range(iters // chunk_size)], + ), + total=iters // chunk_size, + ): + # sample w replacement + res.extend(bootstrap) + + pool.close() + return sample_stddev(res) + + +def stderr_for_metric(metric, bootstrap_iters: int): + if bootstrap_iters <= 0: + # return no function (don't compute stderr) if bootstrap iters = 0 + return None + + bootstrappable = [ + median, + matthews_corrcoef, + f1_score, + perplexity, + bleu, + chrf, + ter, + nanmean, + ] + + if metric in bootstrappable: + return lambda x: bootstrap_stderr(metric, x, iters=bootstrap_iters) + + stderr = {mean: mean_stderr, acc_all: acc_all_stderr} + + return stderr.get(metric, None) + + +def pooled_sample_stderr(stderrs: List[float], sizes: List[int]): + # Used to aggregate bootstrapped stderrs across subtasks in a group, + # when we are weighting by the size of each subtask. + # + + assert len(stderrs) == len(sizes) + + # formula source: https://en.wikipedia.org/wiki/Pooled_variance + # and: https://stats.stackexchange.com/a/4841331 + # this empirically seems to match running `stderr_for_metric` on all instances + # from the subtasks concatenated with each other. 
+ pooled_sample_var = ( + sum([(size - 1) * stderr**2 * size for size, stderr in zip(sizes, stderrs)]) + ) / (sum(sizes) - len(sizes)) + + return np.sqrt(pooled_sample_var / sum(sizes)) + + +def combined_sample_stderr(stderrs: List[float], sizes: List[int], metrics=None): + assert metrics is not None, ( + "Need to pass a list of each subtask's metric for this stderr aggregation" + ) + assert len(stderrs) == len(sizes) and len(sizes) == len(metrics) + + # See https://github.com/EleutherAI/lm-evaluation-harness/pull/1390 for more documentation. + # This formula depends on sample means. + # removed because it seems to give erroneously huge stderrs for groupings of tasks + # and does not seem to match up with bootstrap-calculated stderrs for groups. + + ### don't use this unless a statistician has told you it's the right thing to do ### + + # accumulators: we'll aggregate pairwise N - 1 times + variance = stderrs[0] ** 2 + curr_size = sizes[0] + curr_score = metrics[0] + + for stderr, size, score in zip(stderrs[1:], sizes[1:], metrics[1:]): + curr_score = ((curr_score * curr_size) + (score * size)) / ( + curr_size + size + ) # NOTE: this assumes our aggregation fn is "mean" + + variance = ((curr_size - 1) * variance + (size - 1) * (stderr**2)) / ( + curr_size + size - 1 + ) + curr_size * size / ((curr_size + size) * (curr_size + size - 1)) * ( + curr_score - score + ) ** 2 + + return np.sqrt(variance) + + +def aggregate_subtask_metrics(metrics, sizes, weight_by_size=True): + # A helper function that is used to aggregate + # subtask scores cross-task. 
+ # TODO: does not hold for non-mean aggregations + if not weight_by_size: + sizes = [1] * len(sizes) + + assert len(metrics) == len(sizes) + + return sum([metric * size for metric, size in zip(metrics, sizes)]) / sum(sizes) diff --git a/Prism/Dream/Dream_Prism/eval_instruct/lm_eval/api/registry.py b/Prism/Dream/Dream_Prism/eval_instruct/lm_eval/api/registry.py new file mode 100644 index 0000000000000000000000000000000000000000..4673b157b1fc1eaed2eb40e7a1ad527ce1fcb595 --- /dev/null +++ b/Prism/Dream/Dream_Prism/eval_instruct/lm_eval/api/registry.py @@ -0,0 +1,196 @@ +import logging +from typing import Callable, Dict, Union + +import evaluate as hf_evaluate + +from lm_eval.api.model import LM + + +eval_logger = logging.getLogger(__name__) + +MODEL_REGISTRY = {} + + +def register_model(*names): + # either pass a list or a single alias. + # function receives them as a tuple of strings + + def decorate(cls): + for name in names: + assert issubclass(cls, LM), ( + f"Model '{name}' ({cls.__name__}) must extend LM class" + ) + + assert name not in MODEL_REGISTRY, ( + f"Model named '{name}' conflicts with existing model! Please register with a non-conflicting alias instead." + ) + + MODEL_REGISTRY[name] = cls + return cls + + return decorate + + +def get_model(model_name): + try: + return MODEL_REGISTRY[model_name] + except KeyError: + raise ValueError( + f"Attempted to load model '{model_name}', but no model for this name found! Supported model names: {', '.join(MODEL_REGISTRY.keys())}" + ) + + +TASK_REGISTRY = {} +GROUP_REGISTRY = {} +ALL_TASKS = set() +func2task_index = {} + + +def register_task(name): + def decorate(fn): + assert name not in TASK_REGISTRY, ( + f"task named '{name}' conflicts with existing registered task!" 
+ ) + + TASK_REGISTRY[name] = fn + ALL_TASKS.add(name) + func2task_index[fn.__name__] = name + return fn + + return decorate + + +def register_group(name): + def decorate(fn): + func_name = func2task_index[fn.__name__] + if name in GROUP_REGISTRY: + GROUP_REGISTRY[name].append(func_name) + else: + GROUP_REGISTRY[name] = [func_name] + ALL_TASKS.add(name) + return fn + + return decorate + + +OUTPUT_TYPE_REGISTRY = {} +METRIC_REGISTRY = {} +METRIC_AGGREGATION_REGISTRY = {} +AGGREGATION_REGISTRY: Dict[str, Callable[[], Dict[str, Callable]]] = {} +HIGHER_IS_BETTER_REGISTRY = {} +FILTER_REGISTRY = {} + +DEFAULT_METRIC_REGISTRY = { + "loglikelihood": [ + "perplexity", + "acc", + ], + "loglikelihood_rolling": ["word_perplexity", "byte_perplexity", "bits_per_byte"], + "multiple_choice": ["acc", "acc_norm"], + "generate_until": ["exact_match"], +} + + +def register_metric(**args): + # TODO: do we want to enforce a certain interface to registered metrics? + def decorate(fn): + assert "metric" in args + name = args["metric"] + + for key, registry in [ + ("metric", METRIC_REGISTRY), + ("higher_is_better", HIGHER_IS_BETTER_REGISTRY), + ("aggregation", METRIC_AGGREGATION_REGISTRY), + ]: + if key in args: + value = args[key] + assert value not in registry, ( + f"{key} named '{value}' conflicts with existing registered {key}!" + ) + + if key == "metric": + registry[name] = fn + elif key == "aggregation": + registry[name] = AGGREGATION_REGISTRY[value] + else: + registry[name] = value + + return fn + + return decorate + + +def get_metric(name: str, hf_evaluate_metric=False) -> Callable: + if not hf_evaluate_metric: + if name in METRIC_REGISTRY: + return METRIC_REGISTRY[name] + else: + eval_logger.warning( + f"Could not find registered metric '{name}' in lm-eval, searching in HF Evaluate library..." + ) + + try: + metric_object = hf_evaluate.load(name) + return metric_object.compute + except Exception: + eval_logger.error( + f"{name} not found in the evaluate library! 
Please check https://huggingface.co/evaluate-metric", + ) + + +def register_aggregation(name: str): + def decorate(fn): + assert name not in AGGREGATION_REGISTRY, ( + f"aggregation named '{name}' conflicts with existing registered aggregation!" + ) + + AGGREGATION_REGISTRY[name] = fn + return fn + + return decorate + + +def get_aggregation(name: str) -> Callable[[], Dict[str, Callable]]: + try: + return AGGREGATION_REGISTRY[name] + except KeyError: + eval_logger.warning(f"{name} not a registered aggregation metric!") + + +def get_metric_aggregation(name: str) -> Callable[[], Dict[str, Callable]]: + try: + return METRIC_AGGREGATION_REGISTRY[name] + except KeyError: + eval_logger.warning(f"{name} metric is not assigned a default aggregation!") + + +def is_higher_better(metric_name) -> bool: + try: + return HIGHER_IS_BETTER_REGISTRY[metric_name] + except KeyError: + eval_logger.warning( + f"higher_is_better not specified for metric '{metric_name}'!" + ) + + +def register_filter(name): + def decorate(cls): + if name in FILTER_REGISTRY: + eval_logger.info( + f"Registering filter `{name}` that is already in Registry {FILTER_REGISTRY}" + ) + FILTER_REGISTRY[name] = cls + return cls + + return decorate + + +def get_filter(filter_name: Union[str, Callable]) -> Callable: + try: + return FILTER_REGISTRY[filter_name] + except KeyError as e: + if callable(filter_name): + return filter_name + else: + eval_logger.warning(f"filter `{filter_name}` is not registered!") + raise e diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate-1.12.0.dist-info/licenses/LICENSE b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate-1.12.0.dist-info/licenses/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate-1.12.0.dist-info/licenses/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, 
January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/click/__pycache__/_winconsole.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/click/__pycache__/_winconsole.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d9ce11f659f45463514dc5588d24edbe63285a8f Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/click/__pycache__/_winconsole.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/click/__pycache__/exceptions.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/click/__pycache__/exceptions.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3b0caca6d80b03e45ccfe117b559f764a9627470 Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/click/__pycache__/exceptions.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/click/__pycache__/parser.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/click/__pycache__/parser.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..007f04fafdda632e51b136a8a80670946d655073 Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/click/__pycache__/parser.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/click/__pycache__/termui.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/click/__pycache__/termui.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ea94974e83d08d6c292a39a37e80bfacbe899ee9 Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/click/__pycache__/termui.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/click/__pycache__/testing.cpython-312.pyc 
b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/click/__pycache__/testing.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a47efe9d17a89fb1fbc17f9286c9905347a68c8e Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/click/__pycache__/testing.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/dateutil/__pycache__/_version.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/dateutil/__pycache__/_version.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9140af1e48086c28cf095d56e9b3ee1e67e9feee Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/dateutil/__pycache__/_version.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/dateutil/__pycache__/relativedelta.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/dateutil/__pycache__/relativedelta.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..01f09b905ab8adbeb160641c94e8bbfc37a3a6bd Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/dateutil/__pycache__/relativedelta.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/dateutil/__pycache__/rrule.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/dateutil/__pycache__/rrule.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..548579eeab96d4a752f27ff1f7edc678b1097ada Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/dateutil/__pycache__/rrule.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/dateutil/tz/__init__.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/dateutil/tz/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..af1352c47292f4eebc5cae8da45641b5544558e3 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/dateutil/tz/__init__.py @@ -0,0 +1,12 @@ +# -*- coding: utf-8 -*- +from .tz import * +from .tz import __doc__ + +__all__ = ["tzutc", "tzoffset", "tzlocal", "tzfile", "tzrange", + "tzstr", "tzical", "tzwin", "tzwinlocal", "gettz", + "enfold", "datetime_ambiguous", "datetime_exists", + "resolve_imaginary", "UTC", "DeprecatedTzFormatWarning"] + + +class DeprecatedTzFormatWarning(Warning): + """Warning raised when time zones are parsed from deprecated formats.""" diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/dateutil/tz/_common.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/dateutil/tz/_common.py new file mode 100644 index 0000000000000000000000000000000000000000..e6ac11831522b266114d5b68ee1da298e3aeb14a --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/dateutil/tz/_common.py @@ -0,0 +1,419 @@ +from six import PY2 + +from functools import wraps + +from datetime import datetime, timedelta, tzinfo + + +ZERO = timedelta(0) + +__all__ = ['tzname_in_python2', 'enfold'] + + +def tzname_in_python2(namefunc): + """Change unicode output into bytestrings in Python 2 + + tzname() API changed in Python 3. It used to return bytes, but was changed + to unicode strings + """ + if PY2: + @wraps(namefunc) + def adjust_encoding(*args, **kwargs): + name = namefunc(*args, **kwargs) + if name is not None: + name = name.encode() + + return name + + return adjust_encoding + else: + return namefunc + + +# The following is adapted from Alexander Belopolsky's tz library +# https://github.com/abalkin/tz +if hasattr(datetime, 'fold'): + # This is the pre-python 3.6 fold situation + def enfold(dt, fold=1): + """ + Provides a unified interface for assigning the ``fold`` attribute to + datetimes both before and after the implementation of PEP-495. 
+ + :param fold: + The value for the ``fold`` attribute in the returned datetime. This + should be either 0 or 1. + + :return: + Returns an object for which ``getattr(dt, 'fold', 0)`` returns + ``fold`` for all versions of Python. In versions prior to + Python 3.6, this is a ``_DatetimeWithFold`` object, which is a + subclass of :py:class:`datetime.datetime` with the ``fold`` + attribute added, if ``fold`` is 1. + + .. versionadded:: 2.6.0 + """ + return dt.replace(fold=fold) + +else: + class _DatetimeWithFold(datetime): + """ + This is a class designed to provide a PEP 495-compliant interface for + Python versions before 3.6. It is used only for dates in a fold, so + the ``fold`` attribute is fixed at ``1``. + + .. versionadded:: 2.6.0 + """ + __slots__ = () + + def replace(self, *args, **kwargs): + """ + Return a datetime with the same attributes, except for those + attributes given new values by whichever keyword arguments are + specified. Note that tzinfo=None can be specified to create a naive + datetime from an aware datetime with no conversion of date and time + data. + + This is reimplemented in ``_DatetimeWithFold`` because pypy3 will + return a ``datetime.datetime`` even if ``fold`` is unchanged. + """ + argnames = ( + 'year', 'month', 'day', 'hour', 'minute', 'second', + 'microsecond', 'tzinfo' + ) + + for arg, argname in zip(args, argnames): + if argname in kwargs: + raise TypeError('Duplicate argument: {}'.format(argname)) + + kwargs[argname] = arg + + for argname in argnames: + if argname not in kwargs: + kwargs[argname] = getattr(self, argname) + + dt_class = self.__class__ if kwargs.get('fold', 1) else datetime + + return dt_class(**kwargs) + + @property + def fold(self): + return 1 + + def enfold(dt, fold=1): + """ + Provides a unified interface for assigning the ``fold`` attribute to + datetimes both before and after the implementation of PEP-495. + + :param fold: + The value for the ``fold`` attribute in the returned datetime. 
This + should be either 0 or 1. + + :return: + Returns an object for which ``getattr(dt, 'fold', 0)`` returns + ``fold`` for all versions of Python. In versions prior to + Python 3.6, this is a ``_DatetimeWithFold`` object, which is a + subclass of :py:class:`datetime.datetime` with the ``fold`` + attribute added, if ``fold`` is 1. + + .. versionadded:: 2.6.0 + """ + if getattr(dt, 'fold', 0) == fold: + return dt + + args = dt.timetuple()[:6] + args += (dt.microsecond, dt.tzinfo) + + if fold: + return _DatetimeWithFold(*args) + else: + return datetime(*args) + + +def _validate_fromutc_inputs(f): + """ + The CPython version of ``fromutc`` checks that the input is a ``datetime`` + object and that ``self`` is attached as its ``tzinfo``. + """ + @wraps(f) + def fromutc(self, dt): + if not isinstance(dt, datetime): + raise TypeError("fromutc() requires a datetime argument") + if dt.tzinfo is not self: + raise ValueError("dt.tzinfo is not self") + + return f(self, dt) + + return fromutc + + +class _tzinfo(tzinfo): + """ + Base class for all ``dateutil`` ``tzinfo`` objects. + """ + + def is_ambiguous(self, dt): + """ + Whether or not the "wall time" of a given datetime is ambiguous in this + zone. + + :param dt: + A :py:class:`datetime.datetime`, naive or time zone aware. + + + :return: + Returns ``True`` if ambiguous, ``False`` otherwise. + + .. versionadded:: 2.6.0 + """ + + dt = dt.replace(tzinfo=self) + + wall_0 = enfold(dt, fold=0) + wall_1 = enfold(dt, fold=1) + + same_offset = wall_0.utcoffset() == wall_1.utcoffset() + same_dt = wall_0.replace(tzinfo=None) == wall_1.replace(tzinfo=None) + + return same_dt and not same_offset + + def _fold_status(self, dt_utc, dt_wall): + """ + Determine the fold status of a "wall" datetime, given a representation + of the same datetime as a (naive) UTC datetime. 
This is calculated based + on the assumption that ``dt.utcoffset() - dt.dst()`` is constant for all + datetimes, and that this offset is the actual number of hours separating + ``dt_utc`` and ``dt_wall``. + + :param dt_utc: + Representation of the datetime as UTC + + :param dt_wall: + Representation of the datetime as "wall time". This parameter must + either have a `fold` attribute or have a fold-naive + :class:`datetime.tzinfo` attached, otherwise the calculation may + fail. + """ + if self.is_ambiguous(dt_wall): + delta_wall = dt_wall - dt_utc + _fold = int(delta_wall == (dt_utc.utcoffset() - dt_utc.dst())) + else: + _fold = 0 + + return _fold + + def _fold(self, dt): + return getattr(dt, 'fold', 0) + + def _fromutc(self, dt): + """ + Given a timezone-aware datetime in a given timezone, calculates a + timezone-aware datetime in a new timezone. + + Since this is the one time that we *know* we have an unambiguous + datetime object, we take this opportunity to determine whether the + datetime is ambiguous and in a "fold" state (e.g. if it's the first + occurrence, chronologically, of the ambiguous datetime). + + :param dt: + A timezone-aware :class:`datetime.datetime` object. + """ + + # Re-implement the algorithm from Python's datetime.py + dtoff = dt.utcoffset() + if dtoff is None: + raise ValueError("fromutc() requires a non-None utcoffset() " + "result") + + # The original datetime.py code assumes that `dst()` defaults to + # zero during ambiguous times. PEP 495 inverts this presumption, so + # for pre-PEP 495 versions of python, we need to tweak the algorithm. + dtdst = dt.dst() + if dtdst is None: + raise ValueError("fromutc() requires a non-None dst() result") + delta = dtoff - dtdst + + dt += delta + # Set fold=1 so we can default to being in the fold for + # ambiguous dates. 
+ dtdst = enfold(dt, fold=1).dst() + if dtdst is None: + raise ValueError("fromutc(): dt.dst gave inconsistent " + "results; cannot convert") + return dt + dtdst + + @_validate_fromutc_inputs + def fromutc(self, dt): + """ + Given a timezone-aware datetime in a given timezone, calculates a + timezone-aware datetime in a new timezone. + + Since this is the one time that we *know* we have an unambiguous + datetime object, we take this opportunity to determine whether the + datetime is ambiguous and in a "fold" state (e.g. if it's the first + occurrence, chronologically, of the ambiguous datetime). + + :param dt: + A timezone-aware :class:`datetime.datetime` object. + """ + dt_wall = self._fromutc(dt) + + # Calculate the fold status given the two datetimes. + _fold = self._fold_status(dt, dt_wall) + + # Set the default fold value for ambiguous dates + return enfold(dt_wall, fold=_fold) + + +class tzrangebase(_tzinfo): + """ + This is an abstract base class for time zones represented by an annual + transition into and out of DST. Child classes should implement the following + methods: + + * ``__init__(self, *args, **kwargs)`` + * ``transitions(self, year)`` - this is expected to return a tuple of + datetimes representing the DST on and off transitions in standard + time. + + A fully initialized ``tzrangebase`` subclass should also provide the + following attributes: + * ``hasdst``: Boolean whether or not the zone uses DST. + * ``_dst_offset`` / ``_std_offset``: :class:`datetime.timedelta` objects + representing the respective UTC offsets. + * ``_dst_abbr`` / ``_std_abbr``: Strings representing the timezone short + abbreviations in DST and STD, respectively. + * ``_hasdst``: Whether or not the zone has DST. + + .. 
versionadded:: 2.6.0 + """ + def __init__(self): + raise NotImplementedError('tzrangebase is an abstract base class') + + def utcoffset(self, dt): + isdst = self._isdst(dt) + + if isdst is None: + return None + elif isdst: + return self._dst_offset + else: + return self._std_offset + + def dst(self, dt): + isdst = self._isdst(dt) + + if isdst is None: + return None + elif isdst: + return self._dst_base_offset + else: + return ZERO + + @tzname_in_python2 + def tzname(self, dt): + if self._isdst(dt): + return self._dst_abbr + else: + return self._std_abbr + + def fromutc(self, dt): + """ Given a datetime in UTC, return local time """ + if not isinstance(dt, datetime): + raise TypeError("fromutc() requires a datetime argument") + + if dt.tzinfo is not self: + raise ValueError("dt.tzinfo is not self") + + # Get transitions - if there are none, fixed offset + transitions = self.transitions(dt.year) + if transitions is None: + return dt + self.utcoffset(dt) + + # Get the transition times in UTC + dston, dstoff = transitions + + dston -= self._std_offset + dstoff -= self._std_offset + + utc_transitions = (dston, dstoff) + dt_utc = dt.replace(tzinfo=None) + + isdst = self._naive_isdst(dt_utc, utc_transitions) + + if isdst: + dt_wall = dt + self._dst_offset + else: + dt_wall = dt + self._std_offset + + _fold = int(not isdst and self.is_ambiguous(dt_wall)) + + return enfold(dt_wall, fold=_fold) + + def is_ambiguous(self, dt): + """ + Whether or not the "wall time" of a given datetime is ambiguous in this + zone. + + :param dt: + A :py:class:`datetime.datetime`, naive or time zone aware. + + + :return: + Returns ``True`` if ambiguous, ``False`` otherwise. + + .. 
versionadded:: 2.6.0 + """ + if not self.hasdst: + return False + + start, end = self.transitions(dt.year) + + dt = dt.replace(tzinfo=None) + return (end <= dt < end + self._dst_base_offset) + + def _isdst(self, dt): + if not self.hasdst: + return False + elif dt is None: + return None + + transitions = self.transitions(dt.year) + + if transitions is None: + return False + + dt = dt.replace(tzinfo=None) + + isdst = self._naive_isdst(dt, transitions) + + # Handle ambiguous dates + if not isdst and self.is_ambiguous(dt): + return not self._fold(dt) + else: + return isdst + + def _naive_isdst(self, dt, transitions): + dston, dstoff = transitions + + dt = dt.replace(tzinfo=None) + + if dston < dstoff: + isdst = dston <= dt < dstoff + else: + isdst = not dstoff <= dt < dston + + return isdst + + @property + def _dst_base_offset(self): + return self._dst_offset - self._std_offset + + __hash__ = None + + def __ne__(self, other): + return not (self == other) + + def __repr__(self): + return "%s(...)" % self.__class__.__name__ + + __reduce__ = object.__reduce__ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/dateutil/tz/_factories.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/dateutil/tz/_factories.py new file mode 100644 index 0000000000000000000000000000000000000000..f8a65891a023ebf9eb0c24d391ba67541b7133f1 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/dateutil/tz/_factories.py @@ -0,0 +1,80 @@ +from datetime import timedelta +import weakref +from collections import OrderedDict + +from six.moves import _thread + + +class _TzSingleton(type): + def __init__(cls, *args, **kwargs): + cls.__instance = None + super(_TzSingleton, cls).__init__(*args, **kwargs) + + def __call__(cls): + if cls.__instance is None: + cls.__instance = super(_TzSingleton, cls).__call__() + return cls.__instance + + +class _TzFactory(type): + def instance(cls, *args, **kwargs): + """Alternate constructor that returns a fresh 
instance""" + return type.__call__(cls, *args, **kwargs) + + +class _TzOffsetFactory(_TzFactory): + def __init__(cls, *args, **kwargs): + cls.__instances = weakref.WeakValueDictionary() + cls.__strong_cache = OrderedDict() + cls.__strong_cache_size = 8 + + cls._cache_lock = _thread.allocate_lock() + + def __call__(cls, name, offset): + if isinstance(offset, timedelta): + key = (name, offset.total_seconds()) + else: + key = (name, offset) + + instance = cls.__instances.get(key, None) + if instance is None: + instance = cls.__instances.setdefault(key, + cls.instance(name, offset)) + + # This lock may not be necessary in Python 3. See GH issue #901 + with cls._cache_lock: + cls.__strong_cache[key] = cls.__strong_cache.pop(key, instance) + + # Remove an item if the strong cache is overpopulated + if len(cls.__strong_cache) > cls.__strong_cache_size: + cls.__strong_cache.popitem(last=False) + + return instance + + +class _TzStrFactory(_TzFactory): + def __init__(cls, *args, **kwargs): + cls.__instances = weakref.WeakValueDictionary() + cls.__strong_cache = OrderedDict() + cls.__strong_cache_size = 8 + + cls.__cache_lock = _thread.allocate_lock() + + def __call__(cls, s, posix_offset=False): + key = (s, posix_offset) + instance = cls.__instances.get(key, None) + + if instance is None: + instance = cls.__instances.setdefault(key, + cls.instance(s, posix_offset)) + + # This lock may not be necessary in Python 3. 
See GH issue #901 + with cls.__cache_lock: + cls.__strong_cache[key] = cls.__strong_cache.pop(key, instance) + + # Remove an item if the strong cache is overpopulated + if len(cls.__strong_cache) > cls.__strong_cache_size: + cls.__strong_cache.popitem(last=False) + + return instance + diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/dateutil/tz/tz.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/dateutil/tz/tz.py new file mode 100644 index 0000000000000000000000000000000000000000..617591446bd92eb1cc7b7d67fa3f17435e691cdd --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/dateutil/tz/tz.py @@ -0,0 +1,1849 @@ +# -*- coding: utf-8 -*- +""" +This module offers timezone implementations subclassing the abstract +:py:class:`datetime.tzinfo` type. There are classes to handle tzfile format +files (usually are in :file:`/etc/localtime`, :file:`/usr/share/zoneinfo`, +etc), TZ environment string (in all known formats), given ranges (with help +from relative deltas), local machine timezone, fixed offset timezone, and UTC +timezone. +""" +import datetime +import struct +import time +import sys +import os +import bisect +import weakref +from collections import OrderedDict + +import six +from six import string_types +from six.moves import _thread +from ._common import tzname_in_python2, _tzinfo +from ._common import tzrangebase, enfold +from ._common import _validate_fromutc_inputs + +from ._factories import _TzSingleton, _TzOffsetFactory +from ._factories import _TzStrFactory +try: + from .win import tzwin, tzwinlocal +except ImportError: + tzwin = tzwinlocal = None + +# For warning about rounding tzinfo +from warnings import warn + +ZERO = datetime.timedelta(0) +EPOCH = datetime.datetime(1970, 1, 1, 0, 0) +EPOCHORDINAL = EPOCH.toordinal() + + +@six.add_metaclass(_TzSingleton) +class tzutc(datetime.tzinfo): + """ + This is a tzinfo object that represents the UTC time zone. + + **Examples:** + + .. 
doctest:: + + >>> from datetime import * + >>> from dateutil.tz import * + + >>> datetime.now() + datetime.datetime(2003, 9, 27, 9, 40, 1, 521290) + + >>> datetime.now(tzutc()) + datetime.datetime(2003, 9, 27, 12, 40, 12, 156379, tzinfo=tzutc()) + + >>> datetime.now(tzutc()).tzname() + 'UTC' + + .. versionchanged:: 2.7.0 + ``tzutc()`` is now a singleton, so the result of ``tzutc()`` will + always return the same object. + + .. doctest:: + + >>> from dateutil.tz import tzutc, UTC + >>> tzutc() is tzutc() + True + >>> tzutc() is UTC + True + """ + def utcoffset(self, dt): + return ZERO + + def dst(self, dt): + return ZERO + + @tzname_in_python2 + def tzname(self, dt): + return "UTC" + + def is_ambiguous(self, dt): + """ + Whether or not the "wall time" of a given datetime is ambiguous in this + zone. + + :param dt: + A :py:class:`datetime.datetime`, naive or time zone aware. + + + :return: + Returns ``True`` if ambiguous, ``False`` otherwise. + + .. versionadded:: 2.6.0 + """ + return False + + @_validate_fromutc_inputs + def fromutc(self, dt): + """ + Fast track version of fromutc() returns the original ``dt`` object for + any valid :py:class:`datetime.datetime` object. + """ + return dt + + def __eq__(self, other): + if not isinstance(other, (tzutc, tzoffset)): + return NotImplemented + + return (isinstance(other, tzutc) or + (isinstance(other, tzoffset) and other._offset == ZERO)) + + __hash__ = None + + def __ne__(self, other): + return not (self == other) + + def __repr__(self): + return "%s()" % self.__class__.__name__ + + __reduce__ = object.__reduce__ + + +#: Convenience constant providing a :class:`tzutc()` instance +#: +#: .. versionadded:: 2.7.0 +UTC = tzutc() + + +@six.add_metaclass(_TzOffsetFactory) +class tzoffset(datetime.tzinfo): + """ + A simple class for representing a fixed offset from UTC. + + :param name: + The timezone name, to be returned when ``tzname()`` is called. 
+ :param offset: + The time zone offset in seconds, or (since version 2.6.0, represented + as a :py:class:`datetime.timedelta` object). + """ + def __init__(self, name, offset): + self._name = name + + try: + # Allow a timedelta + offset = offset.total_seconds() + except (TypeError, AttributeError): + pass + + self._offset = datetime.timedelta(seconds=_get_supported_offset(offset)) + + def utcoffset(self, dt): + return self._offset + + def dst(self, dt): + return ZERO + + @tzname_in_python2 + def tzname(self, dt): + return self._name + + @_validate_fromutc_inputs + def fromutc(self, dt): + return dt + self._offset + + def is_ambiguous(self, dt): + """ + Whether or not the "wall time" of a given datetime is ambiguous in this + zone. + + :param dt: + A :py:class:`datetime.datetime`, naive or time zone aware. + :return: + Returns ``True`` if ambiguous, ``False`` otherwise. + + .. versionadded:: 2.6.0 + """ + return False + + def __eq__(self, other): + if not isinstance(other, tzoffset): + return NotImplemented + + return self._offset == other._offset + + __hash__ = None + + def __ne__(self, other): + return not (self == other) + + def __repr__(self): + return "%s(%s, %s)" % (self.__class__.__name__, + repr(self._name), + int(self._offset.total_seconds())) + + __reduce__ = object.__reduce__ + + +class tzlocal(_tzinfo): + """ + A :class:`tzinfo` subclass built around the ``time`` timezone functions. 
+ """ + def __init__(self): + super(tzlocal, self).__init__() + + self._std_offset = datetime.timedelta(seconds=-time.timezone) + if time.daylight: + self._dst_offset = datetime.timedelta(seconds=-time.altzone) + else: + self._dst_offset = self._std_offset + + self._dst_saved = self._dst_offset - self._std_offset + self._hasdst = bool(self._dst_saved) + self._tznames = tuple(time.tzname) + + def utcoffset(self, dt): + if dt is None and self._hasdst: + return None + + if self._isdst(dt): + return self._dst_offset + else: + return self._std_offset + + def dst(self, dt): + if dt is None and self._hasdst: + return None + + if self._isdst(dt): + return self._dst_offset - self._std_offset + else: + return ZERO + + @tzname_in_python2 + def tzname(self, dt): + return self._tznames[self._isdst(dt)] + + def is_ambiguous(self, dt): + """ + Whether or not the "wall time" of a given datetime is ambiguous in this + zone. + + :param dt: + A :py:class:`datetime.datetime`, naive or time zone aware. + + + :return: + Returns ``True`` if ambiguous, ``False`` otherwise. + + .. versionadded:: 2.6.0 + """ + naive_dst = self._naive_is_dst(dt) + return (not naive_dst and + (naive_dst != self._naive_is_dst(dt - self._dst_saved))) + + def _naive_is_dst(self, dt): + timestamp = _datetime_to_timestamp(dt) + return time.localtime(timestamp + time.timezone).tm_isdst + + def _isdst(self, dt, fold_naive=True): + # We can't use mktime here. It is unstable when deciding if + # the hour near to a change is DST or not. 
+ # + # timestamp = time.mktime((dt.year, dt.month, dt.day, dt.hour, + # dt.minute, dt.second, dt.weekday(), 0, -1)) + # return time.localtime(timestamp).tm_isdst + # + # The code above yields the following result: + # + # >>> import tz, datetime + # >>> t = tz.tzlocal() + # >>> datetime.datetime(2003,2,15,23,tzinfo=t).tzname() + # 'BRDT' + # >>> datetime.datetime(2003,2,16,0,tzinfo=t).tzname() + # 'BRST' + # >>> datetime.datetime(2003,2,15,23,tzinfo=t).tzname() + # 'BRST' + # >>> datetime.datetime(2003,2,15,22,tzinfo=t).tzname() + # 'BRDT' + # >>> datetime.datetime(2003,2,15,23,tzinfo=t).tzname() + # 'BRDT' + # + # Here is a more stable implementation: + # + if not self._hasdst: + return False + + # Check for ambiguous times: + dstval = self._naive_is_dst(dt) + fold = getattr(dt, 'fold', None) + + if self.is_ambiguous(dt): + if fold is not None: + return not self._fold(dt) + else: + return True + + return dstval + + def __eq__(self, other): + if isinstance(other, tzlocal): + return (self._std_offset == other._std_offset and + self._dst_offset == other._dst_offset) + elif isinstance(other, tzutc): + return (not self._hasdst and + self._tznames[0] in {'UTC', 'GMT'} and + self._std_offset == ZERO) + elif isinstance(other, tzoffset): + return (not self._hasdst and + self._tznames[0] == other._name and + self._std_offset == other._offset) + else: + return NotImplemented + + __hash__ = None + + def __ne__(self, other): + return not (self == other) + + def __repr__(self): + return "%s()" % self.__class__.__name__ + + __reduce__ = object.__reduce__ + + +class _ttinfo(object): + __slots__ = ["offset", "delta", "isdst", "abbr", + "isstd", "isgmt", "dstoffset"] + + def __init__(self): + for attr in self.__slots__: + setattr(self, attr, None) + + def __repr__(self): + l = [] + for attr in self.__slots__: + value = getattr(self, attr) + if value is not None: + l.append("%s=%s" % (attr, repr(value))) + return "%s(%s)" % (self.__class__.__name__, ", ".join(l)) + + def 
__eq__(self, other): + if not isinstance(other, _ttinfo): + return NotImplemented + + return (self.offset == other.offset and + self.delta == other.delta and + self.isdst == other.isdst and + self.abbr == other.abbr and + self.isstd == other.isstd and + self.isgmt == other.isgmt and + self.dstoffset == other.dstoffset) + + __hash__ = None + + def __ne__(self, other): + return not (self == other) + + def __getstate__(self): + state = {} + for name in self.__slots__: + state[name] = getattr(self, name, None) + return state + + def __setstate__(self, state): + for name in self.__slots__: + if name in state: + setattr(self, name, state[name]) + + +class _tzfile(object): + """ + Lightweight class for holding the relevant transition and time zone + information read from binary tzfiles. + """ + attrs = ['trans_list', 'trans_list_utc', 'trans_idx', 'ttinfo_list', + 'ttinfo_std', 'ttinfo_dst', 'ttinfo_before', 'ttinfo_first'] + + def __init__(self, **kwargs): + for attr in self.attrs: + setattr(self, attr, kwargs.get(attr, None)) + + +class tzfile(_tzinfo): + """ + This is a ``tzinfo`` subclass that allows one to use the ``tzfile(5)`` + format timezone files to extract current and historical zone information. + + :param fileobj: + This can be an opened file stream or a file name that the time zone + information can be read from. + + :param filename: + This is an optional parameter specifying the source of the time zone + information in the event that ``fileobj`` is a file object. If omitted + and ``fileobj`` is a file stream, this parameter will be set either to + ``fileobj``'s ``name`` attribute or to ``repr(fileobj)``. + + See `Sources for Time Zone and Daylight Saving Time Data + `_ for more information. + Time zone files can be compiled from the `IANA Time Zone database files + `_ with the `zic time zone compiler + `_ + + .. 
note:: + + Only construct a ``tzfile`` directly if you have a specific timezone + file on disk that you want to read into a Python ``tzinfo`` object. + If you want to get a ``tzfile`` representing a specific IANA zone, + (e.g. ``'America/New_York'``), you should call + :func:`dateutil.tz.gettz` with the zone identifier. + + + **Examples:** + + Using the US Eastern time zone as an example, we can see that a ``tzfile`` + provides time zone information for the standard Daylight Saving offsets: + + .. testsetup:: tzfile + + from dateutil.tz import gettz + from datetime import datetime + + .. doctest:: tzfile + + >>> NYC = gettz('America/New_York') + >>> NYC + tzfile('/usr/share/zoneinfo/America/New_York') + + >>> print(datetime(2016, 1, 3, tzinfo=NYC)) # EST + 2016-01-03 00:00:00-05:00 + + >>> print(datetime(2016, 7, 7, tzinfo=NYC)) # EDT + 2016-07-07 00:00:00-04:00 + + + The ``tzfile`` structure contains a fully history of the time zone, + so historical dates will also have the right offsets. For example, before + the adoption of the UTC standards, New York used local solar mean time: + + .. doctest:: tzfile + + >>> print(datetime(1901, 4, 12, tzinfo=NYC)) # LMT + 1901-04-12 00:00:00-04:56 + + And during World War II, New York was on "Eastern War Time", which was a + state of permanent daylight saving time: + + .. 
doctest:: tzfile + + >>> print(datetime(1944, 2, 7, tzinfo=NYC)) # EWT + 1944-02-07 00:00:00-04:00 + + """ + + def __init__(self, fileobj, filename=None): + super(tzfile, self).__init__() + + file_opened_here = False + if isinstance(fileobj, string_types): + self._filename = fileobj + fileobj = open(fileobj, 'rb') + file_opened_here = True + elif filename is not None: + self._filename = filename + elif hasattr(fileobj, "name"): + self._filename = fileobj.name + else: + self._filename = repr(fileobj) + + if fileobj is not None: + if not file_opened_here: + fileobj = _nullcontext(fileobj) + + with fileobj as file_stream: + tzobj = self._read_tzfile(file_stream) + + self._set_tzdata(tzobj) + + def _set_tzdata(self, tzobj): + """ Set the time zone data of this object from a _tzfile object """ + # Copy the relevant attributes over as private attributes + for attr in _tzfile.attrs: + setattr(self, '_' + attr, getattr(tzobj, attr)) + + def _read_tzfile(self, fileobj): + out = _tzfile() + + # From tzfile(5): + # + # The time zone information files used by tzset(3) + # begin with the magic characters "TZif" to identify + # them as time zone information files, followed by + # sixteen bytes reserved for future use, followed by + # six four-byte values of type long, written in a + # ``standard'' byte order (the high-order byte + # of the value is written first). + if fileobj.read(4).decode() != "TZif": + raise ValueError("magic not found") + + fileobj.read(16) + + ( + # The number of UTC/local indicators stored in the file. + ttisgmtcnt, + + # The number of standard/wall indicators stored in the file. + ttisstdcnt, + + # The number of leap seconds for which data is + # stored in the file. + leapcnt, + + # The number of "transition times" for which data + # is stored in the file. + timecnt, + + # The number of "local time types" for which data + # is stored in the file (must not be zero). 
+ typecnt, + + # The number of characters of "time zone + # abbreviation strings" stored in the file. + charcnt, + + ) = struct.unpack(">6l", fileobj.read(24)) + + # The above header is followed by tzh_timecnt four-byte + # values of type long, sorted in ascending order. + # These values are written in ``standard'' byte order. + # Each is used as a transition time (as returned by + # time(2)) at which the rules for computing local time + # change. + + if timecnt: + out.trans_list_utc = list(struct.unpack(">%dl" % timecnt, + fileobj.read(timecnt*4))) + else: + out.trans_list_utc = [] + + # Next come tzh_timecnt one-byte values of type unsigned + # char; each one tells which of the different types of + # ``local time'' types described in the file is associated + # with the same-indexed transition time. These values + # serve as indices into an array of ttinfo structures that + # appears next in the file. + + if timecnt: + out.trans_idx = struct.unpack(">%dB" % timecnt, + fileobj.read(timecnt)) + else: + out.trans_idx = [] + + # Each ttinfo structure is written as a four-byte value + # for tt_gmtoff of type long, in a standard byte + # order, followed by a one-byte value for tt_isdst + # and a one-byte value for tt_abbrind. In each + # structure, tt_gmtoff gives the number of + # seconds to be added to UTC, tt_isdst tells whether + # tm_isdst should be set by localtime(3), and + # tt_abbrind serves as an index into the array of + # time zone abbreviation characters that follow the + # ttinfo structure(s) in the file. + + ttinfo = [] + + for i in range(typecnt): + ttinfo.append(struct.unpack(">lbb", fileobj.read(6))) + + abbr = fileobj.read(charcnt).decode() + + # Then there are tzh_leapcnt pairs of four-byte + # values, written in standard byte order; the + # first value of each pair gives the time (as + # returned by time(2)) at which a leap second + # occurs; the second gives the total number of + # leap seconds to be applied after the given time. 
+ # The pairs of values are sorted in ascending order + # by time. + + # Not used, for now (but seek for correct file position) + if leapcnt: + fileobj.seek(leapcnt * 8, os.SEEK_CUR) + + # Then there are tzh_ttisstdcnt standard/wall + # indicators, each stored as a one-byte value; + # they tell whether the transition times associated + # with local time types were specified as standard + # time or wall clock time, and are used when + # a time zone file is used in handling POSIX-style + # time zone environment variables. + + if ttisstdcnt: + isstd = struct.unpack(">%db" % ttisstdcnt, + fileobj.read(ttisstdcnt)) + + # Finally, there are tzh_ttisgmtcnt UTC/local + # indicators, each stored as a one-byte value; + # they tell whether the transition times associated + # with local time types were specified as UTC or + # local time, and are used when a time zone file + # is used in handling POSIX-style time zone envi- + # ronment variables. + + if ttisgmtcnt: + isgmt = struct.unpack(">%db" % ttisgmtcnt, + fileobj.read(ttisgmtcnt)) + + # Build ttinfo list + out.ttinfo_list = [] + for i in range(typecnt): + gmtoff, isdst, abbrind = ttinfo[i] + gmtoff = _get_supported_offset(gmtoff) + tti = _ttinfo() + tti.offset = gmtoff + tti.dstoffset = datetime.timedelta(0) + tti.delta = datetime.timedelta(seconds=gmtoff) + tti.isdst = isdst + tti.abbr = abbr[abbrind:abbr.find('\x00', abbrind)] + tti.isstd = (ttisstdcnt > i and isstd[i] != 0) + tti.isgmt = (ttisgmtcnt > i and isgmt[i] != 0) + out.ttinfo_list.append(tti) + + # Replace ttinfo indexes for ttinfo objects. + out.trans_idx = [out.ttinfo_list[idx] for idx in out.trans_idx] + + # Set standard, dst, and before ttinfos. before will be + # used when a given time is before any transitions, + # and will be set to the first non-dst ttinfo, or to + # the first dst, if all of them are dst. 
+ out.ttinfo_std = None + out.ttinfo_dst = None + out.ttinfo_before = None + if out.ttinfo_list: + if not out.trans_list_utc: + out.ttinfo_std = out.ttinfo_first = out.ttinfo_list[0] + else: + for i in range(timecnt-1, -1, -1): + tti = out.trans_idx[i] + if not out.ttinfo_std and not tti.isdst: + out.ttinfo_std = tti + elif not out.ttinfo_dst and tti.isdst: + out.ttinfo_dst = tti + + if out.ttinfo_std and out.ttinfo_dst: + break + else: + if out.ttinfo_dst and not out.ttinfo_std: + out.ttinfo_std = out.ttinfo_dst + + for tti in out.ttinfo_list: + if not tti.isdst: + out.ttinfo_before = tti + break + else: + out.ttinfo_before = out.ttinfo_list[0] + + # Now fix transition times to become relative to wall time. + # + # I'm not sure about this. In my tests, the tz source file + # is setup to wall time, and in the binary file isstd and + # isgmt are off, so it should be in wall time. OTOH, it's + # always in gmt time. Let me know if you have comments + # about this. + lastdst = None + lastoffset = None + lastdstoffset = None + lastbaseoffset = None + out.trans_list = [] + + for i, tti in enumerate(out.trans_idx): + offset = tti.offset + dstoffset = 0 + + if lastdst is not None: + if tti.isdst: + if not lastdst: + dstoffset = offset - lastoffset + + if not dstoffset and lastdstoffset: + dstoffset = lastdstoffset + + tti.dstoffset = datetime.timedelta(seconds=dstoffset) + lastdstoffset = dstoffset + + # If a time zone changes its base offset during a DST transition, + # then you need to adjust by the previous base offset to get the + # transition time in local time. Otherwise you use the current + # base offset. Ideally, I would have some mathematical proof of + # why this is true, but I haven't really thought about it enough. 
+ baseoffset = offset - dstoffset + adjustment = baseoffset + if (lastbaseoffset is not None and baseoffset != lastbaseoffset + and tti.isdst != lastdst): + # The base DST has changed + adjustment = lastbaseoffset + + lastdst = tti.isdst + lastoffset = offset + lastbaseoffset = baseoffset + + out.trans_list.append(out.trans_list_utc[i] + adjustment) + + out.trans_idx = tuple(out.trans_idx) + out.trans_list = tuple(out.trans_list) + out.trans_list_utc = tuple(out.trans_list_utc) + + return out + + def _find_last_transition(self, dt, in_utc=False): + # If there's no list, there are no transitions to find + if not self._trans_list: + return None + + timestamp = _datetime_to_timestamp(dt) + + # Find where the timestamp fits in the transition list - if the + # timestamp is a transition time, it's part of the "after" period. + trans_list = self._trans_list_utc if in_utc else self._trans_list + idx = bisect.bisect_right(trans_list, timestamp) + + # We want to know when the previous transition was, so subtract off 1 + return idx - 1 + + def _get_ttinfo(self, idx): + # For no list or after the last transition, default to _ttinfo_std + if idx is None or (idx + 1) >= len(self._trans_list): + return self._ttinfo_std + + # If there is a list and the time is before it, return _ttinfo_before + if idx < 0: + return self._ttinfo_before + + return self._trans_idx[idx] + + def _find_ttinfo(self, dt): + idx = self._resolve_ambiguous_time(dt) + + return self._get_ttinfo(idx) + + def fromutc(self, dt): + """ + The ``tzfile`` implementation of :py:func:`datetime.tzinfo.fromutc`. + + :param dt: + A :py:class:`datetime.datetime` object. + + :raises TypeError: + Raised if ``dt`` is not a :py:class:`datetime.datetime` object. + + :raises ValueError: + Raised if this is called with a ``dt`` which does not have this + ``tzinfo`` attached. + + :return: + Returns a :py:class:`datetime.datetime` object representing the + wall time in ``self``'s time zone. 
def is_ambiguous(self, dt, idx=None):
    """
    Whether or not the "wall time" of a given datetime is ambiguous in this
    zone.

    :param dt:
        A :py:class:`datetime.datetime`, naive or time zone aware.

    :param idx:
        Optional index of the last transition at or before ``dt``. When
        omitted it is looked up with ``_find_last_transition``.

    :return:
        Returns ``True`` if ambiguous, ``False`` otherwise.

    .. versionadded:: 2.6.0
    """
    last_idx = self._find_last_transition(dt) if idx is None else idx

    wall_ts = _datetime_to_timestamp(dt)
    current = self._get_ttinfo(last_idx)

    if last_idx is not None and last_idx > 0:
        # Offset difference between the previous period and the current one;
        # the wall time is ambiguous while it is still less than this far
        # past the transition time.
        previous = self._get_ttinfo(last_idx - 1)
        overlap = previous.offset - current.offset
        transition_time = self._trans_list[last_idx]
        return wall_ts < transition_time + overlap

    # No transition list, or no earlier period to compare against.
    return False
class tzrange(tzrangebase):
    """
    The ``tzrange`` object is a time zone described by a standard offset and,
    optionally, a DST offset with yearly start and end rules. It is
    equivalent to the way the ``TZ`` variable can be specified in POSIX-like
    systems, but uses Python delta objects to specify DST start, end and
    offsets.

    :param stdabbr:
        The abbreviation for standard time (e.g. ``'EST'``).

    :param stdoffset:
        An integer number of seconds, or a :class:`datetime.timedelta`
        object or equivalent, specifying the base offset from UTC.

        If unspecified, +00:00 is used.

    :param dstabbr:
        The abbreviation for DST / "Summer" time (e.g. ``'EDT'``).

        If specified with no other DST information, DST is assumed to occur
        and the default behavior of ``dstoffset``, ``start`` and ``end`` is
        used. If unspecified and no other DST information is specified, it
        is assumed that this zone has no DST.

        If this is unspecified and other DST information *is* specified,
        DST occurs in the zone but the time zone abbreviation is left
        unchanged.

    :param dstoffset:
        An integer number of seconds, or a :class:`datetime.timedelta`
        object or equivalent, specifying the UTC offset during DST. If
        unspecified, it is the standard offset plus one hour when both
        ``dstabbr`` and ``stdoffset`` are given, and zero otherwise.

    :param start:
        A :class:`relativedelta.relativedelta` object or equivalent specifying
        the time and time of year that daylight savings time starts. To
        specify, for example, that DST starts at 2AM on the 2nd Sunday in
        March, pass:

            ``relativedelta(hours=2, month=3, day=1, weekday=SU(+2))``

        If unspecified and ``dstabbr`` is given, the default value is 2 AM on
        the first Sunday in April.

    :param end:
        A :class:`relativedelta.relativedelta` object or equivalent
        representing the time and time of year that daylight savings time
        ends, with the same specification method as in ``start``. One note is
        that this should point to the first time in the *standard* zone, so if
        a transition occurs at 2AM in the DST zone and the clocks are set back
        1 hour to 1AM, set the ``hours`` parameter to +1.

        If unspecified and ``dstabbr`` is given, the default value is 1 AM on
        the last Sunday in October.


    **Examples:**

    .. testsetup:: tzrange

        from dateutil.tz import tzrange, tzstr

    .. doctest:: tzrange

        >>> tzstr('EST5EDT') == tzrange("EST", -18000, "EDT")
        True

        >>> from dateutil.relativedelta import *
        >>> range1 = tzrange("EST", -18000, "EDT")
        >>> range2 = tzrange("EST", -18000, "EDT", -14400,
        ...                  relativedelta(hours=+2, month=4, day=1,
        ...                                weekday=SU(+1)),
        ...                  relativedelta(hours=+1, month=10, day=31,
        ...                                weekday=SU(-1)))
        >>> tzstr('EST5EDT') == range1 == range2
        True

    """
    def __init__(self, stdabbr, stdoffset=None,
                 dstabbr=None, dstoffset=None,
                 start=None, end=None):

        # Imported lazily and published as a module global.
        global relativedelta
        from dateutil import relativedelta

        def _as_seconds(offset):
            # timedelta-like objects are reduced to seconds; anything else
            # (plain numbers, None) is passed through untouched.
            try:
                return offset.total_seconds()
            except (TypeError, AttributeError):
                return offset

        self._std_abbr = stdabbr
        self._dst_abbr = dstabbr

        std_seconds = _as_seconds(stdoffset)
        dst_seconds = _as_seconds(dstoffset)

        if std_seconds is None:
            self._std_offset = ZERO
        else:
            self._std_offset = datetime.timedelta(seconds=std_seconds)

        if dst_seconds is not None:
            self._dst_offset = datetime.timedelta(seconds=dst_seconds)
        elif dstabbr and std_seconds is not None:
            # Only an abbreviation was given for DST: assume a one hour shift
            self._dst_offset = self._std_offset + datetime.timedelta(hours=+1)
        else:
            self._dst_offset = ZERO

        # Default rules apply only when a DST abbreviation was supplied and
        # the corresponding rule was left out.
        use_default_start = dstabbr and start is None
        self._start_delta = (
            relativedelta.relativedelta(
                hours=+2, month=4, day=1, weekday=relativedelta.SU(+1))
            if use_default_start else start)

        use_default_end = dstabbr and end is None
        self._end_delta = (
            relativedelta.relativedelta(
                hours=+1, month=10, day=31, weekday=relativedelta.SU(-1))
            if use_default_end else end)

        self._dst_base_offset_ = self._dst_offset - self._std_offset
        self.hasdst = bool(self._start_delta)

    def transitions(self, year):
        """
        For a given year, get the DST on and off transition times, expressed
        always on the standard time side. For zones with no transitions, this
        function returns ``None``.

        :param year:
            The year whose transitions you would like to query.

        :return:
            Returns a :class:`tuple` of :class:`datetime.datetime` objects,
            ``(dston, dstoff)`` for zones with an annual DST transition, or
            ``None`` for fixed offset zones.
        """
        if self.hasdst:
            # Both rules are applied relative to midnight on January 1st.
            jan_first = datetime.datetime(year, 1, 1)
            dston = jan_first + self._start_delta
            dstoff = jan_first + self._end_delta
            return (dston, dstoff)

        return None

    def __eq__(self, other):
        """
        Two ``tzrange`` objects are equal when their abbreviations, offsets
        and start/end rules all compare equal.
        """
        if not isinstance(other, tzrange):
            return NotImplemented

        compared = ("_std_abbr", "_dst_abbr",
                    "_std_offset", "_dst_offset",
                    "_start_delta", "_end_delta")

        return all(getattr(self, attr) == getattr(other, attr)
                   for attr in compared)

    @property
    def _dst_base_offset(self):
        # Difference between the DST and standard offsets, fixed in __init__
        return self._dst_base_offset_
def __init__(self, s, posix_offset=False):
    """
    Parse ``s`` and configure the zone from the result.

    :param s:
        The ``TZ``-style string to parse.

    :param posix_offset:
        When false (the default), the sign of the standard offset is
        flipped for zones abbreviated ``GMT`` or ``UTC``.

    :raises ValueError:
        Raised if the string cannot be parsed or has unused tokens.
    """
    # Imported lazily and published as a module global.
    global parser
    from dateutil.parser import _parser as parser

    self._s = s

    parsed = parser._parsetz(s)
    if parsed is None or parsed.any_unused_tokens:
        raise ValueError("unknown string format")

    # Here we break the compatibility with the TZ variable handling.
    # GMT-3 actually *means* the timezone -3.
    if parsed.stdabbr in ("GMT", "UTC") and not posix_offset:
        parsed.stdoffset *= -1

    # The base class has to be initialized first because _delta() reads
    # _std_offset and _dst_offset. Passing False for start/end keeps the
    # base class from building default rules that would be replaced below.
    tzrange.__init__(self, parsed.stdabbr, parsed.stdoffset,
                     parsed.dstabbr, parsed.dstoffset,
                     start=False, end=False)

    if parsed.dstabbr:
        self._start_delta = self._delta(parsed.start)
        if self._start_delta:
            self._end_delta = self._delta(parsed.end, isend=1)
    else:
        # No DST abbreviation: the zone has no transitions at all.
        self._start_delta = None
        self._end_delta = None

    self.hasdst = bool(self._start_delta)
class _tzicalvtz(_tzinfo):
    """
    Time zone backed by the components of one iCalendar ``VTIMEZONE``.

    :param tzid:
        The identifier of the time zone; only used for ``repr()``.

    :param comps:
        List of component objects (see ``_tzicalvtzcomp``) providing
        ``tzoffsetto``, ``tzoffsetdiff``, ``isdst``, ``tzname`` and ``rrule``.
        Defaults to a new empty list.
    """
    def __init__(self, tzid, comps=None):
        super(_tzicalvtz, self).__init__()

        self._tzid = tzid
        # A literal ``[]`` default would be one list shared by every instance
        # created without ``comps``; build a fresh one per instance instead.
        self._comps = [] if comps is None else comps
        # Parallel lists forming a small most-recent-first lookup cache:
        # _cachedate[i] is the (datetime, fold) key for _cachecomp[i].
        self._cachedate = []
        self._cachecomp = []
        self._cache_lock = _thread.allocate_lock()

    def _find_comp(self, dt):
        """
        Return the component in effect at the naive wall time of ``dt``.

        The component whose recurrence has the latest onset not after ``dt``
        wins. If no component has started yet, the first non-DST component is
        used, or the first component when all of them are DST.
        """
        if len(self._comps) == 1:
            return self._comps[0]

        dt = dt.replace(tzinfo=None)

        try:
            with self._cache_lock:
                return self._cachecomp[self._cachedate.index(
                    (dt, self._fold(dt)))]
        except ValueError:
            # Not cached yet; fall through to the full search.
            pass

        lastcompdt = None
        lastcomp = None

        for comp in self._comps:
            compdt = self._find_compdt(comp, dt)

            if compdt and (not lastcompdt or lastcompdt < compdt):
                lastcompdt = compdt
                lastcomp = comp

        if not lastcomp:
            # RFC says nothing about what to do when a given
            # time is before the first onset date. We'll look for the
            # first standard component, or the first component, if
            # none is found.
            for comp in self._comps:
                if not comp.isdst:
                    lastcomp = comp
                    break
            else:
                # Fall back to the first component in the list. (Indexing the
                # loop variable, as was done before, raised TypeError.)
                lastcomp = self._comps[0]

        with self._cache_lock:
            self._cachedate.insert(0, (dt, self._fold(dt)))
            self._cachecomp.insert(0, lastcomp)

            # Keep at most 10 cached lookups, dropping the oldest.
            if len(self._cachedate) > 10:
                self._cachedate.pop()
                self._cachecomp.pop()

        return lastcomp

    def _find_compdt(self, comp, dt):
        """
        Return the latest onset of ``comp`` at or before ``dt``, as reported
        by the component's recurrence rule.
        """
        # For a component that moves the clock backwards, a folded datetime
        # is shifted by the offset difference before the lookup.
        if comp.tzoffsetdiff < ZERO and self._fold(dt):
            dt -= comp.tzoffsetdiff

        compdt = comp.rrule.before(dt, inc=True)

        return compdt

    def utcoffset(self, dt):
        """Return the UTC offset of the component in effect at ``dt``."""
        if dt is None:
            return None

        return self._find_comp(dt).tzoffsetto

    def dst(self, dt):
        """
        Return the DST adjustment at ``dt``: the component's offset
        difference for a DAYLIGHT component, zero otherwise.
        """
        comp = self._find_comp(dt)
        if comp.isdst:
            return comp.tzoffsetdiff
        else:
            return ZERO

    @tzname_in_python2
    def tzname(self, dt):
        """Return the name of the component in effect at ``dt``."""
        return self._find_comp(dt).tzname

    def __repr__(self):
        # The format string needs a placeholder; with an empty string the
        # ``%`` operator raises TypeError.
        return "<tzicalvtz %s>" % repr(self._tzid)

    __reduce__ = object.__reduce__
Otherwise a valid + key (which can be retrieved from :func:`keys`) is required. + + :raises ValueError: + Raised if ``tzid`` is not specified but there are either more + or fewer than 1 zone defined. + + :returns: + Returns either a :py:class:`datetime.tzinfo` object representing + the relevant time zone or :py:const:`None` if the ``tzid`` was + not found. + """ + if tzid is None: + if len(self._vtz) == 0: + raise ValueError("no timezones defined") + elif len(self._vtz) > 1: + raise ValueError("more than one timezone available") + tzid = next(iter(self._vtz)) + + return self._vtz.get(tzid) + + def _parse_offset(self, s): + s = s.strip() + if not s: + raise ValueError("empty offset") + if s[0] in ('+', '-'): + signal = (-1, +1)[s[0] == '+'] + s = s[1:] + else: + signal = +1 + if len(s) == 4: + return (int(s[:2]) * 3600 + int(s[2:]) * 60) * signal + elif len(s) == 6: + return (int(s[:2]) * 3600 + int(s[2:4]) * 60 + int(s[4:])) * signal + else: + raise ValueError("invalid offset: " + s) + + def _parse_rfc(self, s): + lines = s.splitlines() + if not lines: + raise ValueError("empty string") + + # Unfold + i = 0 + while i < len(lines): + line = lines[i].rstrip() + if not line: + del lines[i] + elif i > 0 and line[0] == " ": + lines[i-1] += line[1:] + del lines[i] + else: + i += 1 + + tzid = None + comps = [] + invtz = False + comptype = None + for line in lines: + if not line: + continue + name, value = line.split(':', 1) + parms = name.split(';') + if not parms: + raise ValueError("empty property name") + name = parms[0].upper() + parms = parms[1:] + if invtz: + if name == "BEGIN": + if value in ("STANDARD", "DAYLIGHT"): + # Process component + pass + else: + raise ValueError("unknown component: "+value) + comptype = value + founddtstart = False + tzoffsetfrom = None + tzoffsetto = None + rrulelines = [] + tzname = None + elif name == "END": + if value == "VTIMEZONE": + if comptype: + raise ValueError("component not closed: "+comptype) + if not tzid: + raise 
ValueError("mandatory TZID not found") + if not comps: + raise ValueError( + "at least one component is needed") + # Process vtimezone + self._vtz[tzid] = _tzicalvtz(tzid, comps) + invtz = False + elif value == comptype: + if not founddtstart: + raise ValueError("mandatory DTSTART not found") + if tzoffsetfrom is None: + raise ValueError( + "mandatory TZOFFSETFROM not found") + if tzoffsetto is None: + raise ValueError( + "mandatory TZOFFSETFROM not found") + # Process component + rr = None + if rrulelines: + rr = rrule.rrulestr("\n".join(rrulelines), + compatible=True, + ignoretz=True, + cache=True) + comp = _tzicalvtzcomp(tzoffsetfrom, tzoffsetto, + (comptype == "DAYLIGHT"), + tzname, rr) + comps.append(comp) + comptype = None + else: + raise ValueError("invalid component end: "+value) + elif comptype: + if name == "DTSTART": + # DTSTART in VTIMEZONE takes a subset of valid RRULE + # values under RFC 5545. + for parm in parms: + if parm != 'VALUE=DATE-TIME': + msg = ('Unsupported DTSTART param in ' + + 'VTIMEZONE: ' + parm) + raise ValueError(msg) + rrulelines.append(line) + founddtstart = True + elif name in ("RRULE", "RDATE", "EXRULE", "EXDATE"): + rrulelines.append(line) + elif name == "TZOFFSETFROM": + if parms: + raise ValueError( + "unsupported %s parm: %s " % (name, parms[0])) + tzoffsetfrom = self._parse_offset(value) + elif name == "TZOFFSETTO": + if parms: + raise ValueError( + "unsupported TZOFFSETTO parm: "+parms[0]) + tzoffsetto = self._parse_offset(value) + elif name == "TZNAME": + if parms: + raise ValueError( + "unsupported TZNAME parm: "+parms[0]) + tzname = value + elif name == "COMMENT": + pass + else: + raise ValueError("unsupported property: "+name) + else: + if name == "TZID": + if parms: + raise ValueError( + "unsupported TZID parm: "+parms[0]) + tzid = value + elif name in ("TZURL", "LAST-MODIFIED", "COMMENT"): + pass + else: + raise ValueError("unsupported property: "+name) + elif name == "BEGIN" and value == "VTIMEZONE": + tzid = None + 
comps = [] + invtz = True + + def __repr__(self): + return "%s(%s)" % (self.__class__.__name__, repr(self._s)) + + +if sys.platform != "win32": + TZFILES = ["/etc/localtime", "localtime"] + TZPATHS = ["/usr/share/zoneinfo", + "/usr/lib/zoneinfo", + "/usr/share/lib/zoneinfo", + "/etc/zoneinfo"] +else: + TZFILES = [] + TZPATHS = [] + + +def __get_gettz(): + tzlocal_classes = (tzlocal,) + if tzwinlocal is not None: + tzlocal_classes += (tzwinlocal,) + + class GettzFunc(object): + """ + Retrieve a time zone object from a string representation + + This function is intended to retrieve the :py:class:`tzinfo` subclass + that best represents the time zone that would be used if a POSIX + `TZ variable`_ were set to the same value. + + If no argument or an empty string is passed to ``gettz``, local time + is returned: + + .. code-block:: python3 + + >>> gettz() + tzfile('/etc/localtime') + + This function is also the preferred way to map IANA tz database keys + to :class:`tzfile` objects: + + .. code-block:: python3 + + >>> gettz('Pacific/Kiritimati') + tzfile('/usr/share/zoneinfo/Pacific/Kiritimati') + + On Windows, the standard is extended to include the Windows-specific + zone names provided by the operating system: + + .. code-block:: python3 + + >>> gettz('Egypt Standard Time') + tzwin('Egypt Standard Time') + + Passing a GNU ``TZ`` style string time zone specification returns a + :class:`tzstr` object: + + .. code-block:: python3 + + >>> gettz('AEST-10AEDT-11,M10.1.0/2,M4.1.0/3') + tzstr('AEST-10AEDT-11,M10.1.0/2,M4.1.0/3') + + :param name: + A time zone name (IANA, or, on Windows, Windows keys), location of + a ``tzfile(5)`` zoneinfo file or ``TZ`` variable style time zone + specifier. An empty string, no argument or ``None`` is interpreted + as local time. + + :return: + Returns an instance of one of ``dateutil``'s :py:class:`tzinfo` + subclasses. + + .. 
def __call__(self, name=None):
    """
    Return the time zone for ``name``, reusing a cached instance if one
    exists and creating it with ``nocache`` otherwise.
    """
    with self._cache_lock:
        tz_obj = self.__instances.get(name, None)

        if tz_obj is None:
            tz_obj = self.nocache(name=name)

            # tzlocal is slightly more complicated than the other
            # time zone providers because it depends on environment
            # at construction time, so don't cache that.
            #
            # We also cannot store weak references to None, so we
            # will also not store that.
            cacheable = (name is not None
                         and not isinstance(tz_obj, tzlocal_classes)
                         and tz_obj is not None)
            if not cacheable:
                # No need for strong caching, return immediately
                return tz_obj

            self.__instances[name] = tz_obj

        # Re-insert the entry so it becomes the newest one in the strong
        # cache, then evict the oldest entry if the cache is over its limit.
        self.__strong_cache[name] = self.__strong_cache.pop(name, tz_obj)

        if len(self.__strong_cache) > self.__strong_cache_size:
            self.__strong_cache.popitem(last=False)

    return tz_obj
get_zonefile_instance().get(name) + + if not tz: + for c in name: + # name is not a tzstr unless it has at least + # one offset. For short values of "name", an + # explicit for loop seems to be the fastest way + # To determine if a string contains a digit + if c in "0123456789": + try: + tz = tzstr(name) + except ValueError: + pass + break + else: + if name in ("GMT", "UTC"): + tz = UTC + elif name in time.tzname: + tz = tzlocal() + return tz + + return GettzFunc() + + +gettz = __get_gettz() +del __get_gettz + + +def datetime_exists(dt, tz=None): + """ + Given a datetime and a time zone, determine whether or not a given datetime + would fall in a gap. + + :param dt: + A :class:`datetime.datetime` (whose time zone will be ignored if ``tz`` + is provided.) + + :param tz: + A :class:`datetime.tzinfo` with support for the ``fold`` attribute. If + ``None`` or not provided, the datetime's own time zone will be used. + + :return: + Returns a boolean value whether or not the "wall time" exists in + ``tz``. + + .. versionadded:: 2.7.0 + """ + if tz is None: + if dt.tzinfo is None: + raise ValueError('Datetime is naive and no time zone provided.') + tz = dt.tzinfo + + dt = dt.replace(tzinfo=None) + + # This is essentially a test of whether or not the datetime can survive + # a round trip to UTC. + dt_rt = dt.replace(tzinfo=tz).astimezone(UTC).astimezone(tz) + dt_rt = dt_rt.replace(tzinfo=None) + + return dt == dt_rt + + +def datetime_ambiguous(dt, tz=None): + """ + Given a datetime and a time zone, determine whether or not a given datetime + is ambiguous (i.e if there are two times differentiated only by their DST + status). + + :param dt: + A :class:`datetime.datetime` (whose time zone will be ignored if ``tz`` + is provided.) + + :param tz: + A :class:`datetime.tzinfo` with support for the ``fold`` attribute. If + ``None`` or not provided, the datetime's own time zone will be used. 
def resolve_imaginary(dt):
    """
    Map a possibly non-existent ("imaginary") datetime onto one that exists.

    An imaginary datetime is treated as the wall time the zone would have
    shown had the offset transition not occurred, so the result is moved
    forward by the transition's change in offset.

    .. doctest::

        >>> from dateutil import tz
        >>> from datetime import datetime
        >>> NYC = tz.gettz('America/New_York')
        >>> print(tz.resolve_imaginary(datetime(2017, 3, 12, 2, 30, tzinfo=NYC)))
        2017-03-12 03:30:00-04:00

        >>> KIR = tz.gettz('Pacific/Kiritimati')
        >>> print(tz.resolve_imaginary(datetime(1995, 1, 1, 12, 30, tzinfo=KIR)))
        1995-01-02 12:30:00+14:00

    As a note, :func:`datetime.astimezone` is guaranteed to produce a valid,
    existing datetime, so a round-trip to and from UTC is sufficient to get
    an extant datetime, however, this generally "falls back" to an earlier time
    rather than falling forward to the STD side (though no guarantees are made
    about this behavior).

    :param dt:
        A :class:`datetime.datetime` which may or may not exist.

    :return:
        An existing :class:`datetime.datetime`. Naive datetimes and
        datetimes that already exist are returned as the very same object
        that was passed in.

    .. versionadded:: 2.7.0
    """
    # Nothing to resolve for naive values or wall times that exist.
    if dt.tzinfo is None or datetime_exists(dt):
        return dt

    # Compare the offsets a day either side of the gap; their difference is
    # the size of the jump that made this wall time imaginary.
    one_day = datetime.timedelta(hours=24)
    offset_after = (dt + one_day).utcoffset()
    offset_before = (dt - one_day).utcoffset()

    return dt + (offset_after - offset_before)
class tzres(object):
    """
    Class for accessing ``tzres.dll``, which contains timezone name related
    resources.

    .. versionadded:: 2.5.0
    """
    p_wchar = ctypes.POINTER(wintypes.WCHAR)        # Pointer to a wide char

    def __init__(self, tzres_loc='tzres.dll'):
        """
        :param tzres_loc:
            Name or path of the resource DLL; it is handed to
            ``ctypes.WinDLL`` unchanged.
        """
        # Load the user32 DLL so we can load strings from tzres
        user32 = ctypes.WinDLL('user32')

        # Specify the LoadStringW function
        user32.LoadStringW.argtypes = (wintypes.HINSTANCE,
                                       wintypes.UINT,
                                       wintypes.LPWSTR,
                                       ctypes.c_int)

        self.LoadStringW = user32.LoadStringW
        self._tzres = ctypes.WinDLL(tzres_loc)
        self.tzres_loc = tzres_loc

    def load_name(self, offset):
        """
        Load a timezone name from a DLL offset (integer).

        >>> from dateutil.tzwin import tzres
        >>> tzr = tzres()
        >>> print(tzr.load_name(112))
        Eastern Standard Time

        :param offset:
            A positive integer value referring to a string from the tzres dll.

        :return:
            The first ``nchar`` characters of the resource, where ``nchar``
            is the value returned by ``LoadStringW``.

        .. note::

            Offsets found in the registry are generally of the form
            ``@tzres.dll,-114``. The offset in this case is 114, not -114.

        """
        resource = self.p_wchar()
        lpBuffer = ctypes.cast(ctypes.byref(resource), wintypes.LPWSTR)
        # A buffer size of 0 asks LoadStringW to store a pointer to the
        # resource string in the buffer instead of copying it (see the Win32
        # LoadStringW documentation); the return value is its length.
        nchar = self.LoadStringW(self._tzres._handle, offset, lpBuffer, 0)
        return resource[:nchar]

    def name_from_string(self, tzname_str):
        """
        Parse strings as returned from the Windows registry into the time zone
        name as defined in the registry.

        >>> from dateutil.tzwin import tzres
        >>> tzr = tzres()
        >>> print(tzr.name_from_string('@tzres.dll,-251'))
        Dateline Daylight Time
        >>> print(tzr.name_from_string('Eastern Standard Time'))
        Eastern Standard Time

        :param tzname_str:
            A timezone name string as returned from a Windows registry key.

        :raises ValueError:
            If the string starts with ``@`` but has no integer after the
            ``,-`` separator.

        :return:
            Returns the localized timezone string from tzres.dll if the string
            is of the form `@tzres.dll,-offset`, else returns the input string.
        """
        if not tzname_str.startswith('@'):
            return tzname_str

        name_splt = tzname_str.split(',-')
        try:
            offset = int(name_splt[1])
        except (IndexError, ValueError):
            # IndexError: no ",-" separator; ValueError: non-numeric offset.
            # Anything else (e.g. KeyboardInterrupt) must propagate untouched.
            raise ValueError("Malformed timezone string.")

        return self.load_name(offset)
class tzwin(tzwinbase):
    """
    Time zone built from a named entry in the Windows registry.

    Like :py:class:`dateutil.tz.tzrange`, the zone is described by a single
    offset rule with either 0 or 2 transitions per year.

    :param: name
        The name of a Windows time zone key, e.g. "Eastern Standard Time".
        The full list of keys can be retrieved with :func:`tzwin.list`.
    """

    def __init__(self, name):
        self._name = name

        # Collect every value stored under <TZKEYNAME>\<name> in HKLM.
        with winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE) as hklm:
            zone_path = text_type("{kn}\\{name}").format(kn=TZKEYNAME,
                                                         name=name)
            with winreg.OpenKey(hklm, zone_path) as zone_key:
                values = valuestodict(zone_key)

        self._std_abbr = values["Std"]
        self._dst_abbr = values["Dlt"]

        self._display = values["Display"]

        # "TZI" is a packed record: three 32-bit integers (Bias,
        # StandardBias, DaylightBias) followed by sixteen 16-bit fields.
        tzi = struct.unpack("=3l16h", values["TZI"])
        bias, std_bias, dst_bias = tzi[:3]

        # Offsets are the negated biases, expressed in minutes.
        std_minutes = -bias - std_bias
        dst_minutes = std_minutes - dst_bias
        self._std_offset = datetime.timedelta(minutes=std_minutes)
        self._dst_offset = datetime.timedelta(minutes=dst_minutes)

        # for the meaning see the win32 TIME_ZONE_INFORMATION structure docs
        # http://msdn.microsoft.com/en-us/library/windows/desktop/ms725481(v=vs.85).aspx
        std_rule = tzi[4:9]
        dst_rule = tzi[12:17]

        (self._stdmonth,
         self._stddayofweek,   # Sunday = 0
         self._stdweeknumber,  # Last = 5
         self._stdhour,
         self._stdminute) = std_rule

        (self._dstmonth,
         self._dstdayofweek,   # Sunday = 0
         self._dstweeknumber,  # Last = 5
         self._dsthour,
         self._dstminute) = dst_rule

        self._dst_base_offset_ = self._dst_offset - self._std_offset
        self.hasdst = self._get_hasdst()

    def __repr__(self):
        quoted_name = repr(self._name)
        return "tzwin(%s)" % quoted_name

    def __reduce__(self):
        # Pickle by key name; unpickling re-reads the registry.
        ctor_args = (self._name,)
        return (self.__class__, ctor_args)
+ """ + def __init__(self): + with winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE) as handle: + with winreg.OpenKey(handle, TZLOCALKEYNAME) as tzlocalkey: + keydict = valuestodict(tzlocalkey) + + self._std_abbr = keydict["StandardName"] + self._dst_abbr = keydict["DaylightName"] + + try: + tzkeyname = text_type('{kn}\\{sn}').format(kn=TZKEYNAME, + sn=self._std_abbr) + with winreg.OpenKey(handle, tzkeyname) as tzkey: + _keydict = valuestodict(tzkey) + self._display = _keydict["Display"] + except OSError: + self._display = None + + stdoffset = -keydict["Bias"]-keydict["StandardBias"] + dstoffset = stdoffset-keydict["DaylightBias"] + + self._std_offset = datetime.timedelta(minutes=stdoffset) + self._dst_offset = datetime.timedelta(minutes=dstoffset) + + # For reasons unclear, in this particular key, the day of week has been + # moved to the END of the SYSTEMTIME structure. + tup = struct.unpack("=8h", keydict["StandardStart"]) + + (self._stdmonth, + self._stdweeknumber, # Last = 5 + self._stdhour, + self._stdminute) = tup[1:5] + + self._stddayofweek = tup[7] + + tup = struct.unpack("=8h", keydict["DaylightStart"]) + + (self._dstmonth, + self._dstweeknumber, # Last = 5 + self._dsthour, + self._dstminute) = tup[1:5] + + self._dstdayofweek = tup[7] + + self._dst_base_offset_ = self._dst_offset - self._std_offset + self.hasdst = self._get_hasdst() + + def __repr__(self): + return "tzwinlocal()" + + def __str__(self): + # str will return the standard name, not the daylight name. 
def picknthweekday(year, month, dayofweek, hour, minute, whichweek):
    """
    Return the datetime of the ``whichweek``-th ``dayofweek`` of a month.

    :param year: Year of the result.
    :param month: Month of the result.
    :param dayofweek:
        Target weekday; 0 means Sunday. ISO weekdays (1-7) work as well.
    :param hour: Hour copied into the result.
    :param minute: Minute copied into the result.
    :param whichweek:
        1-based occurrence within the month; 5 means the last instance.

    :return:
        A naive :class:`datetime.datetime` on the selected day.
    """
    month_start = datetime.datetime(year, month, 1, hour, minute)

    # Days from the 1st to the first matching weekday. The modulo makes
    # Microsoft-style (0-6) and ISO (1-7) weekday numbers interchangeable,
    # because 7 % 7 = 0.
    days_ahead = (dayofweek - month_start.isoweekday()) % 7
    first_match = month_start.replace(day=days_ahead + 1)

    candidate = first_match + ((whichweek - 1) * ONEWEEK)
    if candidate.month == month:
        return candidate

    # Stepped past the end of the month: fall back one week.
    return candidate - ONEWEEK
class ZoneInfoFile(object):
    """
    In-memory view of a zoneinfo tarball.

    ``zones`` maps member names to ``tzfile`` objects and ``metadata``
    holds the decoded ``METADATA`` JSON, or ``None`` when it is absent.
    """

    def __init__(self, zonefile_stream=None):
        """
        :param zonefile_stream:
            A file-like object containing the tarball, or ``None`` to
            create an empty instance.
        """
        if zonefile_stream is None:
            self.zones = {}
            self.metadata = None
            return

        with TarFile.open(fileobj=zonefile_stream) as archive:
            members = archive.getmembers()

            # Regular files (other than the metadata member) are zones.
            parsed = {}
            for member in members:
                if member.isfile() and member.name != METADATA_FN:
                    parsed[member.name] = tzfile(archive.extractfile(member),
                                                 filename=member.name)
            self.zones = parsed

            # Hard and symbolic links share the object of their target
            # instead of being parsed again, which saves memory.
            aliases = {}
            for member in members:
                if member.islnk() or member.issym():
                    aliases[member.name] = self.zones[member.linkname]
            self.zones.update(aliases)

            try:
                raw = archive.extractfile(archive.getmember(METADATA_FN))
                text = raw.read().decode('UTF-8')
                self.metadata = json.loads(text)
            except KeyError:
                # no metadata in tar file
                self.metadata = None

    def get(self, name, default=None):
        """
        Look up a zone by name; a thin wrapper around ``self.zones.get``.

        :param name:
            The name of the zone to retrieve. (Generally IANA zone names)

        :param default:
            The value to return in the event of a missing key.

        .. versionadded:: 2.6.0

        """
        zones = self.zones
        return zones.get(name, default)
def get_zonefile_instance(new_instance=False):
    """
    Return a :class:`ZoneInfoFile` built from the data shipped with the
    ``dateutil`` package, reusing a single cached instance by default.

    :param new_instance:
        If ``True``, a new instance of :class:`ZoneInfoFile` is instantiated and
        used as the cached instance for the next call. Otherwise, new instances
        are created only as necessary.

    :return:
        Returns a :class:`ZoneInfoFile` object.

    .. versionadded:: 2.6
    """
    # The cache is stored as an attribute on the function object itself.
    cached = None
    if not new_instance:
        cached = getattr(get_zonefile_instance, '_cached_instance', None)

    if cached is not None:
        return cached

    fresh = ZoneInfoFile(getzoneinfofile_stream())
    get_zonefile_instance._cached_instance = fresh
    return fresh
def gettz_db_metadata():
    """ Get the zonefile metadata

    See `zonefile_metadata`_

    :returns:
        A dictionary with the database metadata, as stored in the
        ``metadata`` attribute of the module's shared
        :class:`ZoneInfoFile` instance.

    .. deprecated:: 2.6
        See deprecation warning in :func:`zoneinfo.gettz`. To get metadata,
        query the attribute ``zoneinfo.ZoneInfoFile.metadata``.
    """
    warnings.warn("zoneinfo.gettz_db_metadata() will be removed in future "
                  "versions, to use the dateutil-provided zoneinfo files, "
                  "instantiate a ZoneInfoFile object and query the "
                  "'metadata' attribute instead. See the documentation for "
                  "details.",
                  DeprecationWarning)

    # The shared instance is created lazily on first use and then reused.
    if not _CLASS_ZONE_INSTANCE:
        _CLASS_ZONE_INSTANCE.append(ZoneInfoFile(getzoneinfofile_stream()))
    return _CLASS_ZONE_INSTANCE[0].metadata
b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/__pycache__/config.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/__pycache__/hub.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/__pycache__/hub.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..80d61175e27c3ca1a4b1692e3c2cddd5401c7c0f Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/__pycache__/hub.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/__pycache__/info.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/__pycache__/info.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..58a8b6106655424bd814453d530caf96b1d862f9 Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/__pycache__/info.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/__pycache__/inspect.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/__pycache__/inspect.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fbdc399a93d088b67f88afc843770996f7ad2e61 Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/__pycache__/inspect.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/__pycache__/loading.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/__pycache__/loading.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f8fd6732389c91a799e563cb4d3677ed92308f29 Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/__pycache__/loading.cpython-312.pyc differ diff --git 
a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/__pycache__/module.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/__pycache__/module.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..27a3a96302c0eaf8108ee731fbe1e827149a97f8 Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/__pycache__/module.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/__pycache__/naming.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/__pycache__/naming.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a8babe7473b4100009f72f3a9f07ebc4d9cb7754 Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/__pycache__/naming.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/__pycache__/saving.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/__pycache__/saving.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cd4fc4b14ab75fd565293ca582c533a6c375884a Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/__pycache__/saving.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/__pycache__/visualization.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/__pycache__/visualization.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..21cb671f060cc0c302e1153205c7cc6864c5c48a Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/__pycache__/visualization.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/commands/__init__.py 
b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/commands/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/commands/__pycache__/__init__.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/commands/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e6a5bc00960f9e393b9e09ee00aff8e4e47910e0 Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/commands/__pycache__/__init__.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/commands/__pycache__/evaluate_cli.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/commands/__pycache__/evaluate_cli.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..46af3c82df59bb658aeec16724a172e4d359ccdc Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/commands/__pycache__/evaluate_cli.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/commands/evaluate_cli.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/commands/evaluate_cli.py new file mode 100644 index 0000000000000000000000000000000000000000..80593c4dfa0f96c8d3ea5ff6131c13c0a94181eb --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/commands/evaluate_cli.py @@ -0,0 +1,137 @@ +import argparse +import os +import subprocess +from pathlib import Path + +from cookiecutter.main import cookiecutter +from huggingface_hub import HfApi, Repository, create_repo + +from evaluate.utils.logging import get_logger + + +logger = get_logger(__name__) + +INSTRUCTIONS = """\ +A new repository for your module "{module_name}" of 
def main():
    """Entry point of the `evaluate-cli` command line tool.

    Parses the command line, validates the requested module name and type,
    creates a Gradio Space on the Hugging Face Hub, clones it into the output
    directory, renders the cookiecutter module template into the clone, pushes
    the result, and finally prints the follow-up instructions.

    Raises:
        SystemExit: raised by argparse when no sub-command is given or the
            arguments cannot be parsed.
        ValueError: if `--module_type` is not one of metric/comparison/measurement
            or the module name contains a hyphen.
        Exception: whatever `create_repo` raised, re-raised after logging.
        subprocess.CalledProcessError: if `git clone` fails.
    """
    parser = argparse.ArgumentParser("HuggingFace Evaluate CLI tool", usage="evaluate-cli []")
    # Without `required=True`, running the tool with no sub-command yields a
    # namespace lacking every `create` option and crashes below with
    # `KeyError: 'module_type'` instead of a usage message.
    subparsers = parser.add_subparsers(dest="command", required=True)
    parser_create = subparsers.add_parser("create", help="Create new evaluation module.")
    parser_create.add_argument(
        "module_name", type=str, help='Pretty name of new evaluation module, e.g. "Recall" or "Exact Match".'
    )
    parser_create.add_argument(
        "--module_type",
        default="metric",
        type=str,
        help="Type of module, has to be one of [metric|comparison|measurement].",
    )
    parser_create.add_argument(
        "--dataset_name", default="", type=str, help="Name of dataset if evaluation module is dataset specific."
    )
    parser_create.add_argument("--module_description", type=str, help="Short description of evaluation module.")
    parser_create.add_argument("--output_dir", default=Path.cwd(), type=str, help="Path to output directory.")
    parser_create.add_argument(
        "--organization", default=None, type=str, help="Organization on the Hub to push evaluation module to."
    )
    parser_create.add_argument("--private", action="store_true", help="Sets evaluation module repository to private.")
    args = vars(parser.parse_args())
    # `args` is later handed to cookiecutter as `extra_context`; drop the
    # dispatch key so the template context stays exactly what it used to be.
    args.pop("command", None)

    if args["module_type"] not in ["metric", "comparison", "measurement"]:
        raise ValueError("The module_type needs to be one of metric, comparison, or measurement")

    if "-" in args["module_name"]:
        raise ValueError("Hyphens ('-') are not allowed in module names.")

    output_dir = Path(args["output_dir"])
    organization = args["organization"]
    module_slug = args["module_name"].lower().replace(" ", "_")

    # Fall back to the logged-in user's namespace when no organization is given.
    if organization is None:
        hfapi = HfApi()
        namespace = hfapi.whoami()["name"]
    else:
        namespace = organization
    args["namespace"] = namespace
    repo_url = f"https://huggingface.co/spaces/{namespace}/{module_slug}"

    try:
        create_repo(namespace + "/" + module_slug, repo_type="space", space_sdk="gradio", private=args["private"])
    except Exception:
        logger.error(
            f"Could not create Space for module at hf.co/spaces/{namespace}/{module_slug}. Make sure this space does not exist already."
        )
        # Bare `raise` re-raises the active exception with its traceback intact.
        raise
    subprocess.run(
        # Explicit argv list: the URL is always passed as a single argument,
        # whatever characters the namespace or slug contain.
        ["git", "clone", repo_url],
        stderr=subprocess.PIPE,
        stdout=subprocess.PIPE,
        check=True,
        encoding="utf-8",
        cwd=output_dir,
        env=os.environ.copy(),
    )

    repo = Repository(
        local_dir=output_dir / module_slug,
    )

    # Render the module template on top of the freshly cloned (empty) Space.
    cookiecutter(
        "https://github.com/huggingface/evaluate/",
        directory="templates",
        no_input=True,
        extra_context=args,
        output_dir=output_dir,
        overwrite_if_exists=True,
    )

    repo.git_add()
    repo.git_commit("add module default template")
    repo.git_push()

    print(
        INSTRUCTIONS.format(
            module_name=args["module_name"],
            module_type=args["module_type"],
            module_slug=module_slug,
            namespace=namespace,
            repo_url=repo_url,
            output_dir=output_dir,
        )
    )


if __name__ == "__main__":
    main()
# Registry of the tasks an `Evaluator` exists for: task name -> evaluator class
# and the metric loaded when the caller does not pass one.
SUPPORTED_EVALUATOR_TASKS = {
    "text-classification": {
        "implementation": TextClassificationEvaluator,
        "default_metric_name": "accuracy",
    },
    "image-classification": {
        "implementation": ImageClassificationEvaluator,
        "default_metric_name": "accuracy",
    },
    "question-answering": {
        "implementation": QuestionAnsweringEvaluator,
        "default_metric_name": "squad",
    },
    "token-classification": {
        "implementation": TokenClassificationEvaluator,
        "default_metric_name": "seqeval",
    },
    "text-generation": {
        "implementation": TextGenerationEvaluator,
        "default_metric_name": "word_count",
    },
    "text2text-generation": {
        "implementation": Text2TextGenerationEvaluator,
        "default_metric_name": "bleu",
    },
    "summarization": {
        "implementation": SummarizationEvaluator,
        "default_metric_name": "rouge",
    },
    "translation": {
        "implementation": TranslationEvaluator,
        "default_metric_name": "bleu",
    },
    "automatic-speech-recognition": {
        "implementation": AutomaticSpeechRecognitionEvaluator,
        "default_metric_name": "wer",
    },
    "audio-classification": {
        "implementation": AudioClassificationEvaluator,
        "default_metric_name": "accuracy",
    },
}


def get_supported_tasks() -> List[str]:
    """
    Returns a list of supported task strings.
    """
    return list(SUPPORTED_EVALUATOR_TASKS)


def check_task(task: str) -> Dict:
    """
    Checks an incoming task string, to validate it's correct and returns the default Evaluator class and default metric
    name. It first performs a check to validate that the string is a valid `Pipeline` task, then it checks if it's a
    valid `Evaluator` task. `Evaluator` tasks are a subset of `Pipeline` tasks.
    Args:
        task (`str`):
            The task defining which evaluator will be returned. Currently accepted tasks are:
            - `"audio-classification"`
            - `"automatic-speech-recognition"`
            - `"image-classification"`
            - `"question-answering"`
            - `"summarization"`
            - `"text-classification"` (alias `"sentiment-analysis"` available)
            - `"text-generation"`
            - `"text2text-generation"`
            - `"token-classification"`
            - `"translation"`
    Returns:
        task_defaults: `dict`, contains the implementation class of a given Evaluator and the default metric name.
    Raises:
        ImportError: if `transformers` is not installed.
        KeyError: if the task is not a known pipeline task or has no evaluator.
    """
    # The pipeline helpers used below only exist when the guarded `transformers`
    # import succeeded; fail with an actionable message instead of a NameError.
    if not TRANSFORMERS_AVAILABLE:
        raise ImportError(
            "If you want to use the `Evaluator` you need `transformers`. Run `pip install evaluate[evaluator]`."
        )
    if task in TASK_ALIASES:
        task = TASK_ALIASES[task]
    if not check_pipeline_task(task):
        raise KeyError(f"Unknown task {task}, available tasks are: {get_supported_tasks()}.")
    if task in SUPPORTED_EVALUATOR_TASKS and task in SUPPORTED_PIPELINE_TASKS:
        return SUPPORTED_EVALUATOR_TASKS[task]
    raise KeyError(f"Unknown task {task}, available tasks are: {get_supported_tasks()}.")


def evaluator(task: str = None) -> Evaluator:
    """
    Utility factory method to build an [`Evaluator`].
    Evaluators encapsulate a task and a default metric name. They leverage `pipeline` functionality from `transformers`
    to simplify the evaluation of multiple combinations of models, datasets and metrics for a given task.
    Args:
        task (`str`):
            The task defining which evaluator will be returned. Currently accepted tasks are:
            - `"audio-classification"`: will return a [`AudioClassificationEvaluator`].
            - `"automatic-speech-recognition"`: will return a [`AutomaticSpeechRecognitionEvaluator`].
            - `"image-classification"`: will return a [`ImageClassificationEvaluator`].
            - `"question-answering"`: will return a [`QuestionAnsweringEvaluator`].
            - `"summarization"`: will return a [`SummarizationEvaluator`].
            - `"text-classification"` (alias `"sentiment-analysis"` available): will return a [`TextClassificationEvaluator`].
            - `"text-generation"`: will return a [`TextGenerationEvaluator`].
            - `"text2text-generation"`: will return a [`Text2TextGenerationEvaluator`].
            - `"token-classification"`: will return a [`TokenClassificationEvaluator`].
            - `"translation"`: will return a [`TranslationEvaluator`].
    Returns:
        [`Evaluator`]: An evaluator suitable for the task.
    Examples:
    ```python
    >>> from evaluate import evaluator
    >>> # Sentiment analysis evaluator
    >>> evaluator("sentiment-analysis")
    ```"""
    if not TRANSFORMERS_AVAILABLE:
        # Same install hint as the one raised by the `Evaluator` base class.
        raise ImportError(
            "If you want to use the `Evaluator` you need `transformers`. Run `pip install evaluate[evaluator]`."
        )
    targeted_task = check_task(task)
    evaluator_class = targeted_task["implementation"]
    default_metric_name = targeted_task["default_metric_name"]
    # The evaluator receives the task string exactly as the caller spelled it
    # (alias included), not the resolved name.
    return evaluator_class(task=task, default_metric_name=default_metric_name)
b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/__pycache__/base.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c2fcad129f775a0151604878ac51b8e7ae8b8c6d Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/__pycache__/base.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/__pycache__/image_classification.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/__pycache__/image_classification.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e7980856c6359950b7103a7483258b850b9e134a Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/__pycache__/image_classification.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/__pycache__/question_answering.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/__pycache__/question_answering.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..33fffff87e96881c4aeecac5b0e83da7a9b59133 Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/__pycache__/question_answering.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/__pycache__/text2text_generation.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/__pycache__/text2text_generation.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..250b36c7e3a99a1764deed1649c65d9678b5f3f9 Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/__pycache__/text2text_generation.cpython-312.pyc differ diff 
--git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/__pycache__/text_classification.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/__pycache__/text_classification.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c15e7ae7635af87d6c26aa94a9235dd2070d3f9c Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/__pycache__/text_classification.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/__pycache__/text_generation.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/__pycache__/text_generation.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..417e710bb53b6378d9853b53ef355d613638fb25 Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/__pycache__/text_generation.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/__pycache__/token_classification.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/__pycache__/token_classification.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ee5ef724e27c0b7cd3322ce6722b55f1ee0a9fee Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/__pycache__/token_classification.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/__pycache__/utils.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/__pycache__/utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9a43a5e184583b68702dcb1327848b7b1e12adb2 Binary files /dev/null and 
b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/__pycache__/utils.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/audio_classification.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/audio_classification.py new file mode 100644 index 0000000000000000000000000000000000000000..685fb9fd8515f8506b89e9375948fea181f79a8f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/audio_classification.py @@ -0,0 +1,151 @@ +# Copyright 2022 The HuggingFace Evaluate Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from numbers import Number +from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Tuple, Union + +from datasets import Dataset +from typing_extensions import Literal + +from ..module import EvaluationModule +from ..utils.file_utils import add_end_docstrings, add_start_docstrings +from .base import EVALUATOR_COMPUTE_RETURN_DOCSTRING, EVALUTOR_COMPUTE_START_DOCSTRING, Evaluator + + +if TYPE_CHECKING: + from transformers import FeatureExtractionMixin, Pipeline, PreTrainedModel, PreTrainedTokenizer, TFPreTrainedModel + + +TASK_DOCUMENTATION = r""" + Examples: + + + + Remember that, in order to process audio files, you need ffmpeg installed (https://ffmpeg.org/download.html) + + + + ```python + >>> from evaluate import evaluator + >>> from datasets import load_dataset + + >>> task_evaluator = evaluator("audio-classification") + >>> data = load_dataset("superb", 'ks', split="test[:40]") + >>> results = task_evaluator.compute( + >>> model_or_pipeline=""superb/wav2vec2-base-superb-ks"", + >>> data=data, + >>> label_column="label", + >>> input_column="file", + >>> metric="accuracy", + >>> label_mapping={0: "yes", 1: "no", 2: "up", 3: "down"} + >>> ) + ``` + + + + The evaluator supports raw audio data as well, in the form of a numpy array. However, be aware that calling + the audio column automatically decodes and resamples the audio files, which can be slow for large datasets. 
class AudioClassificationEvaluator(Evaluator):
    """
    Evaluator for audio classification pipelines.
    It is obtained from [`evaluator`] under the default task name `audio-classification`, and its methods
    expect data laid out the way [`transformers.AudioClassificationPipeline`] consumes and produces it.
    """

    PIPELINE_KWARGS = {}

    def __init__(self, task="audio-classification", default_metric_name=None):
        super().__init__(task, default_metric_name=default_metric_name)

    def predictions_processor(self, predictions, label_mapping):
        """Keep the highest-scoring label of every prediction, remapped through `label_mapping` when one is given."""
        best_labels = [max(candidates, key=lambda entry: entry["score"])["label"] for candidates in predictions]
        if label_mapping is not None:
            best_labels = [label_mapping[name] for name in best_labels]
        return {"predictions": best_labels}

    @add_start_docstrings(EVALUTOR_COMPUTE_START_DOCSTRING)
    @add_end_docstrings(EVALUATOR_COMPUTE_RETURN_DOCSTRING, TASK_DOCUMENTATION)
    def compute(
        self,
        model_or_pipeline: Union[
            str, "Pipeline", Callable, "PreTrainedModel", "TFPreTrainedModel"  # noqa: F821
        ] = None,
        data: Union[str, Dataset] = None,
        subset: Optional[str] = None,
        split: Optional[str] = None,
        metric: Union[str, EvaluationModule] = None,
        tokenizer: Optional[Union[str, "PreTrainedTokenizer"]] = None,  # noqa: F821
        feature_extractor: Optional[Union[str, "FeatureExtractionMixin"]] = None,  # noqa: F821
        strategy: Literal["simple", "bootstrap"] = "simple",
        confidence_level: float = 0.95,
        n_resamples: int = 9999,
        device: int = None,
        random_state: Optional[int] = None,
        input_column: str = "file",
        label_column: str = "label",
        label_mapping: Optional[Dict[str, Number]] = None,
    ) -> Tuple[Dict[str, float], Any]:

        """
        input_column (`str`, defaults to `"file"`):
            The name of the column containing either the audio files or a raw waveform, represented as a numpy array, in the dataset specified by `data`.
        label_column (`str`, defaults to `"label"`):
            The name of the column containing the labels in the dataset specified by `data`.
        label_mapping (`Dict[str, Number]`, *optional*, defaults to `None`):
            We want to map class labels defined by the model in the pipeline to values consistent with those
            defined in the `label_column` of the `data` dataset.
        """

        # Pure delegation: every argument is forwarded by keyword to the base class.
        return super().compute(
            model_or_pipeline=model_or_pipeline,
            data=data,
            subset=subset,
            split=split,
            metric=metric,
            tokenizer=tokenizer,
            feature_extractor=feature_extractor,
            strategy=strategy,
            confidence_level=confidence_level,
            n_resamples=n_resamples,
            device=device,
            random_state=random_state,
            input_column=input_column,
            label_column=label_column,
            label_mapping=label_mapping,
        )
class AutomaticSpeechRecognitionEvaluator(Evaluator):
    """
    Automatic speech recognition evaluator.
    This automatic speech recognition evaluator can currently be loaded from [`evaluator`] using the default task name
    `automatic-speech-recognition`.
    Methods in this class assume a data format compatible with the [`AutomaticSpeechRecognitionPipeline`].
    """

    # Class-level defaults; `compute` must never mutate this dict in place.
    PIPELINE_KWARGS = {"truncation": True}

    def __init__(self, task="automatic-speech-recognition", default_metric_name=None):
        super().__init__(task, default_metric_name=default_metric_name)

    def predictions_processor(self, predictions, label_mapping):
        """Collect the `"text"` entry of every pipeline output; `label_mapping` is accepted but unused."""
        return {"predictions": [pred["text"] for pred in predictions]}

    @add_start_docstrings(EVALUTOR_COMPUTE_START_DOCSTRING)
    @add_end_docstrings(EVALUATOR_COMPUTE_RETURN_DOCSTRING, TASK_DOCUMENTATION)
    def compute(
        self,
        model_or_pipeline: Union[
            str, "Pipeline", Callable, "PreTrainedModel", "TFPreTrainedModel"  # noqa: F821
        ] = None,
        data: Union[str, Dataset] = None,
        subset: Optional[str] = None,
        split: Optional[str] = None,
        metric: Union[str, EvaluationModule] = None,
        tokenizer: Optional[Union[str, "PreTrainedTokenizer"]] = None,  # noqa: F821
        strategy: Literal["simple", "bootstrap"] = "simple",
        confidence_level: float = 0.95,
        n_resamples: int = 9999,
        device: int = None,
        random_state: Optional[int] = None,
        input_column: str = "path",
        label_column: str = "sentence",
        generation_kwargs: dict = None,
    ) -> Tuple[Dict[str, float], Any]:
        """
        input_column (`str`, defaults to `"path"`):
            the name of the column containing the input audio path in the dataset specified by `data`.
        label_column (`str`, defaults to `"sentence"`):
            the name of the column containing the labels in the dataset specified by `data`.
        generation_kwargs (`Dict`, *optional*, defaults to `None`):
            The generation kwargs are passed to the pipeline and set the text generation strategy.
        """

        if generation_kwargs is not None:
            # Rebind a merged copy on this instance rather than calling
            # `.update()` on the shared class attribute: an in-place update
            # would leak these kwargs into every other evaluator instance and
            # every later call in the process. Reads through
            # `self.PIPELINE_KWARGS` pick up the instance-level dict.
            self.PIPELINE_KWARGS = {**self.PIPELINE_KWARGS, **generation_kwargs}

        result = super().compute(
            model_or_pipeline=model_or_pipeline,
            data=data,
            subset=subset,
            split=split,
            metric=metric,
            tokenizer=tokenizer,
            strategy=strategy,
            confidence_level=confidence_level,
            n_resamples=n_resamples,
            device=device,
            random_state=random_state,
            input_column=input_column,
            label_column=label_column,
        )

        return result
+ +from abc import ABC, abstractmethod +from numbers import Number +from typing import Any, Callable, Dict, List, Optional, Union + +# Lint as: python3 +from datasets import Dataset, load_dataset + +from evaluate.evaluator.utils import choose_split + + +try: + from scipy.stats import bootstrap + + SCIPY_AVAILABLE = True +except ImportError: + SCIPY_AVAILABLE = False + +try: + import transformers + from transformers import Pipeline, pipeline + + TRANSFORMERS_AVAILABLE = True +except ImportError: + TRANSFORMERS_AVAILABLE = False + +from time import perf_counter + +from typing_extensions import Literal + +from ..loading import load +from ..module import EvaluationModule +from ..utils.logging import get_logger +from .utils import DatasetColumn + + +logger = get_logger(__name__) + + +EVALUTOR_COMPUTE_START_DOCSTRING = r""" + Compute the metric for a given pipeline and dataset combination. + Args: + model_or_pipeline (`str` or `Pipeline` or `Callable` or `PreTrainedModel` or `TFPreTrainedModel`, defaults to `None`): + If the argument in not specified, we initialize the default pipeline for the task (in this case + `text-classification` or its alias - `sentiment-analysis`). If the argument is of the type `str` or + is a model instance, we use it to initialize a new `Pipeline` with the given model. Otherwise we assume the + argument specifies a pre-initialized pipeline. + data (`str` or `Dataset`, defaults to `None`): + Specifies the dataset we will run evaluation on. If it is of type `str`, we treat it as the dataset + name, and load it. Otherwise we assume it represents a pre-loaded dataset. + subset (`str`, defaults to `None`): + Defines which dataset subset to load. If `None` is passed the default subset is loaded. + split (`str`, defaults to `None`): + Defines which dataset split to load. If `None` is passed, infers based on the `choose_split` function. + metric (`str` or `EvaluationModule`, defaults to `None`): + Specifies the metric we use in evaluator. 
If it is of type `str`, we treat it as the metric name, and + load it. Otherwise we assume it represents a pre-loaded metric. + tokenizer (`str` or `PreTrainedTokenizer`, *optional*, defaults to `None`): + Argument can be used to overwrite a default tokenizer if `model_or_pipeline` represents a model for + which we build a pipeline. If `model_or_pipeline` is `None` or a pre-initialized pipeline, we ignore + this argument. + strategy (`Literal["simple", "bootstrap"]`, defaults to "simple"): + specifies the evaluation strategy. Possible values are: + - `"simple"` - we evaluate the metric and return the scores. + - `"bootstrap"` - on top of computing the metric scores, we calculate the confidence interval for each + of the returned metric keys, using `scipy`'s `bootstrap` method + https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.bootstrap.html. + confidence_level (`float`, defaults to `0.95`): + The `confidence_level` value passed to `bootstrap` if `"bootstrap"` strategy is chosen. + n_resamples (`int`, defaults to `9999`): + The `n_resamples` value passed to `bootstrap` if `"bootstrap"` strategy is chosen. + device (`int`, defaults to `None`): + Device ordinal for CPU/GPU support of the pipeline. Setting this to -1 will leverage CPU, a positive + integer will run the model on the associated CUDA device ID. If `None` is provided it will be inferred and + CUDA:0 used if available, CPU otherwise. + random_state (`int`, *optional*, defaults to `None`): + The `random_state` value passed to `bootstrap` if `"bootstrap"` strategy is chosen. Useful for + debugging. +""" + +EVALUATOR_COMPUTE_RETURN_DOCSTRING = r""" + Return: + A `Dict`. The keys represent metric keys calculated for the `metric` spefied in function arguments. For the + `"simple"` strategy, the value is the metric score. For the `"bootstrap"` strategy, the value is a `Dict` + containing the score, the confidence interval and the standard error calculated for each metric key. 
+""" + + +class Evaluator(ABC): + """ + The [`Evaluator`] class is the class from which all evaluators inherit. Refer to this class for methods shared across + different evaluators. + Base class implementing evaluator operations. + """ + + PIPELINE_KWARGS = {} + METRIC_KWARGS = {} + + def __init__(self, task: str, default_metric_name: str = None): + if not TRANSFORMERS_AVAILABLE: + raise ImportError( + "If you want to use the `Evaluator` you need `transformers`. Run `pip install evaluate[evaluator]`." + ) + if not SCIPY_AVAILABLE: + raise ImportError( + "If you want to use the `Evaluator` you need `scipy>=1.7.1`. Run `pip install evaluate[evaluator]`." + ) + self.task = task + self.default_metric_name = default_metric_name + + @staticmethod + def _compute_confidence_interval( + metric, + metric_inputs, + metric_keys: List[str], + confidence_level: float = 0.95, + n_resamples: int = 9999, + random_state: Optional[int] = None, + ) -> Dict[str, Any]: + """ + A utility function enabling the confidence interval calculation for metrics computed + by the evaluator based on `scipy`'s `bootstrap` method. 
+ """ + + # bootstrap only works with functions that use args and no kwargs + def build_args_metric(metric, key, **kwargs): + def args_metric(*args): + return metric.compute(**{k: v for k, v in zip(kwargs.keys(), args)})[key] + + return args_metric + + bootstrap_dict = {} + for key in metric_keys: + bs = bootstrap( + data=list(metric_inputs.values()), + statistic=build_args_metric(metric, key, **metric_inputs), + paired=True, + vectorized=False, + confidence_level=confidence_level, + n_resamples=n_resamples, + random_state=random_state, + ) + bootstrap_dict[key] = { + "confidence_interval": (bs.confidence_interval.low, bs.confidence_interval.high), + "standard_error": bs.standard_error, + } + return bootstrap_dict + + @staticmethod + def _compute_time_perf(start_time: float, end_time: float, num_samples: int) -> Dict[str, Any]: + """ + A utility function computing time performance metrics: + - `total_time_in_seconds` - pipeline inference runtime for the evaluation data in seconds, + - `samples_per_second` - pipeline throughput in the number of samples per second. + - `latency_in_seconds` - pipeline inference runtime for the evaluation data in seconds per sample, + + """ + latency = end_time - start_time + throughput = num_samples / latency + latency_sample = 1.0 / throughput + + return { + "total_time_in_seconds": latency, + "samples_per_second": throughput, + "latency_in_seconds": latency_sample, + } + + @staticmethod + def _infer_device() -> int: + """Helper function to check if GPU or CPU is available for inference.""" + # try infer with torch first + try: + import torch + + if torch.cuda.is_available(): + device = 0 # first GPU + else: + device = -1 # CPU + except ImportError: + # if not available try TF + try: + import tensorflow as tf + + if len(tf.config.list_physical_devices("GPU")) > 0: + device = 0 # first GPU + else: + device = -1 # CPU + except ImportError: + device = -1 + + if device == -1: + logger.info("No GPU found. 
The default device for pipeline inference is set to CPU.") + else: + logger.info("GPU found. The default device for pipeline inference is set to GPU (CUDA:0).") + + return device + + @abstractmethod + def predictions_processor(self, *args, **kwargs): + """ + A core method of the `Evaluator` class, which processes the pipeline outputs for compatibility with the metric. + """ + raise NotImplementedError() + + def compute( + self, + model_or_pipeline: Union[ + str, "Pipeline", Callable, "PreTrainedModel", "TFPreTrainedModel" # noqa: F821 + ] = None, + data: Union[str, Dataset] = None, + subset: Optional[str] = None, + split: Optional[str] = None, + metric: Union[str, EvaluationModule] = None, + tokenizer: Optional[Union[str, "PreTrainedTokenizer"]] = None, # noqa: F821 + feature_extractor: Optional[Union[str, "FeatureExtractionMixin"]] = None, # noqa: F821 + strategy: Literal["simple", "bootstrap"] = "simple", + confidence_level: float = 0.95, + n_resamples: int = 9999, + device: int = None, + random_state: Optional[int] = None, + input_column: str = "text", + label_column: str = "label", + label_mapping: Optional[Dict[str, Number]] = None, + ) -> Dict[str, float]: + + result = {} + + self.check_for_mismatch_in_device_setup(device, model_or_pipeline) + + # Prepare inputs + data = self.load_data(data=data, subset=subset, split=split) + metric_inputs, pipe_inputs = self.prepare_data(data=data, input_column=input_column, label_column=label_column) + pipe = self.prepare_pipeline( + model_or_pipeline=model_or_pipeline, + tokenizer=tokenizer, + feature_extractor=feature_extractor, + device=device, + ) + metric = self.prepare_metric(metric) + + # Compute predictions + predictions, perf_results = self.call_pipeline(pipe, pipe_inputs) + predictions = self.predictions_processor(predictions, label_mapping) + + metric_inputs.update(predictions) + + # Compute metrics from references and predictions + metric_results = self.compute_metric( + metric=metric, + 
metric_inputs=metric_inputs, + strategy=strategy, + confidence_level=confidence_level, + n_resamples=n_resamples, + random_state=random_state, + ) + + # TODO: To clarify why `wer` and `cer` return float + # even though metric.compute contract says that it + # returns Optional[dict]. + if type(metric_results) is float: + metric_results = {metric.name: metric_results} + + result.update(metric_results) + result.update(perf_results) + + return result + + @staticmethod + def check_for_mismatch_in_device_setup(device, model_or_pipeline): + if device is not None and device != -1 and isinstance(model_or_pipeline, Pipeline): + if model_or_pipeline.device.type == "cpu": + raise ValueError( + "The value of the `device` kwarg passed to `compute` suggests that this pipe should be run on an " + "accelerator, but the pipe was instantiated on CPU. Pass `device` to the pipeline during " + "initialization to use an accelerator, or pass `device=None` to `compute`. " + ) + elif device != model_or_pipeline.device.index: + raise ValueError( + f"This pipeline was instantiated on device {model_or_pipeline.device.index} but device={device} was passed to `compute`." + ) + + def check_required_columns(self, data: Union[str, Dataset], columns_names: Dict[str, str]): + """ + Ensure the columns required for the evaluation are present in the dataset. + + Args: + data (`str` or [`Dataset`]): + Specifies the dataset we will run evaluation on. + columns_names (`List[str]`): + List of column names to check in the dataset. The keys are the arguments to the [`evaluate.EvaluationModule.compute`] method, + while the values are the column names to check. 
+ + Example: + + ```py + >>> from datasets import load_dataset + >>> from evaluate import evaluator + >>> data = load_dataset("rotten_tomatoes', split="train") + >>> evaluator.check_required_columns(data, {"input_column": "text", "label_column": "label"}) + ``` + """ + for input_name, column_name in columns_names.items(): + if column_name not in data.column_names: + raise ValueError( + f"Invalid `{input_name}` {column_name} specified. The dataset contains the following columns: {data.column_names}." + ) + + @staticmethod + def get_dataset_split(data, subset=None, split=None): + """ + Infers which split to use if `None` is given. + + Args: + data (`str`): + Name of dataset. + subset (`str`): + Name of config for datasets with multiple configurations (e.g. 'glue/cola'). + split (`str`, defaults to `None`): + Split to use. + Returns: + `split`: `str` containing which split to use + + Example: + + ```py + >>> from evaluate import evaluator + >>> evaluator("text-classification").get_dataset_split(data="rotten_tomatoes") + WARNING:evaluate.evaluator.base:Dataset split not defined! Automatically evaluating with split: TEST + 'test' + ``` + """ + if split is None: + split = choose_split(data, subset) + logger.warning(f"Dataset split not defined! Automatically evaluating with split: {split.upper()}") + return split + + def load_data(self, data: Union[str, Dataset], subset: str = None, split: str = None): + """ + Load dataset with given subset and split. + Args: + data ([`Dataset`] or `str`, defaults to `None`): + Specifies the dataset we will run evaluation on. If it is of + type `str`, we treat it as the dataset name, and load it. Otherwise we assume it represents a pre-loaded dataset. + subset (`str`, defaults to `None`): + Specifies dataset subset to be passed to `name` in `load_dataset`. To be + used with datasets with several configurations (e.g. glue/sst2). + split (`str`, defaults to `None`): + User-defined dataset split by name (e.g. train, validation, test). 
Supports slice-split (`test[:n]`). + If not defined and data is a `str` type, will automatically select the best one via `choose_split()`. + Returns: + data ([`Dataset`]): Loaded dataset which will be used for evaluation. + + Example: + + ```py + >>> from evaluate import evaluator + >>> evaluator("text-classification").load_data(data="rotten_tomatoes", split="train") + Dataset({ + features: ['text', 'label'], + num_rows: 8530 + }) + ``` + """ + if isinstance(data, str): + split = self.get_dataset_split(data, subset, split) + data = load_dataset(data, name=subset, split=split) + return data + elif isinstance(data, Dataset): + if split is not None or subset is not None: + logger.warning("`data` is a preloaded Dataset! Ignoring `subset` and `split`.") + return data + else: + raise ValueError( + "Please specify a valid `data` object - either a `str` with a name or a `Dataset` object." + ) + + def prepare_data(self, data: Dataset, input_column: str, label_column: str, *args, **kwargs): + """ + Prepare data. + + Args: + data ([`Dataset`]): + Specifies the dataset we will run evaluation on. + input_column (`str`, defaults to `"text"`): + The name of the column containing the text feature in the dataset specified by `data`. + second_input_column(`str`, *optional*): + The name of the column containing the second text feature if there is one. Otherwise, set to `None`. + label_column (`str`, defaults to `"label"`): + The name of the column containing the labels in the dataset specified by `data`. + Returns: + `dict`: metric inputs. + `list`: pipeline inputs. 
+ + Example: + + ```py + >>> from evaluate import evaluator + >>> from datasets import load_dataset + + >>> ds = load_dataset("rotten_tomatoes", split="train") + >>> evaluator("text-classification").prepare_data(ds, input_column="text", second_input_column=None, label_column="label") + ``` + """ + + self.check_required_columns(data, {"input_column": input_column, "label_column": label_column}) + + return {"references": data[label_column]}, DatasetColumn(data, input_column) + + def prepare_pipeline( + self, + model_or_pipeline: Union[str, "Pipeline", Callable, "PreTrainedModel", "TFPreTrainedModel"], # noqa: F821 + tokenizer: Union["PreTrainedTokenizerBase", "FeatureExtractionMixin"] = None, # noqa: F821 + feature_extractor: Union["PreTrainedTokenizerBase", "FeatureExtractionMixin"] = None, # noqa: F821 + device: int = None, + ): + """ + Prepare pipeline. + + Args: + model_or_pipeline (`str` or [`~transformers.Pipeline`] or `Callable` or [`~transformers.PreTrainedModel`] or [`~transformers.TFPreTrainedModel`], defaults to `None`): + If the argument in not specified, we initialize the default pipeline for the task. If the argument is of the type `str` or + is a model instance, we use it to initialize a new [`~transformers.Pipeline`] with the given model. Otherwise we assume the + argument specifies a pre-initialized pipeline. + preprocessor ([`~transformers.PreTrainedTokenizerBase`] or [`~transformers.FeatureExtractionMixin`], *optional*, defaults to `None`): + Argument can be used to overwrite a default preprocessor if `model_or_pipeline` represents a model for + which we build a pipeline. If `model_or_pipeline` is `None` or a pre-initialized pipeline, we ignore + this argument. + Returns: + The initialized pipeline. 
+ + Example: + + ```py + >>> from evaluate import evaluator + >>> evaluator("text-classification").prepare_pipeline(model_or_pipeline="distilbert-base-uncased") + ``` + """ + + if device is None: + device = self._infer_device() + + if ( + isinstance(model_or_pipeline, str) + or isinstance(model_or_pipeline, transformers.PreTrainedModel) + or isinstance(model_or_pipeline, transformers.TFPreTrainedModel) + ): + pipe = pipeline( + self.task, + model=model_or_pipeline, + tokenizer=tokenizer, + feature_extractor=feature_extractor, + device=device, + ) + else: + if model_or_pipeline is None: + pipe = pipeline(self.task, device=device) + else: + pipe = model_or_pipeline + if tokenizer is not None and feature_extractor is not None: + logger.warning("Ignoring the value of the preprocessor argument (`tokenizer` or `feature_extractor`).") + if (pipe.task != self.task) and not (self.task == "translation" and pipe.task.startswith("translation")): + raise ValueError( + f"Incompatible `model_or_pipeline`. Please specify `model_or_pipeline` compatible with the `{self.task}` task." + ) + return pipe + + def prepare_metric(self, metric: Union[str, EvaluationModule]): + """ + Prepare metric. + + Args: + metric (`str` or [`EvaluationModule`], defaults to `None`): + Specifies the metric we use in evaluator. If it is of type `str`, we treat it as the metric name, and + load it. Otherwise we assume it represents a pre-loaded metric. + + Returns: + The loaded metric. + + Example: + + ```py + >>> from evaluate import evaluator + >>> evaluator("text-classification").prepare_metric("accuracy") + ``` + """ + # Prepare metric. + if metric is None: + if self.default_metric_name is None: + raise ValueError( + "`Evaluator` doesn't specify a default metric. Please specify a valid `metric` argument." 
+ ) + metric = load(self.default_metric_name) + elif isinstance(metric, str): + metric = load(metric) + + return metric + + def call_pipeline(self, pipe, *args, **kwargs): + start_time = perf_counter() + pipe_output = pipe(*args, **kwargs, **self.PIPELINE_KWARGS) + end_time = perf_counter() + return pipe_output, self._compute_time_perf(start_time, end_time, len(pipe_output)) + + def compute_metric( + self, + metric: EvaluationModule, + metric_inputs: Dict, + strategy: Literal["simple", "bootstrap"] = "simple", + confidence_level: float = 0.95, + n_resamples: int = 9999, + random_state: Optional[int] = None, + ): + """Compute and return metrics.""" + result = metric.compute(**metric_inputs, **self.METRIC_KWARGS) + + if strategy == "bootstrap": + metric_keys = result.keys() + bootstrap_dict = self._compute_confidence_interval( + metric, + metric_inputs, + metric_keys, + confidence_level, + n_resamples, + random_state, + ) + for key in metric_keys: + bootstrap_dict[key]["score"] = result[key] + + return bootstrap_dict + + return result diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/image_classification.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/image_classification.py new file mode 100644 index 0000000000000000000000000000000000000000..82831458bb8789ce9c9418d6c19d4af4ba5b35a2 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/image_classification.py @@ -0,0 +1,119 @@ +# Copyright 2022 The HuggingFace Evaluate Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from numbers import Number +from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Tuple, Union + +from datasets import Dataset +from typing_extensions import Literal + +from ..module import EvaluationModule +from ..utils.file_utils import add_end_docstrings, add_start_docstrings +from .base import EVALUATOR_COMPUTE_RETURN_DOCSTRING, EVALUTOR_COMPUTE_START_DOCSTRING, Evaluator + + +if TYPE_CHECKING: + from transformers import FeatureExtractionMixin, Pipeline, PreTrainedModel, PreTrainedTokenizer, TFPreTrainedModel + + +TASK_DOCUMENTATION = r""" + Examples: + ```python + >>> from evaluate import evaluator + >>> from datasets import load_dataset + >>> task_evaluator = evaluator("image-classification") + >>> data = load_dataset("beans", split="test[:40]") + >>> results = task_evaluator.compute( + >>> model_or_pipeline="nateraw/vit-base-beans", + >>> data=data, + >>> label_column="labels", + >>> metric="accuracy", + >>> label_mapping={'angular_leaf_spot': 0, 'bean_rust': 1, 'healthy': 2}, + >>> strategy="bootstrap" + >>> ) + ``` +""" + + +class ImageClassificationEvaluator(Evaluator): + """ + Image classification evaluator. + This image classification evaluator can currently be loaded from [`evaluator`] using the default task name + `image-classification`. + Methods in this class assume a data format compatible with the [`ImageClassificationPipeline`]. 
+ """ + + PIPELINE_KWARGS = {} + + def __init__(self, task="image-classification", default_metric_name=None): + super().__init__(task, default_metric_name=default_metric_name) + + def predictions_processor(self, predictions, label_mapping): + pred_label = [max(pred, key=lambda x: x["score"])["label"] for pred in predictions] + pred_label = [label_mapping[pred] if label_mapping is not None else pred for pred in pred_label] + + return {"predictions": pred_label} + + @add_start_docstrings(EVALUTOR_COMPUTE_START_DOCSTRING) + @add_end_docstrings(EVALUATOR_COMPUTE_RETURN_DOCSTRING, TASK_DOCUMENTATION) + def compute( + self, + model_or_pipeline: Union[ + str, "Pipeline", Callable, "PreTrainedModel", "TFPreTrainedModel" # noqa: F821 + ] = None, + data: Union[str, Dataset] = None, + subset: Optional[str] = None, + split: Optional[str] = None, + metric: Union[str, EvaluationModule] = None, + tokenizer: Optional[Union[str, "PreTrainedTokenizer"]] = None, # noqa: F821 + feature_extractor: Optional[Union[str, "FeatureExtractionMixin"]] = None, # noqa: F821 + strategy: Literal["simple", "bootstrap"] = "simple", + confidence_level: float = 0.95, + n_resamples: int = 9999, + device: int = None, + random_state: Optional[int] = None, + input_column: str = "image", + label_column: str = "label", + label_mapping: Optional[Dict[str, Number]] = None, + ) -> Tuple[Dict[str, float], Any]: + + """ + input_column (`str`, defaults to `"image"`): + The name of the column containing the images as PIL ImageFile in the dataset specified by `data`. + label_column (`str`, defaults to `"label"`): + The name of the column containing the labels in the dataset specified by `data`. + label_mapping (`Dict[str, Number]`, *optional*, defaults to `None`): + We want to map class labels defined by the model in the pipeline to values consistent with those + defined in the `label_column` of the `data` dataset. 
+ """ + + result = super().compute( + model_or_pipeline=model_or_pipeline, + data=data, + subset=subset, + split=split, + metric=metric, + tokenizer=tokenizer, + feature_extractor=feature_extractor, + strategy=strategy, + confidence_level=confidence_level, + n_resamples=n_resamples, + device=device, + random_state=random_state, + input_column=input_column, + label_column=label_column, + label_mapping=label_mapping, + ) + + return result diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/question_answering.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/question_answering.py new file mode 100644 index 0000000000000000000000000000000000000000..99b4190eebdda4e90617d0979fe23af2965d3204 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/question_answering.py @@ -0,0 +1,239 @@ +# Copyright 2022 The HuggingFace Evaluate Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union + +# Lint as: python3 +from datasets import Dataset + + +try: + TRANSFORMERS_AVAILABLE = True +except ImportError: + TRANSFORMERS_AVAILABLE = False + +from typing_extensions import Literal + +from ..module import EvaluationModule +from ..utils.file_utils import add_end_docstrings, add_start_docstrings +from ..utils.logging import get_logger +from .base import EVALUATOR_COMPUTE_RETURN_DOCSTRING, EVALUTOR_COMPUTE_START_DOCSTRING, Evaluator +from .utils import DatasetColumn + + +if TYPE_CHECKING: + from transformers import Pipeline, PreTrainedModel, PreTrainedTokenizer, TFPreTrainedModel + + +logger = get_logger(__name__) + + +TASK_DOCUMENTATION = r""" + Examples: + ```python + >>> from evaluate import evaluator + >>> from datasets import load_dataset + >>> task_evaluator = evaluator("question-answering") + >>> data = load_dataset("squad", split="validation[:2]") + >>> results = task_evaluator.compute( + >>> model_or_pipeline="sshleifer/tiny-distilbert-base-cased-distilled-squad", + >>> data=data, + >>> metric="squad", + >>> ) + ``` + + + + Datasets where the answer may be missing in the context are supported, for example SQuAD v2 dataset. In this case, it is safer to pass `squad_v2_format=True` to + the compute() call. + + + + ```python + >>> from evaluate import evaluator + >>> from datasets import load_dataset + >>> task_evaluator = evaluator("question-answering") + >>> data = load_dataset("squad_v2", split="validation[:2]") + >>> results = task_evaluator.compute( + >>> model_or_pipeline="mrm8488/bert-tiny-finetuned-squadv2", + >>> data=data, + >>> metric="squad_v2", + >>> squad_v2_format=True, + >>> ) + ``` +""" + + +class QuestionAnsweringEvaluator(Evaluator): + """ + Question answering evaluator. 
This evaluator handles + [**extractive** question answering](https://huggingface.co/docs/transformers/task_summary#extractive-question-answering), + where the answer to the question is extracted from a context. + + This question answering evaluator can currently be loaded from [`evaluator`] using the default task name + `question-answering`. + + Methods in this class assume a data format compatible with the + [`~transformers.QuestionAnsweringPipeline`]. + """ + + PIPELINE_KWARGS = {} + + def __init__(self, task="question-answering", default_metric_name=None): + super().__init__(task, default_metric_name=default_metric_name) + + def prepare_data( + self, data: Dataset, question_column: str, context_column: str, id_column: str, label_column: str + ): + """Prepare data.""" + if data is None: + raise ValueError( + "Please specify a valid `data` object - either a `str` with a name or a `Dataset` object." + ) + self.check_required_columns( + data, + { + "question_column": question_column, + "context_column": context_column, + "id_column": id_column, + "label_column": label_column, + }, + ) + + metric_inputs = dict() + metric_inputs["references"] = [ + {"id": element[id_column], "answers": element[label_column]} for element in data + ] + + return metric_inputs, { + "question": DatasetColumn(data, question_column), + "context": DatasetColumn(data, context_column), + } + + def is_squad_v2_format(self, data: Dataset, label_column: str = "answers"): + """ + Check if the provided dataset follows the squad v2 data schema, namely possible samples where the answer is not in the context. + In this case, the answer text list should be `[]`. 
+ """ + original_num_rows = data.num_rows + nonempty_num_rows = data.filter( + lambda x: len(x[label_column]["text"]) > 0, load_from_cache_file=False + ).num_rows + if original_num_rows > nonempty_num_rows: + return True + else: + return False + + def predictions_processor(self, predictions: List, squad_v2_format: bool, ids: List): + result = [] + for i in range(len(predictions)): + pred = {"prediction_text": predictions[i]["answer"], "id": ids[i]} + if squad_v2_format: + pred["no_answer_probability"] = predictions[i]["score"] + result.append(pred) + return {"predictions": result} + + @add_start_docstrings(EVALUTOR_COMPUTE_START_DOCSTRING) + @add_end_docstrings(EVALUATOR_COMPUTE_RETURN_DOCSTRING, TASK_DOCUMENTATION) + def compute( + self, + model_or_pipeline: Union[ + str, "Pipeline", Callable, "PreTrainedModel", "TFPreTrainedModel" # noqa: F821 + ] = None, + data: Union[str, Dataset] = None, + subset: Optional[str] = None, + split: Optional[str] = None, + metric: Union[str, EvaluationModule] = None, + tokenizer: Optional[Union[str, "PreTrainedTokenizer"]] = None, # noqa: F821 + strategy: Literal["simple", "bootstrap"] = "simple", + confidence_level: float = 0.95, + n_resamples: int = 9999, + device: int = None, + random_state: Optional[int] = None, + question_column: str = "question", + context_column: str = "context", + id_column: str = "id", + label_column: str = "answers", + squad_v2_format: Optional[bool] = None, + ) -> Tuple[Dict[str, float], Any]: + """ + question_column (`str`, defaults to `"question"`): + The name of the column containing the question in the dataset specified by `data`. + context_column (`str`, defaults to `"context"`): + The name of the column containing the context in the dataset specified by `data`. + id_column (`str`, defaults to `"id"`): + The name of the column containing the identification field of the question and answer pair in the + dataset specified by `data`. 
+ label_column (`str`, defaults to `"answers"`): + The name of the column containing the answers in the dataset specified by `data`. + squad_v2_format (`bool`, *optional*, defaults to `None`): + Whether the dataset follows the format of squad_v2 dataset. This is the case when the provided dataset + has questions where the answer is not in the context, more specifically when are answers as + `{"text": [], "answer_start": []}` in the answer column. If all questions have at least one answer, this parameter + should be set to `False`. If this parameter is not provided, the format will be automatically inferred. + """ + result = {} + self.check_for_mismatch_in_device_setup(device, model_or_pipeline) + + data = self.load_data(data=data, subset=subset, split=split) + metric_inputs, pipe_inputs = self.prepare_data( + data=data, + question_column=question_column, + context_column=context_column, + id_column=id_column, + label_column=label_column, + ) + + if squad_v2_format is None: + squad_v2_format = self.is_squad_v2_format(data=data, label_column=label_column) + logger.warning( + f"`squad_v2_format` parameter not provided to QuestionAnsweringEvaluator.compute(). Automatically inferred `squad_v2_format` as {squad_v2_format}." + ) + pipe = self.prepare_pipeline(model_or_pipeline=model_or_pipeline, tokenizer=tokenizer, device=device) + + metric = self.prepare_metric(metric) + + if squad_v2_format and metric.name == "squad": + logger.warning( + "The dataset has SQuAD v2 format but you are using the SQuAD metric. Consider passing the 'squad_v2' metric." + ) + if not squad_v2_format and metric.name == "squad_v2": + logger.warning( + "The dataset has SQuAD v1 format but you are using the SQuAD v2 metric. Consider passing the 'squad' metric." 
+ ) + + if squad_v2_format: + self.PIPELINE_KWARGS["handle_impossible_answer"] = True + else: + self.PIPELINE_KWARGS["handle_impossible_answer"] = False + + # Compute predictions + predictions, perf_results = self.call_pipeline(pipe, **pipe_inputs) + predictions = self.predictions_processor(predictions, squad_v2_format=squad_v2_format, ids=data[id_column]) + metric_inputs.update(predictions) + + # Compute metrics from references and predictions + metric_results = self.compute_metric( + metric=metric, + metric_inputs=metric_inputs, + strategy=strategy, + confidence_level=confidence_level, + n_resamples=n_resamples, + random_state=random_state, + ) + + result.update(metric_results) + result.update(perf_results) + + return result diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/text2text_generation.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/text2text_generation.py new file mode 100644 index 0000000000000000000000000000000000000000..6dfd2c035695b38c1e4f0d9d4929b12c6be30920 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/text2text_generation.py @@ -0,0 +1,267 @@ +# Copyright 2022 The HuggingFace Evaluate Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Tuple, Union + +from datasets import Dataset +from typing_extensions import Literal + +from ..module import EvaluationModule +from ..utils.file_utils import add_start_docstrings +from .base import EVALUATOR_COMPUTE_RETURN_DOCSTRING, EVALUTOR_COMPUTE_START_DOCSTRING, Evaluator + + +if TYPE_CHECKING: + from transformers import Pipeline, PreTrainedModel, PreTrainedTokenizer, TFPreTrainedModel + + +TASK_DOCUMENTATION_KWARGS = r""" + input_column (`str`, defaults to `"text"`): + the name of the column containing the input text in the dataset specified by `data`. + label_column (`str`, defaults to `"label"`): + the name of the column containing the labels in the dataset specified by `data`. + generation_kwargs (`Dict`, *optional*, defaults to `None`): + The generation kwargs are passed to the pipeline and set the text generation strategy. +""" + +TEXT2TEXT_TASK_DOCSTRING_EXAMPLE = r""" + Examples: + ```python + >>> from evaluate import evaluator + >>> from datasets import load_dataset + >>> task_evaluator = evaluator("text2text-generation") + >>> data = load_dataset("cnn_dailymail", "3.0.0", split="validation[:40]") + >>> results = task_evaluator.compute( + >>> model_or_pipeline="facebook/bart-large-cnn", + >>> data=data, + >>> input_column="article", + >>> label_column="highlights", + >>> metric="rouge", + >>> ) + ``` +""" + +SUMMARIZATION_TASK_DOCSTRING_EXAMPLE = r""" + Examples: + ```python + >>> from evaluate import evaluator + >>> from datasets import load_dataset + >>> task_evaluator = evaluator("summarization") + >>> data = load_dataset("cnn_dailymail", "3.0.0", split="validation[:40]") + >>> results = task_evaluator.compute( + >>> model_or_pipeline="facebook/bart-large-cnn", + >>> data=data, + >>> input_column="article", + >>> label_column="highlights", + >>> ) + ``` +""" + + +TRANSLATION_TASK_DOCSTRING_EXAMPLE = r""" + Examples: + ```python + >>> from evaluate import evaluator + >>> from 
datasets import load_dataset + >>> task_evaluator = evaluator("translation") + >>> data = load_dataset("wmt19", "fr-de", split="validation[:40]") + >>> data = data.map(lambda x: {"text": x["translation"]["de"], "label": x["translation"]["fr"]}) + >>> results = task_evaluator.compute( + >>> model_or_pipeline="Helsinki-NLP/opus-mt-de-fr", + >>> data=data, + >>> ) + ``` +""" + + +class Text2TextGenerationEvaluator(Evaluator): + """ + Text2Text generation evaluator. + This Text2Text generation evaluator can currently be loaded from [`evaluator`] using the default task name + `text2text-generation`. + Methods in this class assume a data format compatible with the [`~transformers.Text2TextGenerationPipeline`]. + """ + + PREDICTION_PREFIX = "generated" + PIPELINE_KWARGS = {"truncation": True} + + def __init__(self, task="text2text-generation", default_metric_name=None): + super().__init__(task, default_metric_name=default_metric_name) + + def predictions_processor(self, predictions, label_mapping): + return {"predictions": [pred[f"{self.PREDICTION_PREFIX}_text"] for pred in predictions]} + + @add_start_docstrings( + EVALUTOR_COMPUTE_START_DOCSTRING, + TASK_DOCUMENTATION_KWARGS, + EVALUATOR_COMPUTE_RETURN_DOCSTRING, + TEXT2TEXT_TASK_DOCSTRING_EXAMPLE, + ) + def compute( + self, + model_or_pipeline: Union[ + str, "Pipeline", Callable, "PreTrainedModel", "TFPreTrainedModel" # noqa: F821 + ] = None, + data: Union[str, Dataset] = None, + subset: Optional[str] = None, + split: Optional[str] = None, + metric: Union[str, EvaluationModule] = None, + tokenizer: Optional[Union[str, "PreTrainedTokenizer"]] = None, # noqa: F821 + strategy: Literal["simple", "bootstrap"] = "simple", + confidence_level: float = 0.95, + n_resamples: int = 9999, + device: int = None, + random_state: Optional[int] = None, + input_column: str = "text", + label_column: str = "label", + generation_kwargs: dict = None, + ) -> Tuple[Dict[str, float], Any]: + if generation_kwargs is not None: + 
self.PIPELINE_KWARGS.update(generation_kwargs) + + result = super().compute( + model_or_pipeline=model_or_pipeline, + data=data, + subset=subset, + split=split, + metric=metric, + tokenizer=tokenizer, + strategy=strategy, + confidence_level=confidence_level, + n_resamples=n_resamples, + device=device, + random_state=random_state, + input_column=input_column, + label_column=label_column, + ) + + return result + + +class SummarizationEvaluator(Text2TextGenerationEvaluator): + """ + Text summarization evaluator. + This text summarization evaluator can currently be loaded from [`evaluator`] using the default task name + `summarization`. + Methods in this class assume a data format compatible with the [`SummarizationEvaluator`]. + """ + + PREDICTION_PREFIX = "summary" + PIPELINE_KWARGS = {"truncation": True} + + def __init__(self, task="summarization", default_metric_name=None): + super().__init__(task, default_metric_name=default_metric_name) + + @add_start_docstrings( + EVALUTOR_COMPUTE_START_DOCSTRING, + TASK_DOCUMENTATION_KWARGS, + EVALUATOR_COMPUTE_RETURN_DOCSTRING, + SUMMARIZATION_TASK_DOCSTRING_EXAMPLE, + ) + def compute( + self, + model_or_pipeline: Union[ + str, "Pipeline", Callable, "PreTrainedModel", "TFPreTrainedModel" # noqa: F821 + ] = None, + data: Union[str, Dataset] = None, + subset: Optional[str] = None, + split: Optional[str] = None, + metric: Union[str, EvaluationModule] = None, + tokenizer: Optional[Union[str, "PreTrainedTokenizer"]] = None, # noqa: F821 + strategy: Literal["simple", "bootstrap"] = "simple", + confidence_level: float = 0.95, + n_resamples: int = 9999, + device: int = None, + random_state: Optional[int] = None, + input_column: str = "text", + label_column: str = "label", + generation_kwargs: dict = None, + ) -> Tuple[Dict[str, float], Any]: + result = super().compute( + model_or_pipeline=model_or_pipeline, + data=data, + subset=subset, + split=split, + metric=metric, + tokenizer=tokenizer, + strategy=strategy, + 
confidence_level=confidence_level, + n_resamples=n_resamples, + device=device, + random_state=random_state, + input_column=input_column, + label_column=label_column, + generation_kwargs=generation_kwargs, + ) + + return result + + +class TranslationEvaluator(Text2TextGenerationEvaluator): + """ + Translation evaluator. + This translation generation evaluator can currently be loaded from [`evaluator`] using the default task name + `translation`. + Methods in this class assume a data format compatible with the [`~transformers.TranslationPipeline`]. + """ + + PREDICTION_PREFIX = "translation" + PIPELINE_KWARGS = {"truncation": True} + + def __init__(self, task="translation", default_metric_name=None): + super().__init__(task, default_metric_name=default_metric_name) + + @add_start_docstrings( + EVALUTOR_COMPUTE_START_DOCSTRING, + TASK_DOCUMENTATION_KWARGS, + EVALUATOR_COMPUTE_RETURN_DOCSTRING, + TRANSLATION_TASK_DOCSTRING_EXAMPLE, + ) + def compute( + self, + model_or_pipeline: Union[ + str, "Pipeline", Callable, "PreTrainedModel", "TFPreTrainedModel" # noqa: F821 + ] = None, + data: Union[str, Dataset] = None, + subset: Optional[str] = None, + split: Optional[str] = None, + metric: Union[str, EvaluationModule] = None, + tokenizer: Optional[Union[str, "PreTrainedTokenizer"]] = None, # noqa: F821 + strategy: Literal["simple", "bootstrap"] = "simple", + confidence_level: float = 0.95, + n_resamples: int = 9999, + device: int = None, + random_state: Optional[int] = None, + input_column: str = "text", + label_column: str = "label", + generation_kwargs: dict = None, + ) -> Tuple[Dict[str, float], Any]: + result = super().compute( + model_or_pipeline=model_or_pipeline, + data=data, + subset=subset, + split=split, + metric=metric, + tokenizer=tokenizer, + strategy=strategy, + confidence_level=confidence_level, + n_resamples=n_resamples, + device=device, + random_state=random_state, + input_column=input_column, + label_column=label_column, + 
generation_kwargs=generation_kwargs, + ) + + return result diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/text_classification.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/text_classification.py new file mode 100644 index 0000000000000000000000000000000000000000..200eb01d70336148db473edebebc96e3137c5799 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/text_classification.py @@ -0,0 +1,160 @@ +# Copyright 2022 The HuggingFace Evaluate Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from numbers import Number +from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Tuple, Union + +from datasets import Dataset, load_dataset +from typing_extensions import Literal + +from ..module import EvaluationModule +from ..utils.file_utils import add_end_docstrings, add_start_docstrings +from .base import EVALUATOR_COMPUTE_RETURN_DOCSTRING, EVALUTOR_COMPUTE_START_DOCSTRING, Evaluator +from .utils import DatasetColumnPair + + +if TYPE_CHECKING: + from transformers import FeatureExtractionMixin, Pipeline, PreTrainedModel, PreTrainedTokenizer, TFPreTrainedModel + + +TASK_DOCUMENTATION = r""" + Examples: + ```python + >>> from evaluate import evaluator + >>> from datasets import load_dataset + >>> task_evaluator = evaluator("text-classification") + >>> data = load_dataset("imdb", split="test[:2]") + >>> results = task_evaluator.compute( + >>> model_or_pipeline="huggingface/prunebert-base-uncased-6-finepruned-w-distil-mnli", + >>> data=data, + >>> metric="accuracy", + >>> label_mapping={"LABEL_0": 0.0, "LABEL_1": 1.0}, + >>> strategy="bootstrap", + >>> n_resamples=10, + >>> random_state=0 + >>> ) + ``` +""" + + +class TextClassificationEvaluator(Evaluator): + """ + Text classification evaluator. + This text classification evaluator can currently be loaded from [`evaluator`] using the default task name + `text-classification` or with a `"sentiment-analysis"` alias. + Methods in this class assume a data format compatible with the [`~transformers.TextClassificationPipeline`] - a single textual + feature as input and a categorical label as output. 
+ """ + + PIPELINE_KWARGS = {"truncation": True} + + def __init__(self, task="text-classification", default_metric_name=None): + super().__init__(task, default_metric_name=default_metric_name) + + def prepare_data(self, data: Union[str, Dataset], input_column: str, second_input_column: str, label_column: str): + if data is None: + raise ValueError( + "Please specify a valid `data` object - either a `str` with a name or a `Dataset` object." + ) + + self.check_required_columns(data, {"input_column": input_column, "label_column": label_column}) + + if second_input_column is not None: + self.check_required_columns(data, {"second_input_column": second_input_column}) + + data = load_dataset(data) if isinstance(data, str) else data + + return {"references": data[label_column]}, DatasetColumnPair( + data, input_column, second_input_column, "text", "text_pair" + ) + + def predictions_processor(self, predictions, label_mapping): + predictions = [ + label_mapping[element["label"]] if label_mapping is not None else element["label"] + for element in predictions + ] + return {"predictions": predictions} + + @add_start_docstrings(EVALUTOR_COMPUTE_START_DOCSTRING) + @add_end_docstrings(EVALUATOR_COMPUTE_RETURN_DOCSTRING, TASK_DOCUMENTATION) + def compute( + self, + model_or_pipeline: Union[ + str, "Pipeline", Callable, "PreTrainedModel", "TFPreTrainedModel" # noqa: F821 + ] = None, + data: Union[str, Dataset] = None, + subset: Optional[str] = None, + split: Optional[str] = None, + metric: Union[str, EvaluationModule] = None, + tokenizer: Optional[Union[str, "PreTrainedTokenizer"]] = None, # noqa: F821 + feature_extractor: Optional[Union[str, "FeatureExtractionMixin"]] = None, # noqa: F821 + strategy: Literal["simple", "bootstrap"] = "simple", + confidence_level: float = 0.95, + n_resamples: int = 9999, + device: int = None, + random_state: Optional[int] = None, + input_column: str = "text", + second_input_column: Optional[str] = None, + label_column: str = "label", + 
label_mapping: Optional[Dict[str, Number]] = None, + ) -> Tuple[Dict[str, float], Any]: + """ + input_column (`str`, *optional*, defaults to `"text"`): + The name of the column containing the text feature in the dataset specified by `data`. + second_input_column (`str`, *optional*, defaults to `None`): + The name of the second column containing the text features. This may be useful for classification tasks + as MNLI, where two columns are used. + label_column (`str`, defaults to `"label"`): + The name of the column containing the labels in the dataset specified by `data`. + label_mapping (`Dict[str, Number]`, *optional*, defaults to `None`): + We want to map class labels defined by the model in the pipeline to values consistent with those + defined in the `label_column` of the `data` dataset. + """ + + result = {} + + self.check_for_mismatch_in_device_setup(device, model_or_pipeline) + + # Prepare inputs + data = self.load_data(data=data, subset=subset, split=split) + metric_inputs, pipe_inputs = self.prepare_data( + data=data, input_column=input_column, second_input_column=second_input_column, label_column=label_column + ) + pipe = self.prepare_pipeline( + model_or_pipeline=model_or_pipeline, + tokenizer=tokenizer, + feature_extractor=feature_extractor, + device=device, + ) + metric = self.prepare_metric(metric) + + # Compute predictions + predictions, perf_results = self.call_pipeline(pipe, pipe_inputs) + predictions = self.predictions_processor(predictions, label_mapping) + metric_inputs.update(predictions) + + # Compute metrics from references and predictions + metric_results = self.compute_metric( + metric=metric, + metric_inputs=metric_inputs, + strategy=strategy, + confidence_level=confidence_level, + n_resamples=n_resamples, + random_state=random_state, + ) + + result.update(metric_results) + result.update(perf_results) + + return result diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/text_generation.py 
b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/text_generation.py new file mode 100644 index 0000000000000000000000000000000000000000..15522e860f7eb6fc693780f637337c0fdb22a21c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/text_generation.py @@ -0,0 +1,69 @@ +# Copyright 2022 The HuggingFace Evaluate Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Dict, Tuple + +from datasets import Dataset + +from .base import Evaluator +from .utils import DatasetColumn + + +TASK_DOCUMENTATION_KWARGS = r""" + input_column (`str`, defaults to `"text"`): + the name of the column containing the input text in the dataset specified by `data`. + generation_kwargs (`Dict`, *optional*, defaults to `None`): + The generation kwargs are passed to the pipeline and set the text generation strategy. +""" + + +class TextGenerationEvaluator(Evaluator): + """ + Text generation evaluator. + This Text generation evaluator can currently be loaded from [`evaluator`] using the default task name + `text-generation`. + Methods in this class assume a data format compatible with the [`~transformers.TextGenerationPipeline`]. + """ + + def predictions_processor(self, predictions, *args, **kwargs): + """ + Args: + predictions: A list of lists of dicts + + Returns: + `dict`: All the generated texts are flattened and stored under the "data" key. 
+ """ + return {"data": [pred[f"{self.predictions_prefix}_text"] for pred_list in predictions for pred in pred_list]} + + def __init__(self, task="text-generation", default_metric_name=None, predictions_prefix: str = "generated"): + super().__init__(task=task, default_metric_name=default_metric_name) + self.predictions_prefix = predictions_prefix + + def prepare_data(self, data: Dataset, input_column: str, *args, **kwargs) -> Tuple[Dict, DatasetColumn]: + """ + Prepare data. + + Args: + data ([`Dataset`]): + Specifies the dataset we will run evaluation on. + input_column (`str`, defaults to `"text"`): + The name of the column containing the text feature in the dataset specified by `data`. + Returns: + `dict`: metric inputs. + `list`: pipeline inputs. + """ + + self.check_required_columns(data, {"input_column": input_column}) + + return {}, DatasetColumn(data, input_column) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/token_classification.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/token_classification.py new file mode 100644 index 0000000000000000000000000000000000000000..ba08ebd58d72417eed4e20c93a46c53adaa49811 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/token_classification.py @@ -0,0 +1,278 @@ +# Copyright 2022 The HuggingFace Evaluate Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union + +from datasets import ClassLabel, Dataset, Sequence +from typing_extensions import Literal + +from ..module import EvaluationModule +from ..utils.file_utils import add_end_docstrings, add_start_docstrings +from .base import EVALUATOR_COMPUTE_RETURN_DOCSTRING, EVALUTOR_COMPUTE_START_DOCSTRING, Evaluator +from .utils import DatasetColumn + + +if TYPE_CHECKING: + from transformers import Pipeline, PreTrainedModel, PreTrainedTokenizer, TFPreTrainedModel + + +TASK_DOCUMENTATION = r""" + The dataset input and label columns are expected to be formatted as a list of words and a list of labels respectively, following [conll2003 dataset](https://huggingface.co/datasets/conll2003). Datasets whose inputs are single strings, and labels are a list of offset are not supported. + + Examples: + ```python + >>> from evaluate import evaluator + >>> from datasets import load_dataset + >>> task_evaluator = evaluator("token-classification") + >>> data = load_dataset("conll2003", split="validation[:2]") + >>> results = task_evaluator.compute( + >>> model_or_pipeline="elastic/distilbert-base-uncased-finetuned-conll03-english", + >>> data=data, + >>> metric="seqeval", + >>> ) + ``` + + + + For example, the following dataset format is accepted by the evaluator: + + ```python + dataset = Dataset.from_dict( + mapping={ + "tokens": [["New", "York", "is", "a", "city", "and", "Felix", "a", "person", "."]], + "ner_tags": [[1, 2, 0, 0, 0, 0, 3, 0, 0, 0]], + }, + features=Features({ + "tokens": Sequence(feature=Value(dtype="string")), + "ner_tags": Sequence(feature=ClassLabel(names=["O", "B-LOC", "I-LOC", "B-PER", "I-PER"])), + }), + ) + ``` + + + + + + For example, the following dataset format is **not** accepted by the evaluator: + + ```python + dataset = Dataset.from_dict( + mapping={ + "tokens": [["New York is a city and Felix a person."]], + "starts": [[0, 23]], + "ends": [[7, 27]], + "ner_tags": [["LOC", 
"PER"]], + }, + features=Features({ + "tokens": Value(dtype="string"), + "starts": Sequence(feature=Value(dtype="int32")), + "ends": Sequence(feature=Value(dtype="int32")), + "ner_tags": Sequence(feature=Value(dtype="string")), + }), + ) + ``` + + +""" + + +class TokenClassificationEvaluator(Evaluator): + """ + Token classification evaluator. + + This token classification evaluator can currently be loaded from [`evaluator`] using the default task name + `token-classification`. + + Methods in this class assume a data format compatible with the [`~transformers.TokenClassificationPipeline`]. + """ + + PIPELINE_KWARGS = {"ignore_labels": []} + + def __init__(self, task="token-classification", default_metric_name=None): + super().__init__(task, default_metric_name=default_metric_name) + + def predictions_processor(self, predictions: List[List[Dict]], words: List[List[str]], join_by: str): + """ + Transform the pipeline predictions into a list of predicted labels of the same length as the true labels. + + Args: + predictions (`List[List[Dict]]`): + List of pipeline predictions, where each token has been labeled. + words (`List[List[str]]`): + Original input data to the pipeline, used to build predicted labels of the same length. + join_by (`str`): + String to use to join two words. In English, it will typically be " ". 
+ + Returns: + `dict`: a dictionary holding the predictions + """ + preds = [] + + # iterate over the data rows + for i, prediction in enumerate(predictions): + pred_processed = [] + + # get a list of tuples giving the indexes of the start and end character of each word + words_offsets = self.words_to_offsets(words[i], join_by) + + token_index = 0 + for word_offset in words_offsets: + # for each word, we may keep only the predicted label for the first token, discard the others + while prediction[token_index]["start"] < word_offset[0]: + token_index += 1 + + if prediction[token_index]["start"] > word_offset[0]: # bad indexing + pred_processed.append("O") + elif prediction[token_index]["start"] == word_offset[0]: + pred_processed.append(prediction[token_index]["entity"]) + + preds.append(pred_processed) + + return {"predictions": preds} + + def words_to_offsets(self, words: List[str], join_by: str): + """ + Convert a list of words to a list of offsets, where word are joined by `join_by`. + + Args: + words (`List[str]`): + List of words to get offsets from. + join_by (`str`): + String to insert between words. + + Returns: + `List[Tuple[int, int]]`: List of the characters (start index, end index) for each of the words. + """ + offsets = [] + + start = 0 + for word in words: + end = start + len(word) - 1 + offsets.append((start, end)) + start = end + len(join_by) + 1 + + return offsets + + def prepare_data(self, data: Union[str, Dataset], input_column: str, label_column: str, join_by: str): + super().prepare_data(data, input_column, label_column) + + if not isinstance(data.features[input_column], Sequence) or not isinstance( + data.features[label_column], Sequence + ): + raise ValueError( + "TokenClassificationEvaluator expects the input and label columns to be provided as lists." + ) + + # If the labels are of type ClassLabel, they are already integers and we have the map stored somewhere. + # Otherwise, we have to get the list of labels manually. 
+ labels_are_int = isinstance(data.features[label_column].feature, ClassLabel) + if labels_are_int: + label_list = data.features[label_column].feature.names # list of string labels + id_to_label = {i: label for i, label in enumerate(label_list)} + references = [[id_to_label[label_id] for label_id in label_ids] for label_ids in data[label_column]] + elif data.features[label_column].feature.dtype.startswith("int"): + raise NotImplementedError( + "References provided as integers, but the reference column is not a Sequence of ClassLabels." + ) + else: + # In the event the labels are not a `Sequence[ClassLabel]`, we have already labels as strings + # An example is labels as ["PER", "PER", "O", "LOC", "O", "LOC", "O"], e.g. in polyglot_ner dataset + references = data[label_column] + + metric_inputs = {"references": references} + data = data.map(lambda x: {input_column: join_by.join(x[input_column])}) + pipeline_inputs = DatasetColumn(data, input_column) + + return metric_inputs, pipeline_inputs + + def prepare_pipeline( + self, + model_or_pipeline: Union[str, "Pipeline", Callable, "PreTrainedModel", "TFPreTrainedModel"], # noqa: F821 + tokenizer: Union["PreTrainedTokenizerBase", "FeatureExtractionMixin"] = None, # noqa: F821 + feature_extractor: Union["PreTrainedTokenizerBase", "FeatureExtractionMixin"] = None, # noqa: F821 + device: int = None, + ): + pipe = super().prepare_pipeline(model_or_pipeline, tokenizer, feature_extractor, device) + + # check the pipeline outputs start characters in its predictions + dummy_output = pipe(["2003 New York Gregory"], **self.PIPELINE_KWARGS) + if dummy_output[0][0]["start"] is None: + raise ValueError( + "TokenClassificationEvaluator supports only pipelines giving 'start' index as a pipeline output (got None). " + "Transformers pipelines with a slow tokenizer will raise this error." 
+ ) + + return pipe + + @add_start_docstrings(EVALUTOR_COMPUTE_START_DOCSTRING) + @add_end_docstrings(EVALUATOR_COMPUTE_RETURN_DOCSTRING, TASK_DOCUMENTATION) + def compute( + self, + model_or_pipeline: Union[ + str, "Pipeline", Callable, "PreTrainedModel", "TFPreTrainedModel" # noqa: F821 + ] = None, + data: Union[str, Dataset] = None, + subset: Optional[str] = None, + split: str = None, + metric: Union[str, EvaluationModule] = None, + tokenizer: Optional[Union[str, "PreTrainedTokenizer"]] = None, # noqa: F821 + strategy: Literal["simple", "bootstrap"] = "simple", + confidence_level: float = 0.95, + n_resamples: int = 9999, + device: Optional[int] = None, + random_state: Optional[int] = None, + input_column: str = "tokens", + label_column: str = "ner_tags", + join_by: Optional[str] = " ", + ) -> Tuple[Dict[str, float], Any]: + """ + input_column (`str`, defaults to `"tokens"`): + The name of the column containing the tokens feature in the dataset specified by `data`. + label_column (`str`, defaults to `"label"`): + The name of the column containing the labels in the dataset specified by `data`. + join_by (`str`, *optional*, defaults to `" "`): + This evaluator supports dataset whose input column is a list of words. This parameter specifies how to join + words to generate a string input. This is especially useful for languages that do not separate words by a space. 
+ """ + result = {} + + self.check_for_mismatch_in_device_setup(device, model_or_pipeline) + + # Prepare inputs + data = self.load_data(data=data, subset=subset, split=split) + metric_inputs, pipe_inputs = self.prepare_data( + data=data, input_column=input_column, label_column=label_column, join_by=join_by + ) + pipe = self.prepare_pipeline(model_or_pipeline=model_or_pipeline, tokenizer=tokenizer, device=device) + metric = self.prepare_metric(metric) + + # Compute predictions + predictions, perf_results = self.call_pipeline(pipe, pipe_inputs) + predictions = self.predictions_processor(predictions, data[input_column], join_by) + metric_inputs.update(predictions) + + # Compute metrics from references and predictions + metric_results = self.compute_metric( + metric=metric, + metric_inputs=metric_inputs, + strategy=strategy, + confidence_level=confidence_level, + n_resamples=n_resamples, + random_state=random_state, + ) + + result.update(metric_results) + result.update(perf_results) + + return result diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/utils.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..e364276d008b689d726b8dbbea1402fa93886d9b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/evaluator/utils.py @@ -0,0 +1,84 @@ +from datasets import Dataset, get_dataset_split_names + + +class DatasetColumn(list): + """Helper class to avoid loading a dataset column into memory when accessing it.""" + + def __init__(self, dataset: Dataset, key: str): + self.dataset = dataset + self.key = key + + def __len__(self): + return len(self.dataset) + + def __getitem__(self, i): + return self.dataset[i][self.key] + + def __iter__(self): + return (self.dataset[i][self.key] for i in range(len(self))) + + +def choose_split(data, subset=None): + available_splits = get_dataset_split_names(data, 
subset) + preferred_split_order = [ + "test", + "testing", + "eval", + "evaluation", + "validation", + "val", + "valid", + "dev", + "train", + "training", + ] + for split in preferred_split_order: + if split in available_splits: + return split + raise ValueError("No dataset split defined! Pass an explicit value to the `split` kwarg.") + + +class DatasetColumnPair(list): + """Helper class to avoid loading two dataset columns into memory when accessing it.""" + + def __init__( + self, + dataset: Dataset, + first_col: str, + second_col: str, + first_key: str, + second_key: str, + ): + """ + Args: + dataset (Dataset): dataset to build an iterator on + first_col (str): first column name to use in the dataset + second_col (str): second column name to use in the dataset + first_key (str): key name used for the first column in the returned dictionary + second_key (str): key name used for the second column in the returned dictionary + """ + self.dataset = dataset + + self.first_col = first_col + self.second_col = second_col + + self.first_key = first_key + self.second_key = second_key + + def __len__(self): + return len(self.dataset) + + def __getitem__(self, i): + return { + self.first_key: self.dataset[i][self.first_col], + self.second_key: self.dataset[i][self.second_col] if self.second_col else None, + } + + def __iter__(self): + return ( + { + self.first_key: self.dataset[i][self.first_col], + self.second_key: self.dataset[i][self.second_col] if self.second_col else None, + } + for i in range(len(self)) + ) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/utils/__pycache__/__init__.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/utils/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5bc8c1e8acf5307fed2ddc7c35046b67733256d9 Binary files /dev/null and 
b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/utils/__pycache__/__init__.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/utils/__pycache__/file_utils.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/utils/__pycache__/file_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5f78e3a021245d8aa6dec0a7f012bd3f5b739afc Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/utils/__pycache__/file_utils.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/utils/__pycache__/gradio.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/utils/__pycache__/gradio.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..284e4f5f0853da1b3a36ff64a082dad026989b4f Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/utils/__pycache__/gradio.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/utils/__pycache__/logging.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/utils/__pycache__/logging.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7c37a89e3710f3d33f44e4be0c4c1d40bf034140 Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/evaluate/utils/__pycache__/logging.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/__pycache__/__init__.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6d572a0488c8ad369ddd99770c77a340180a01b2 Binary files /dev/null and 
b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/__pycache__/__init__.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/__pycache__/_common.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/__pycache__/_common.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..18b4ca1bfae67756a26a0e1f1983a03343baefab Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/__pycache__/_common.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/__init__.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/_async_client.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/_async_client.py new file mode 100644 index 0000000000000000000000000000000000000000..45285d8390cb0d8ab1a3b9cc6a0ce0d01f95b6c8 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/_async_client.py @@ -0,0 +1,3478 @@ +# coding=utf-8 +# Copyright 2023-present, the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# WARNING +# This entire file has been adapted from the sync-client code in `src/huggingface_hub/inference/_client.py`. +# Any change in InferenceClient will be automatically reflected in AsyncInferenceClient. +# To re-generate the code, run `make style` or `python ./utils/generate_async_inference_client.py --update`. +# WARNING +import asyncio +import base64 +import logging +import re +import warnings +from typing import TYPE_CHECKING, Any, AsyncIterable, Dict, List, Literal, Optional, Set, Union, overload + +from huggingface_hub import constants +from huggingface_hub.errors import InferenceTimeoutError +from huggingface_hub.inference._common import ( + TASKS_EXPECTING_IMAGES, + ContentT, + RequestParameters, + _async_stream_chat_completion_response, + _async_stream_text_generation_response, + _b64_encode, + _b64_to_image, + _bytes_to_dict, + _bytes_to_image, + _bytes_to_list, + _get_unsupported_text_generation_kwargs, + _import_numpy, + _set_unsupported_text_generation_kwargs, + raise_text_generation_error, +) +from huggingface_hub.inference._generated.types import ( + AudioClassificationOutputElement, + AudioClassificationOutputTransform, + AudioToAudioOutputElement, + AutomaticSpeechRecognitionOutput, + ChatCompletionInputGrammarType, + ChatCompletionInputMessage, + ChatCompletionInputStreamOptions, + ChatCompletionInputTool, + ChatCompletionInputToolChoiceClass, + ChatCompletionInputToolChoiceEnum, + ChatCompletionOutput, + ChatCompletionStreamOutput, + DocumentQuestionAnsweringOutputElement, + FillMaskOutputElement, + 
ImageClassificationOutputElement, + ImageClassificationOutputTransform, + ImageSegmentationOutputElement, + ImageSegmentationSubtask, + ImageToImageTargetSize, + ImageToTextOutput, + ImageToVideoTargetSize, + ObjectDetectionOutputElement, + Padding, + QuestionAnsweringOutputElement, + SummarizationOutput, + SummarizationTruncationStrategy, + TableQuestionAnsweringOutputElement, + TextClassificationOutputElement, + TextClassificationOutputTransform, + TextGenerationInputGrammarType, + TextGenerationOutput, + TextGenerationStreamOutput, + TextToSpeechEarlyStoppingEnum, + TokenClassificationAggregationStrategy, + TokenClassificationOutputElement, + TranslationOutput, + TranslationTruncationStrategy, + VisualQuestionAnsweringOutputElement, + ZeroShotClassificationOutputElement, + ZeroShotImageClassificationOutputElement, +) +from huggingface_hub.inference._providers import PROVIDER_OR_POLICY_T, get_provider_helper +from huggingface_hub.utils import build_hf_headers +from huggingface_hub.utils._auth import get_token + +from .._common import _async_yield_from, _import_aiohttp + + +if TYPE_CHECKING: + import numpy as np + from aiohttp import ClientResponse, ClientSession + from PIL.Image import Image + +logger = logging.getLogger(__name__) + + +MODEL_KWARGS_NOT_USED_REGEX = re.compile(r"The following `model_kwargs` are not used by the model: \[(.*?)\]") + + +class AsyncInferenceClient: + """ + Initialize a new Inference Client. + + [`InferenceClient`] aims to provide a unified experience to perform inference. The client can be used + seamlessly with either the (free) Inference API, self-hosted Inference Endpoints, or third-party Inference Providers. + + Args: + model (`str`, `optional`): + The model to run inference with. Can be a model id hosted on the Hugging Face Hub, e.g. `meta-llama/Meta-Llama-3-8B-Instruct` + or a URL to a deployed Inference Endpoint. Defaults to None, in which case a recommended model is + automatically selected for the task. 
+ Note: for better compatibility with OpenAI's client, `model` has been aliased as `base_url`. Those 2 + arguments are mutually exclusive. If a URL is passed as `model` or `base_url` for chat completion, the `(/v1)/chat/completions` suffix path will be appended to the URL. + provider (`str`, *optional*): + Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"clarifai"`, `"cohere"`, `"fal-ai"`, `"featherless-ai"`, `"fireworks-ai"`, `"groq"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"nscale"`, `"openai"`, `publicai`, `"replicate"`, `"sambanova"`, `"scaleway"`, `"together"` or `"zai-org"`. + Defaults to "auto" i.e. the first of the providers available for the model, sorted by the user's order in https://hf.co/settings/inference-providers. + If model is a URL or `base_url` is passed, then `provider` is not used. + token (`str`, *optional*): + Hugging Face token. Will default to the locally saved token if not provided. + Note: for better compatibility with OpenAI's client, `token` has been aliased as `api_key`. Those 2 + arguments are mutually exclusive and have the exact same behavior. + timeout (`float`, `optional`): + The maximum number of seconds to wait for a response from the server. Defaults to None, meaning it will loop until the server is available. + headers (`Dict[str, str]`, `optional`): + Additional headers to send to the server. By default only the authorization and user-agent headers are sent. + Values in this dictionary will override the default values. + bill_to (`str`, `optional`): + The billing account to use for the requests. By default the requests are billed on the user's account. + Requests can only be billed to an organization the user is a member of, and which has subscribed to Enterprise Hub. + cookies (`Dict[str, str]`, `optional`): + Additional cookies to send to the server. 
+ trust_env ('bool', 'optional'): + Trust environment settings for proxy configuration if the parameter is `True` (`False` by default). + proxies (`Any`, `optional`): + Proxies to use for the request. + base_url (`str`, `optional`): + Base URL to run inference. This is a duplicated argument from `model` to make [`InferenceClient`] + follow the same pattern as `openai.OpenAI` client. Cannot be used if `model` is set. Defaults to None. + api_key (`str`, `optional`): + Token to use for authentication. This is a duplicated argument from `token` to make [`InferenceClient`] + follow the same pattern as `openai.OpenAI` client. Cannot be used if `token` is set. Defaults to None. + """ + + def __init__( + self, + model: Optional[str] = None, + *, + provider: Optional[PROVIDER_OR_POLICY_T] = None, + token: Optional[str] = None, + timeout: Optional[float] = None, + headers: Optional[Dict[str, str]] = None, + cookies: Optional[Dict[str, str]] = None, + trust_env: bool = False, + proxies: Optional[Any] = None, + bill_to: Optional[str] = None, + # OpenAI compatibility + base_url: Optional[str] = None, + api_key: Optional[str] = None, + ) -> None: + if model is not None and base_url is not None: + raise ValueError( + "Received both `model` and `base_url` arguments. Please provide only one of them." + " `base_url` is an alias for `model` to make the API compatible with OpenAI's client." + " If using `base_url` for chat completion, the `/chat/completions` suffix path will be appended to the base url." + " When passing a URL as `model`, the client will not append any suffix path to it." + ) + if token is not None and api_key is not None: + raise ValueError( + "Received both `token` and `api_key` arguments. Please provide only one of them." + " `api_key` is an alias for `token` to make the API compatible with OpenAI's client." + " It has the exact same behavior as `token`." 
+ ) + token = token if token is not None else api_key + if isinstance(token, bool): + # Legacy behavior: previously is was possible to pass `token=False` to disable authentication. This is not + # supported anymore as authentication is required. Better to explicitly raise here rather than risking + # sending the locally saved token without the user knowing about it. + if token is False: + raise ValueError( + "Cannot use `token=False` to disable authentication as authentication is required to run Inference." + ) + warnings.warn( + "Using `token=True` to automatically use the locally saved token is deprecated and will be removed in a future release. " + "Please use `token=None` instead (default).", + DeprecationWarning, + ) + token = get_token() + + self.model: Optional[str] = base_url or model + self.token: Optional[str] = token + + self.headers = {**headers} if headers is not None else {} + if bill_to is not None: + if ( + constants.HUGGINGFACE_HEADER_X_BILL_TO in self.headers + and self.headers[constants.HUGGINGFACE_HEADER_X_BILL_TO] != bill_to + ): + warnings.warn( + f"Overriding existing '{self.headers[constants.HUGGINGFACE_HEADER_X_BILL_TO]}' value in headers with '{bill_to}'.", + UserWarning, + ) + self.headers[constants.HUGGINGFACE_HEADER_X_BILL_TO] = bill_to + + if token is not None and not token.startswith("hf_"): + warnings.warn( + "You've provided an external provider's API key, so requests will be billed directly by the provider. 
" + "The `bill_to` parameter is only applicable for Hugging Face billing and will be ignored.", + UserWarning, + ) + + # Configure provider + self.provider = provider + + self.cookies = cookies + self.timeout = timeout + self.trust_env = trust_env + self.proxies = proxies + + # Keep track of the sessions to close them properly + self._sessions: Dict["ClientSession", Set["ClientResponse"]] = dict() + + def __repr__(self): + return f"" + + @overload + async def _inner_post( # type: ignore[misc] + self, request_parameters: RequestParameters, *, stream: Literal[False] = ... + ) -> bytes: ... + + @overload + async def _inner_post( # type: ignore[misc] + self, request_parameters: RequestParameters, *, stream: Literal[True] = ... + ) -> AsyncIterable[bytes]: ... + + @overload + async def _inner_post( + self, request_parameters: RequestParameters, *, stream: bool = False + ) -> Union[bytes, AsyncIterable[bytes]]: ... + + async def _inner_post( + self, request_parameters: RequestParameters, *, stream: bool = False + ) -> Union[bytes, AsyncIterable[bytes]]: + """Make a request to the inference server.""" + + aiohttp = _import_aiohttp() + + # TODO: this should be handled in provider helpers directly + if request_parameters.task in TASKS_EXPECTING_IMAGES and "Accept" not in request_parameters.headers: + request_parameters.headers["Accept"] = "image/png" + + # Do not use context manager as we don't want to close the connection immediately when returning + # a stream + session = self._get_client_session(headers=request_parameters.headers) + + try: + response = await session.post( + request_parameters.url, json=request_parameters.json, data=request_parameters.data, proxy=self.proxies + ) + response_error_payload = None + if response.status != 200: + try: + response_error_payload = await response.json() # get payload before connection closed + except Exception: + pass + response.raise_for_status() + if stream: + return _async_yield_from(session, response) + else: + content = await 
response.read() + await session.close() + return content + except asyncio.TimeoutError as error: + await session.close() + # Convert any `TimeoutError` to a `InferenceTimeoutError` + raise InferenceTimeoutError(f"Inference call timed out: {request_parameters.url}") from error # type: ignore + except aiohttp.ClientResponseError as error: + error.response_error_payload = response_error_payload + await session.close() + raise error + except Exception: + await session.close() + raise + + async def __aenter__(self): + return self + + async def __aexit__(self, exc_type, exc_value, traceback): + await self.close() + + def __del__(self): + if len(self._sessions) > 0: + warnings.warn( + "Deleting 'AsyncInferenceClient' client but some sessions are still open. " + "This can happen if you've stopped streaming data from the server before the stream was complete. " + "To close the client properly, you must call `await client.close()` " + "or use an async context (e.g. `async with AsyncInferenceClient(): ...`." + ) + + async def close(self): + """Close all open sessions. + + By default, 'aiohttp.ClientSession' objects are closed automatically when a call is completed. However, if you + are streaming data from the server and you stop before the stream is complete, you must call this method to + close the session properly. + + Another possibility is to use an async context (e.g. `async with AsyncInferenceClient(): ...`). + """ + await asyncio.gather(*[session.close() for session in self._sessions.keys()]) + + async def audio_classification( + self, + audio: ContentT, + *, + model: Optional[str] = None, + top_k: Optional[int] = None, + function_to_apply: Optional["AudioClassificationOutputTransform"] = None, + ) -> List[AudioClassificationOutputElement]: + """ + Perform audio classification on the provided audio content. + + Args: + audio (Union[str, Path, bytes, BinaryIO]): + The audio content to classify. 
It can be raw audio bytes, a local audio file, or a URL pointing to an + audio file. + model (`str`, *optional*): + The model to use for audio classification. Can be a model ID hosted on the Hugging Face Hub + or a URL to a deployed Inference Endpoint. If not provided, the default recommended model for + audio classification will be used. + top_k (`int`, *optional*): + When specified, limits the output to the top K most probable classes. + function_to_apply (`"AudioClassificationOutputTransform"`, *optional*): + The function to apply to the model outputs in order to retrieve the scores. + + Returns: + `List[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence. + + Raises: + [`InferenceTimeoutError`]: + If the model is unavailable or the request times out. + `aiohttp.ClientResponseError`: + If the request fails with an HTTP error status code other than HTTP 503. + + Example: + ```py + # Must be run in an async context + >>> from huggingface_hub import AsyncInferenceClient + >>> client = AsyncInferenceClient() + >>> await client.audio_classification("audio.flac") + [ + AudioClassificationOutputElement(score=0.4976358711719513, label='hap'), + AudioClassificationOutputElement(score=0.3677836060523987, label='neu'), + ... 
+ ] + ``` + """ + model_id = model or self.model + provider_helper = get_provider_helper(self.provider, task="audio-classification", model=model_id) + request_parameters = provider_helper.prepare_request( + inputs=audio, + parameters={"function_to_apply": function_to_apply, "top_k": top_k}, + headers=self.headers, + model=model_id, + api_key=self.token, + ) + response = await self._inner_post(request_parameters) + return AudioClassificationOutputElement.parse_obj_as_list(response) + + async def audio_to_audio( + self, + audio: ContentT, + *, + model: Optional[str] = None, + ) -> List[AudioToAudioOutputElement]: + """ + Performs multiple tasks related to audio-to-audio depending on the model (eg: speech enhancement, source separation). + + Args: + audio (Union[str, Path, bytes, BinaryIO]): + The audio content for the model. It can be raw audio bytes, a local audio file, or a URL pointing to an + audio file. + model (`str`, *optional*): + The model can be any model which takes an audio file and returns another audio file. Can be a model ID hosted on the Hugging Face Hub + or a URL to a deployed Inference Endpoint. If not provided, the default recommended model for + audio_to_audio will be used. + + Returns: + `List[AudioToAudioOutputElement]`: A list of [`AudioToAudioOutputElement`] items containing audios label, content-type, and audio content in blob. + + Raises: + `InferenceTimeoutError`: + If the model is unavailable or the request times out. + `aiohttp.ClientResponseError`: + If the request fails with an HTTP error status code other than HTTP 503. 
+ + Example: + ```py + # Must be run in an async context + >>> from huggingface_hub import AsyncInferenceClient + >>> client = AsyncInferenceClient() + >>> audio_output = await client.audio_to_audio("audio.flac") + >>> async for i, item in enumerate(audio_output): + >>> with open(f"output_{i}.flac", "wb") as f: + f.write(item.blob) + ``` + """ + model_id = model or self.model + provider_helper = get_provider_helper(self.provider, task="audio-to-audio", model=model_id) + request_parameters = provider_helper.prepare_request( + inputs=audio, + parameters={}, + headers=self.headers, + model=model_id, + api_key=self.token, + ) + response = await self._inner_post(request_parameters) + audio_output = AudioToAudioOutputElement.parse_obj_as_list(response) + for item in audio_output: + item.blob = base64.b64decode(item.blob) + return audio_output + + async def automatic_speech_recognition( + self, + audio: ContentT, + *, + model: Optional[str] = None, + extra_body: Optional[Dict] = None, + ) -> AutomaticSpeechRecognitionOutput: + """ + Perform automatic speech recognition (ASR or audio-to-text) on the given audio content. + + Args: + audio (Union[str, Path, bytes, BinaryIO]): + The content to transcribe. It can be raw audio bytes, local audio file, or a URL to an audio file. + model (`str`, *optional*): + The model to use for ASR. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed + Inference Endpoint. If not provided, the default recommended model for ASR will be used. + extra_body (`Dict`, *optional*): + Additional provider-specific parameters to pass to the model. Refer to the provider's documentation + for supported parameters. + Returns: + [`AutomaticSpeechRecognitionOutput`]: An item containing the transcribed text and optionally the timestamp chunks. + + Raises: + [`InferenceTimeoutError`]: + If the model is unavailable or the request times out. 
    # Typing-only overload: with `stream=False` (the default) the call is typed as returning a
    # single `ChatCompletionOutput`. The runtime logic lives in the non-overloaded definition.
    @overload
    async def chat_completion(  # type: ignore
        self,
        messages: List[Union[Dict, ChatCompletionInputMessage]],
        *,
        model: Optional[str] = None,
        stream: Literal[False] = False,
        frequency_penalty: Optional[float] = None,
        logit_bias: Optional[List[float]] = None,
        logprobs: Optional[bool] = None,
        max_tokens: Optional[int] = None,
        n: Optional[int] = None,
        presence_penalty: Optional[float] = None,
        response_format: Optional[ChatCompletionInputGrammarType] = None,
        seed: Optional[int] = None,
        stop: Optional[List[str]] = None,
        stream_options: Optional[ChatCompletionInputStreamOptions] = None,
        temperature: Optional[float] = None,
        tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
        tool_prompt: Optional[str] = None,
        tools: Optional[List[ChatCompletionInputTool]] = None,
        top_logprobs: Optional[int] = None,
        top_p: Optional[float] = None,
        extra_body: Optional[Dict] = None,
    ) -> ChatCompletionOutput: ...
    # Typing-only overload: with `stream=True` the call is typed as returning an async iterable
    # of `ChatCompletionStreamOutput` chunks. The runtime logic lives in the non-overloaded definition.
    @overload
    async def chat_completion(  # type: ignore
        self,
        messages: List[Union[Dict, ChatCompletionInputMessage]],
        *,
        model: Optional[str] = None,
        stream: Literal[True] = True,
        frequency_penalty: Optional[float] = None,
        logit_bias: Optional[List[float]] = None,
        logprobs: Optional[bool] = None,
        max_tokens: Optional[int] = None,
        n: Optional[int] = None,
        presence_penalty: Optional[float] = None,
        response_format: Optional[ChatCompletionInputGrammarType] = None,
        seed: Optional[int] = None,
        stop: Optional[List[str]] = None,
        stream_options: Optional[ChatCompletionInputStreamOptions] = None,
        temperature: Optional[float] = None,
        tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
        tool_prompt: Optional[str] = None,
        tools: Optional[List[ChatCompletionInputTool]] = None,
        top_logprobs: Optional[int] = None,
        top_p: Optional[float] = None,
        extra_body: Optional[Dict] = None,
    ) -> AsyncIterable[ChatCompletionStreamOutput]: ...
    # Typing-only fallback overload: when `stream` is only known to be a `bool`, the result is
    # typed as the union of the streaming and non-streaming return types.
    @overload
    async def chat_completion(
        self,
        messages: List[Union[Dict, ChatCompletionInputMessage]],
        *,
        model: Optional[str] = None,
        stream: bool = False,
        frequency_penalty: Optional[float] = None,
        logit_bias: Optional[List[float]] = None,
        logprobs: Optional[bool] = None,
        max_tokens: Optional[int] = None,
        n: Optional[int] = None,
        presence_penalty: Optional[float] = None,
        response_format: Optional[ChatCompletionInputGrammarType] = None,
        seed: Optional[int] = None,
        stop: Optional[List[str]] = None,
        stream_options: Optional[ChatCompletionInputStreamOptions] = None,
        temperature: Optional[float] = None,
        tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
        tool_prompt: Optional[str] = None,
        tools: Optional[List[ChatCompletionInputTool]] = None,
        top_logprobs: Optional[int] = None,
        top_p: Optional[float] = None,
        extra_body: Optional[Dict] = None,
    ) -> Union[ChatCompletionOutput, AsyncIterable[ChatCompletionStreamOutput]]: ...
+ + async def chat_completion( + self, + messages: List[Union[Dict, ChatCompletionInputMessage]], + *, + model: Optional[str] = None, + stream: bool = False, + # Parameters from ChatCompletionInput (handled manually) + frequency_penalty: Optional[float] = None, + logit_bias: Optional[List[float]] = None, + logprobs: Optional[bool] = None, + max_tokens: Optional[int] = None, + n: Optional[int] = None, + presence_penalty: Optional[float] = None, + response_format: Optional[ChatCompletionInputGrammarType] = None, + seed: Optional[int] = None, + stop: Optional[List[str]] = None, + stream_options: Optional[ChatCompletionInputStreamOptions] = None, + temperature: Optional[float] = None, + tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None, + tool_prompt: Optional[str] = None, + tools: Optional[List[ChatCompletionInputTool]] = None, + top_logprobs: Optional[int] = None, + top_p: Optional[float] = None, + extra_body: Optional[Dict] = None, + ) -> Union[ChatCompletionOutput, AsyncIterable[ChatCompletionStreamOutput]]: + """ + A method for completing conversations using a specified language model. + + > [!TIP] + > The `client.chat_completion` method is aliased as `client.chat.completions.create` for compatibility with OpenAI's client. + > Inputs and outputs are strictly the same and using either syntax will yield the same results. + > Check out the [Inference guide](https://huggingface.co/docs/huggingface_hub/guides/inference#openai-compatibility) + > for more details about OpenAI's compatibility. + + > [!TIP] + > You can pass provider-specific parameters to the model by using the `extra_body` argument. + + Args: + messages (List of [`ChatCompletionInputMessage`]): + Conversation history consisting of roles and content pairs. + model (`str`, *optional*): + The model to use for chat-completion. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed + Inference Endpoint. 
If not provided, the default recommended model for chat-based text-generation will be used. + See https://huggingface.co/tasks/text-generation for more details. + If `model` is a model ID, it is passed to the server as the `model` parameter. If you want to define a + custom URL while setting `model` in the request payload, you must set `base_url` when initializing [`InferenceClient`]. + frequency_penalty (`float`, *optional*): + Penalizes new tokens based on their existing frequency + in the text so far. Range: [-2.0, 2.0]. Defaults to 0.0. + logit_bias (`List[float]`, *optional*): + Adjusts the likelihood of specific tokens appearing in the generated output. + logprobs (`bool`, *optional*): + Whether to return log probabilities of the output tokens or not. If true, returns the log + probabilities of each output token returned in the content of message. + max_tokens (`int`, *optional*): + Maximum number of tokens allowed in the response. Defaults to 100. + n (`int`, *optional*): + The number of completions to generate for each prompt. + presence_penalty (`float`, *optional*): + Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the + text so far, increasing the model's likelihood to talk about new topics. + response_format ([`ChatCompletionInputGrammarType`], *optional*): + Grammar constraints. Can be either a JSONSchema or a regex. + seed (Optional[`int`], *optional*): + Seed for reproducible control flow. Defaults to None. + stop (`List[str]`, *optional*): + Up to four strings which trigger the end of the response. + Defaults to None. + stream (`bool`, *optional*): + Enable realtime streaming of responses. Defaults to False. + stream_options ([`ChatCompletionInputStreamOptions`], *optional*): + Options for streaming completions. + temperature (`float`, *optional*): + Controls randomness of the generations. Lower values ensure + less random completions. Range: [0, 2]. Defaults to 1.0. 
+ top_logprobs (`int`, *optional*): + An integer between 0 and 5 specifying the number of most likely tokens to return at each token + position, each with an associated log probability. logprobs must be set to true if this parameter is + used. + top_p (`float`, *optional*): + Fraction of the most likely next words to sample from. + Must be between 0 and 1. Defaults to 1.0. + tool_choice ([`ChatCompletionInputToolChoiceClass`] or [`ChatCompletionInputToolChoiceEnum`], *optional*): + The tool to use for the completion. Defaults to "auto". + tool_prompt (`str`, *optional*): + A prompt to be appended before the tools. + tools (List of [`ChatCompletionInputTool`], *optional*): + A list of tools the model may call. Currently, only functions are supported as a tool. Use this to + provide a list of functions the model may generate JSON inputs for. + extra_body (`Dict`, *optional*): + Additional provider-specific parameters to pass to the model. Refer to the provider's documentation + for supported parameters. + Returns: + [`ChatCompletionOutput`] or Iterable of [`ChatCompletionStreamOutput`]: + Generated text returned from the server: + - if `stream=False`, the generated text is returned as a [`ChatCompletionOutput`] (default). + - if `stream=True`, the generated text is returned token by token as a sequence of [`ChatCompletionStreamOutput`]. + + Raises: + [`InferenceTimeoutError`]: + If the model is unavailable or the request times out. + `aiohttp.ClientResponseError`: + If the request fails with an HTTP error status code other than HTTP 503. 
+ + Example: + + ```py + # Must be run in an async context + >>> from huggingface_hub import AsyncInferenceClient + >>> messages = [{"role": "user", "content": "What is the capital of France?"}] + >>> client = AsyncInferenceClient("meta-llama/Meta-Llama-3-8B-Instruct") + >>> await client.chat_completion(messages, max_tokens=100) + ChatCompletionOutput( + choices=[ + ChatCompletionOutputComplete( + finish_reason='eos_token', + index=0, + message=ChatCompletionOutputMessage( + role='assistant', + content='The capital of France is Paris.', + name=None, + tool_calls=None + ), + logprobs=None + ) + ], + created=1719907176, + id='', + model='meta-llama/Meta-Llama-3-8B-Instruct', + object='text_completion', + system_fingerprint='2.0.4-sha-f426a33', + usage=ChatCompletionOutputUsage( + completion_tokens=8, + prompt_tokens=17, + total_tokens=25 + ) + ) + ``` + + Example using streaming: + ```py + # Must be run in an async context + >>> from huggingface_hub import AsyncInferenceClient + >>> messages = [{"role": "user", "content": "What is the capital of France?"}] + >>> client = AsyncInferenceClient("meta-llama/Meta-Llama-3-8B-Instruct") + >>> async for token in await client.chat_completion(messages, max_tokens=10, stream=True): + ... print(token) + ChatCompletionStreamOutput(choices=[ChatCompletionStreamOutputChoice(delta=ChatCompletionStreamOutputDelta(content='The', role='assistant'), index=0, finish_reason=None)], created=1710498504) + ChatCompletionStreamOutput(choices=[ChatCompletionStreamOutputChoice(delta=ChatCompletionStreamOutputDelta(content=' capital', role='assistant'), index=0, finish_reason=None)], created=1710498504) + (...) 
+ ChatCompletionStreamOutput(choices=[ChatCompletionStreamOutputChoice(delta=ChatCompletionStreamOutputDelta(content=' may', role='assistant'), index=0, finish_reason=None)], created=1710498504) + ``` + + Example using OpenAI's syntax: + ```py + # Must be run in an async context + # instead of `from openai import OpenAI` + from huggingface_hub import AsyncInferenceClient + + # instead of `client = OpenAI(...)` + client = AsyncInferenceClient( + base_url=..., + api_key=..., + ) + + output = await client.chat.completions.create( + model="meta-llama/Meta-Llama-3-8B-Instruct", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Count to 10"}, + ], + stream=True, + max_tokens=1024, + ) + + for chunk in output: + print(chunk.choices[0].delta.content) + ``` + + Example using a third-party provider directly with extra (provider-specific) parameters. Usage will be billed on your Together AI account. + ```py + >>> from huggingface_hub import InferenceClient + >>> client = InferenceClient( + ... provider="together", # Use Together AI provider + ... api_key="", # Pass your Together API key directly + ... ) + >>> client.chat_completion( + ... model="meta-llama/Meta-Llama-3-8B-Instruct", + ... messages=[{"role": "user", "content": "What is the capital of France?"}], + ... extra_body={"safety_model": "Meta-Llama/Llama-Guard-7b"}, + ... ) + ``` + + Example using a third-party provider through Hugging Face Routing. Usage will be billed on your Hugging Face account. + ```py + >>> from huggingface_hub import InferenceClient + >>> client = InferenceClient( + ... provider="sambanova", # Use Sambanova provider + ... api_key="hf_...", # Pass your HF token + ... ) + >>> client.chat_completion( + ... model="meta-llama/Meta-Llama-3-8B-Instruct", + ... messages=[{"role": "user", "content": "What is the capital of France?"}], + ... 
) + ``` + + Example using Image + Text as input: + ```py + # Must be run in an async context + >>> from huggingface_hub import AsyncInferenceClient + + # provide a remote URL + >>> image_url ="https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" + # or a base64-encoded image + >>> image_path = "/path/to/image.jpeg" + >>> with open(image_path, "rb") as f: + ... base64_image = base64.b64encode(f.read()).decode("utf-8") + >>> image_url = f"data:image/jpeg;base64,{base64_image}" + + >>> client = AsyncInferenceClient("meta-llama/Llama-3.2-11B-Vision-Instruct") + >>> output = await client.chat.completions.create( + ... messages=[ + ... { + ... "role": "user", + ... "content": [ + ... { + ... "type": "image_url", + ... "image_url": {"url": image_url}, + ... }, + ... { + ... "type": "text", + ... "text": "Describe this image in one sentence.", + ... }, + ... ], + ... }, + ... ], + ... ) + >>> output + The image depicts the iconic Statue of Liberty situated in New York Harbor, New York, on a clear day. + ``` + + Example using tools: + ```py + # Must be run in an async context + >>> client = AsyncInferenceClient("meta-llama/Meta-Llama-3-70B-Instruct") + >>> messages = [ + ... { + ... "role": "system", + ... "content": "Don't make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous.", + ... }, + ... { + ... "role": "user", + ... "content": "What's the weather like the next 3 days in San Francisco, CA?", + ... }, + ... ] + >>> tools = [ + ... { + ... "type": "function", + ... "function": { + ... "name": "get_current_weather", + ... "description": "Get the current weather", + ... "parameters": { + ... "type": "object", + ... "properties": { + ... "location": { + ... "type": "string", + ... "description": "The city and state, e.g. San Francisco, CA", + ... }, + ... "format": { + ... "type": "string", + ... "enum": ["celsius", "fahrenheit"], + ... 
"description": "The temperature unit to use. Infer this from the users location.", + ... }, + ... }, + ... "required": ["location", "format"], + ... }, + ... }, + ... }, + ... { + ... "type": "function", + ... "function": { + ... "name": "get_n_day_weather_forecast", + ... "description": "Get an N-day weather forecast", + ... "parameters": { + ... "type": "object", + ... "properties": { + ... "location": { + ... "type": "string", + ... "description": "The city and state, e.g. San Francisco, CA", + ... }, + ... "format": { + ... "type": "string", + ... "enum": ["celsius", "fahrenheit"], + ... "description": "The temperature unit to use. Infer this from the users location.", + ... }, + ... "num_days": { + ... "type": "integer", + ... "description": "The number of days to forecast", + ... }, + ... }, + ... "required": ["location", "format", "num_days"], + ... }, + ... }, + ... }, + ... ] + + >>> response = await client.chat_completion( + ... model="meta-llama/Meta-Llama-3-70B-Instruct", + ... messages=messages, + ... tools=tools, + ... tool_choice="auto", + ... max_tokens=500, + ... ) + >>> response.choices[0].message.tool_calls[0].function + ChatCompletionOutputFunctionDefinition( + arguments={ + 'location': 'San Francisco, CA', + 'format': 'fahrenheit', + 'num_days': 3 + }, + name='get_n_day_weather_forecast', + description=None + ) + ``` + + Example using response_format: + ```py + # Must be run in an async context + >>> from huggingface_hub import AsyncInferenceClient + >>> client = AsyncInferenceClient("meta-llama/Meta-Llama-3-70B-Instruct") + >>> messages = [ + ... { + ... "role": "user", + ... "content": "I saw a puppy a cat and a raccoon during my bike ride in the park. What did I saw and when?", + ... }, + ... ] + >>> response_format = { + ... "type": "json", + ... "value": { + ... "properties": { + ... "location": {"type": "string"}, + ... "activity": {"type": "string"}, + ... "animals_seen": {"type": "integer", "minimum": 1, "maximum": 5}, + ... 
"animals": {"type": "array", "items": {"type": "string"}}, + ... }, + ... "required": ["location", "activity", "animals_seen", "animals"], + ... }, + ... } + >>> response = await client.chat_completion( + ... messages=messages, + ... response_format=response_format, + ... max_tokens=500, + ... ) + >>> response.choices[0].message.content + '{\n\n"activity": "bike ride",\n"animals": ["puppy", "cat", "raccoon"],\n"animals_seen": 3,\n"location": "park"}' + ``` + """ + # Since `chat_completion(..., model=xxx)` is also a payload parameter for the server, we need to handle 'model' differently. + # `self.model` takes precedence over 'model' argument for building URL. + # `model` takes precedence for payload value. + model_id_or_url = self.model or model + payload_model = model or self.model + + # Get the provider helper + provider_helper = get_provider_helper( + self.provider, + task="conversational", + model=model_id_or_url + if model_id_or_url is not None and model_id_or_url.startswith(("http://", "https://")) + else payload_model, + ) + + # Prepare the payload + parameters = { + "model": payload_model, + "frequency_penalty": frequency_penalty, + "logit_bias": logit_bias, + "logprobs": logprobs, + "max_tokens": max_tokens, + "n": n, + "presence_penalty": presence_penalty, + "response_format": response_format, + "seed": seed, + "stop": stop, + "temperature": temperature, + "tool_choice": tool_choice, + "tool_prompt": tool_prompt, + "tools": tools, + "top_logprobs": top_logprobs, + "top_p": top_p, + "stream": stream, + "stream_options": stream_options, + **(extra_body or {}), + } + request_parameters = provider_helper.prepare_request( + inputs=messages, + parameters=parameters, + headers=self.headers, + model=model_id_or_url, + api_key=self.token, + ) + data = await self._inner_post(request_parameters, stream=stream) + + if stream: + return _async_stream_chat_completion_response(data) # type: ignore[arg-type] + + return ChatCompletionOutput.parse_obj_as_instance(data) # 
async def document_question_answering(
    self,
    image: ContentT,
    question: str,
    *,
    model: Optional[str] = None,
    doc_stride: Optional[int] = None,
    handle_impossible_answer: Optional[bool] = None,
    lang: Optional[str] = None,
    max_answer_len: Optional[int] = None,
    max_question_len: Optional[int] = None,
    max_seq_len: Optional[int] = None,
    top_k: Optional[int] = None,
    word_boxes: Optional[List[Union[List[float], str]]] = None,
) -> List[DocumentQuestionAnsweringOutputElement]:
    """
    Answer a question about the content of a document image.

    Args:
        image (`Union[str, Path, bytes, BinaryIO]`):
            The document image used as context. It can be raw bytes, an image file, or a URL to an online image.
        question (`str`):
            The question to answer.
        model (`str`, *optional*):
            The model to use for the document question answering task. Can be a model ID hosted on the Hugging Face
            Hub or a URL to a deployed Inference Endpoint. If not provided, the default recommended document
            question answering model will be used. Defaults to None.
        doc_stride (`int`, *optional*):
            If the words in the document are too long to fit with the question for the model, the document is split
            into several chunks with some overlap. This argument controls the size of that overlap.
        handle_impossible_answer (`bool`, *optional*):
            Whether to accept "impossible" as an answer.
        lang (`str`, *optional*):
            Language to use while running OCR. Defaults to English.
        max_answer_len (`int`, *optional*):
            The maximum length of predicted answers (e.g., only answers with a shorter length are considered).
        max_question_len (`int`, *optional*):
            The maximum length of the question after tokenization. It will be truncated if needed.
        max_seq_len (`int`, *optional*):
            The maximum length of the total sentence (context + question) in tokens of each chunk passed to the
            model. The context will be split into several chunks (using `doc_stride` as overlap) if needed.
        top_k (`int`, *optional*):
            The number of answers to return (chosen by order of likelihood). Fewer than `top_k` answers can be
            returned if there are not enough options available within the context.
        word_boxes (`List[Union[List[float], str]]`, *optional*):
            A list of words and bounding boxes (normalized 0->1000). If provided, the inference will skip the OCR
            step and use the provided bounding boxes instead.

    Returns:
        `List[DocumentQuestionAnsweringOutputElement]`: a list of [`DocumentQuestionAnsweringOutputElement`] items
        containing the predicted label, associated probability, word ids, and page number.

    Raises:
        [`InferenceTimeoutError`]:
            If the model is unavailable or the request times out.
        `aiohttp.ClientResponseError`:
            If the request fails with an HTTP error status code other than HTTP 503.

    Example:
    ```py
    # Must be run in an async context
    >>> from huggingface_hub import AsyncInferenceClient
    >>> client = AsyncInferenceClient()
    >>> await client.document_question_answering(image="https://huggingface.co/spaces/impira/docquery/resolve/2359223c1837a7587402bda0f2643382a6eefeab/invoice.png", question="What is the invoice number?")
    [DocumentQuestionAnsweringOutputElement(answer='us-001', end=16, score=0.9999666213989258, start=16)]
    ```
    """
    # A per-call `model` wins over the model configured on the client.
    target_model = model or self.model
    helper = get_provider_helper(self.provider, task="document-question-answering", model=target_model)

    # The image goes through `_b64_encode` and is sent next to the question.
    payload_inputs: Dict[str, Any] = {"question": question, "image": _b64_encode(image)}
    task_options = {
        "doc_stride": doc_stride,
        "handle_impossible_answer": handle_impossible_answer,
        "lang": lang,
        "max_answer_len": max_answer_len,
        "max_question_len": max_question_len,
        "max_seq_len": max_seq_len,
        "top_k": top_k,
        "word_boxes": word_boxes,
    }
    request = helper.prepare_request(
        inputs=payload_inputs,
        parameters=task_options,
        headers=self.headers,
        model=target_model,
        api_key=self.token,
    )
    raw_response = await self._inner_post(request)
    return DocumentQuestionAnsweringOutputElement.parse_obj_as_list(raw_response)
async def feature_extraction(
    self,
    text: str,
    *,
    normalize: Optional[bool] = None,
    prompt_name: Optional[str] = None,
    truncate: Optional[bool] = None,
    truncation_direction: Optional[Literal["Left", "Right"]] = None,
    model: Optional[str] = None,
) -> "np.ndarray":
    """
    Compute the embedding of a text.

    Args:
        text (`str`):
            The text to embed.
        model (`str`, *optional*):
            The model to use for the feature extraction task. Can be a model ID hosted on the Hugging Face Hub or a
            URL to a deployed Inference Endpoint. If not provided, the default recommended feature extraction model
            will be used. Defaults to None.
        normalize (`bool`, *optional*):
            Whether to normalize the embeddings or not.
            Only available on server powered by Text-Embedding-Inference.
        prompt_name (`str`, *optional*):
            The name of the prompt that should be used for encoding. If not set, no prompt will be applied.
            Must be a key in the `Sentence Transformers` configuration `prompts` dictionary.
            For example if ``prompt_name`` is "query" and the ``prompts`` is {"query": "query: ",...},
            then the sentence "What is the capital of France?" will be encoded as
            "query: What is the capital of France?" because the prompt text is prepended before any text to encode.
        truncate (`bool`, *optional*):
            Whether to truncate the embeddings or not.
            Only available on server powered by Text-Embedding-Inference.
        truncation_direction (`Literal["Left", "Right"]`, *optional*):
            Which side of the input should be truncated when `truncate=True` is passed.

    Returns:
        `np.ndarray`: The embedding representing the input text as a float32 numpy array.

    Raises:
        [`InferenceTimeoutError`]:
            If the model is unavailable or the request times out.
        `aiohttp.ClientResponseError`:
            If the request fails with an HTTP error status code other than HTTP 503.

    Example:
    ```py
    # Must be run in an async context
    >>> from huggingface_hub import AsyncInferenceClient
    >>> client = AsyncInferenceClient()
    >>> await client.feature_extraction("Hi, who are you?")
    array([[ 2.424802  ,  2.93384   ,  1.1750331 , ...,  1.240499, -0.13776633, -0.7889173 ],
    [-0.42943227, -0.6364878 , -1.693462  , ...,  0.41978157, -2.4336355 ,  0.6162071 ],
    ...,
    [ 0.28552425, -0.928395  , -1.2077185 , ...,  0.76810825, -2.1069427 ,  0.6236161 ]], dtype=float32)
    ```
    """
    # A per-call `model` wins over the model configured on the client.
    target_model = model or self.model
    helper = get_provider_helper(self.provider, task="feature-extraction", model=target_model)
    embedding_options = {
        "normalize": normalize,
        "prompt_name": prompt_name,
        "truncate": truncate,
        "truncation_direction": truncation_direction,
    }
    request = helper.prepare_request(
        inputs=text,
        parameters=embedding_options,
        headers=self.headers,
        model=target_model,
        api_key=self.token,
    )
    raw_response = await self._inner_post(request)
    # numpy is obtained through `_import_numpy()` only once the request has completed.
    np = _import_numpy()
    embedding = helper.get_response(raw_response)
    return np.array(embedding, dtype="float32")
async def fill_mask(
    self,
    text: str,
    *,
    model: Optional[str] = None,
    targets: Optional[List[str]] = None,
    top_k: Optional[int] = None,
) -> List[FillMaskOutputElement]:
    """
    Predict the missing word (token, to be precise) at the masked position of a text.

    Args:
        text (`str`):
            The string to fill in. It must contain the [MASK] token (check the model card for the exact name of the
            mask).
        model (`str`, *optional*):
            The model to use for the fill mask task. Can be a model ID hosted on the Hugging Face Hub or a URL to
            a deployed Inference Endpoint. If not provided, the default recommended fill mask model will be used.
        targets (`List[str]`, *optional*):
            When passed, the model will limit the scores to the passed targets instead of looking up in the whole
            vocabulary. If the provided targets are not in the model vocab, they will be tokenized and the first
            resulting token will be used (with a warning, and that might be slower).
        top_k (`int`, *optional*):
            When passed, overrides the number of predictions to return.

    Returns:
        `List[FillMaskOutputElement]`: a list of [`FillMaskOutputElement`] items containing the predicted label,
        associated probability, token reference, and completed text.

    Raises:
        [`InferenceTimeoutError`]:
            If the model is unavailable or the request times out.
        `aiohttp.ClientResponseError`:
            If the request fails with an HTTP error status code other than HTTP 503.

    Example:
    ```py
    # Must be run in an async context
    >>> from huggingface_hub import AsyncInferenceClient
    >>> client = AsyncInferenceClient()
    >>> await client.fill_mask("The goal of life is <mask>.")
    [
        FillMaskOutputElement(score=0.06897063553333282, token=11098, token_str=' happiness', sequence='The goal of life is happiness.'),
        FillMaskOutputElement(score=0.06554922461509705, token=45075, token_str=' immortality', sequence='The goal of life is immortality.')
    ]
    ```
    """
    # A per-call `model` wins over the model configured on the client.
    target_model = model or self.model
    helper = get_provider_helper(self.provider, task="fill-mask", model=target_model)
    mask_options = {"targets": targets, "top_k": top_k}
    request = helper.prepare_request(
        inputs=text,
        parameters=mask_options,
        headers=self.headers,
        model=target_model,
        api_key=self.token,
    )
    raw_response = await self._inner_post(request)
    return FillMaskOutputElement.parse_obj_as_list(raw_response)
async def image_classification(
    self,
    image: ContentT,
    *,
    model: Optional[str] = None,
    function_to_apply: Optional["ImageClassificationOutputTransform"] = None,
    top_k: Optional[int] = None,
) -> List[ImageClassificationOutputElement]:
    """
    Classify an image with the specified model.

    Args:
        image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
            The image to classify. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
        model (`str`, *optional*):
            The model to use for image classification. Can be a model ID hosted on the Hugging Face Hub or a URL to
            a deployed Inference Endpoint. If not provided, the default recommended model for image classification
            will be used.
        function_to_apply (`"ImageClassificationOutputTransform"`, *optional*):
            The function to apply to the model outputs in order to retrieve the scores.
        top_k (`int`, *optional*):
            When specified, limits the output to the top K most probable classes.

    Returns:
        `List[ImageClassificationOutputElement]`: a list of [`ImageClassificationOutputElement`] items containing
        the predicted label and associated probability.

    Raises:
        [`InferenceTimeoutError`]:
            If the model is unavailable or the request times out.
        `aiohttp.ClientResponseError`:
            If the request fails with an HTTP error status code other than HTTP 503.

    Example:
    ```py
    # Must be run in an async context
    >>> from huggingface_hub import AsyncInferenceClient
    >>> client = AsyncInferenceClient()
    >>> await client.image_classification("https://upload.wikimedia.org/wikipedia/commons/thumb/4/43/Cute_dog.jpg/320px-Cute_dog.jpg")
    [ImageClassificationOutputElement(label='Blenheim spaniel', score=0.9779096841812134), ...]
    ```
    """
    # A per-call `model` wins over the model configured on the client.
    target_model = model or self.model
    helper = get_provider_helper(self.provider, task="image-classification", model=target_model)
    classification_options = {"function_to_apply": function_to_apply, "top_k": top_k}
    request = helper.prepare_request(
        inputs=image,
        parameters=classification_options,
        headers=self.headers,
        model=target_model,
        api_key=self.token,
    )
    raw_response = await self._inner_post(request)
    return ImageClassificationOutputElement.parse_obj_as_list(raw_response)
async def image_segmentation(
    self,
    image: ContentT,
    *,
    model: Optional[str] = None,
    mask_threshold: Optional[float] = None,
    overlap_mask_area_threshold: Optional[float] = None,
    subtask: Optional["ImageSegmentationSubtask"] = None,
    threshold: Optional[float] = None,
) -> List[ImageSegmentationOutputElement]:
    """
    Segment an image with the specified model.

    > [!WARNING]
    > You must have `PIL` installed if you want to work with images (`pip install Pillow`).

    Args:
        image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
            The image to segment. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
        model (`str`, *optional*):
            The model to use for image segmentation. Can be a model ID hosted on the Hugging Face Hub or a URL to a
            deployed Inference Endpoint. If not provided, the default recommended model for image segmentation will
            be used.
        mask_threshold (`float`, *optional*):
            Threshold to use when turning the predicted masks into binary values.
        overlap_mask_area_threshold (`float`, *optional*):
            Mask overlap threshold to eliminate small, disconnected segments.
        subtask (`"ImageSegmentationSubtask"`, *optional*):
            Segmentation task to be performed, depending on model capabilities.
        threshold (`float`, *optional*):
            Probability threshold to filter out predicted masks.

    Returns:
        `List[ImageSegmentationOutputElement]`: A list of [`ImageSegmentationOutputElement`] items containing the
        segmented masks and associated attributes.

    Raises:
        [`InferenceTimeoutError`]:
            If the model is unavailable or the request times out.
        `aiohttp.ClientResponseError`:
            If the request fails with an HTTP error status code other than HTTP 503.

    Example:
    ```py
    # Must be run in an async context
    >>> from huggingface_hub import AsyncInferenceClient
    >>> client = AsyncInferenceClient()
    >>> await client.image_segmentation("cat.jpg")
    [ImageSegmentationOutputElement(score=0.989008, label='LABEL_184', mask=<PIL.Image.Image ...>), ...]
    ```
    """
    # A per-call `model` wins over the model configured on the client.
    target_model = model or self.model
    helper = get_provider_helper(self.provider, task="image-segmentation", model=target_model)
    segmentation_options = {
        "mask_threshold": mask_threshold,
        "overlap_mask_area_threshold": overlap_mask_area_threshold,
        "subtask": subtask,
        "threshold": threshold,
    }
    request = helper.prepare_request(
        inputs=image,
        parameters=segmentation_options,
        headers=self.headers,
        model=target_model,
        api_key=self.token,
    )
    raw_response = await self._inner_post(request)
    segments = ImageSegmentationOutputElement.parse_obj_as_list(raw_response)
    # Each parsed mask is replaced in place by the result of `_b64_to_image`.
    for segment in segments:
        segment.mask = _b64_to_image(segment.mask)  # type: ignore [assignment]
    return segments
async def image_to_image(
    self,
    image: ContentT,
    prompt: Optional[str] = None,
    *,
    negative_prompt: Optional[str] = None,
    num_inference_steps: Optional[int] = None,
    guidance_scale: Optional[float] = None,
    model: Optional[str] = None,
    target_size: Optional[ImageToImageTargetSize] = None,
    **kwargs,
) -> "Image":
    """
    Translate an input image into a new image using a specified model.

    > [!WARNING]
    > You must have `PIL` installed if you want to work with images (`pip install Pillow`).

    Args:
        image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
            The input image for translation. It can be raw bytes, an image file, a URL to an online image, or a PIL
            Image.
        prompt (`str`, *optional*):
            The text prompt to guide the image generation.
        negative_prompt (`str`, *optional*):
            One prompt to guide what NOT to include in image generation.
        num_inference_steps (`int`, *optional*):
            For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher
            quality image at the expense of slower inference.
        guidance_scale (`float`, *optional*):
            For diffusion models. A higher guidance scale value encourages the model to generate images closely
            linked to the text prompt at the expense of lower image quality.
        model (`str`, *optional*):
            The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
            Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.
        target_size (`ImageToImageTargetSize`, *optional*):
            The size in pixels of the output image. This parameter is only supported by some providers and for
            specific models. It will be ignored when unsupported.
        **kwargs:
            Extra keyword arguments, merged into the request parameters after the named ones (so a key given here
            replaces a named parameter with the same key).

    Returns:
        `Image`: The translated image.

    Raises:
        [`InferenceTimeoutError`]:
            If the model is unavailable or the request times out.
        `aiohttp.ClientResponseError`:
            If the request fails with an HTTP error status code other than HTTP 503.

    Example:
    ```py
    # Must be run in an async context
    >>> from huggingface_hub import AsyncInferenceClient
    >>> client = AsyncInferenceClient()
    >>> image = await client.image_to_image("cat.jpg", prompt="turn the cat into a tiger")
    >>> image.save("tiger.jpg")
    ```
    """
    # A per-call `model` wins over the model configured on the client.
    target_model = model or self.model
    helper = get_provider_helper(self.provider, task="image-to-image", model=target_model)
    generation_options = {
        "prompt": prompt,
        "negative_prompt": negative_prompt,
        "target_size": target_size,
        "num_inference_steps": num_inference_steps,
        "guidance_scale": guidance_scale,
        **kwargs,
    }
    request = helper.prepare_request(
        inputs=image,
        parameters=generation_options,
        headers=self.headers,
        model=target_model,
        api_key=self.token,
    )
    raw_response = await self._inner_post(request)
    # The provider helper post-processes the raw response before it is turned into an image.
    image_bytes = helper.get_response(raw_response, request)
    return _bytes_to_image(image_bytes)
async def image_to_video(
    self,
    image: ContentT,
    *,
    model: Optional[str] = None,
    prompt: Optional[str] = None,
    negative_prompt: Optional[str] = None,
    num_frames: Optional[float] = None,
    num_inference_steps: Optional[int] = None,
    guidance_scale: Optional[float] = None,
    seed: Optional[int] = None,
    target_size: Optional[ImageToVideoTargetSize] = None,
    **kwargs,
) -> bytes:
    """
    Generate a video from an input image.

    Args:
        image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
            The input image to generate a video from. It can be raw bytes, an image file, a URL to an online image,
            or a PIL Image.
        model (`str`, *optional*):
            The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
            Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.
        prompt (`str`, *optional*):
            The text prompt to guide the video generation.
        negative_prompt (`str`, *optional*):
            One prompt to guide what NOT to include in video generation.
        num_frames (`float`, *optional*):
            The num_frames parameter determines how many video frames are generated.
        num_inference_steps (`int`, *optional*):
            For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher
            quality video at the expense of slower inference.
        guidance_scale (`float`, *optional*):
            For diffusion models. A higher guidance scale value encourages the model to generate videos closely
            linked to the text prompt at the expense of lower image quality.
        seed (`int`, *optional*):
            The seed to use for the video generation (seed for the random number generator).
        target_size (`ImageToVideoTargetSize`, *optional*):
            The size in pixels of the output video frames.
        **kwargs:
            Extra keyword arguments, merged into the request parameters after the named ones (so a key given here
            replaces a named parameter with the same key).

    Returns:
        `bytes`: The generated video.

    Examples:
    ```py
    # Must be run in an async context
    >>> from huggingface_hub import AsyncInferenceClient
    >>> client = AsyncInferenceClient()
    >>> video = await client.image_to_video("cat.jpg", model="Wan-AI/Wan2.2-I2V-A14B", prompt="turn the cat into a tiger")
    >>> with open("tiger.mp4", "wb") as f:
    ...     f.write(video)
    ```
    """
    # A per-call `model` wins over the model configured on the client.
    target_model = model or self.model
    helper = get_provider_helper(self.provider, task="image-to-video", model=target_model)
    generation_options = {
        "prompt": prompt,
        "negative_prompt": negative_prompt,
        "num_frames": num_frames,
        "num_inference_steps": num_inference_steps,
        "guidance_scale": guidance_scale,
        "seed": seed,
        "target_size": target_size,
        **kwargs,
    }
    request = helper.prepare_request(
        inputs=image,
        parameters=generation_options,
        headers=self.headers,
        model=target_model,
        api_key=self.token,
    )
    raw_response = await self._inner_post(request)
    # The value returned to the caller is whatever the provider helper extracts from the raw response.
    video = helper.get_response(raw_response, request)
    return video
async def image_to_text(self, image: ContentT, *, model: Optional[str] = None) -> ImageToTextOutput:
    """
    Take an input image and return text.

    Models can have very different outputs depending on your use case (image captioning, optical character
    recognition (OCR), Pix2Struct, etc). Please have a look at the model card to learn more about a model's
    specificities.

    Args:
        image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
            The input image to caption. It can be raw bytes, an image file, a URL to an online image, or a PIL
            Image.
        model (`str`, *optional*):
            The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
            Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.

    Returns:
        [`ImageToTextOutput`]: The generated text.

    Raises:
        [`InferenceTimeoutError`]:
            If the model is unavailable or the request times out.
        `aiohttp.ClientResponseError`:
            If the request fails with an HTTP error status code other than HTTP 503.

    Example:
    ```py
    # Must be run in an async context
    >>> from huggingface_hub import AsyncInferenceClient
    >>> client = AsyncInferenceClient()
    >>> await client.image_to_text("cat.jpg")
    'a cat standing in a grassy field '
    >>> await client.image_to_text("https://upload.wikimedia.org/wikipedia/commons/thumb/4/43/Cute_dog.jpg/320px-Cute_dog.jpg")
    'a dog laying on the grass next to a flower pot '
    ```
    """
    # A per-call `model` wins over the model configured on the client.
    target_model = model or self.model
    helper = get_provider_helper(self.provider, task="image-to-text", model=target_model)
    request = helper.prepare_request(
        inputs=image,
        parameters={},
        headers=self.headers,
        model=target_model,
        api_key=self.token,
    )
    raw_response = await self._inner_post(request)
    # The response is parsed as a list; only its first element is returned.
    captions: List[ImageToTextOutput] = ImageToTextOutput.parse_obj_as_list(raw_response)
    return captions[0]
async def object_detection(
    self, image: ContentT, *, model: Optional[str] = None, threshold: Optional[float] = None
) -> List[ObjectDetectionOutputElement]:
    """
    Detect objects in an image with the specified model.

    > [!WARNING]
    > You must have `PIL` installed if you want to work with images (`pip install Pillow`).

    Args:
        image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
            The image to detect objects on. It can be raw bytes, an image file, a URL to an online image, or a PIL
            Image.
        model (`str`, *optional*):
            The model to use for object detection. Can be a model ID hosted on the Hugging Face Hub or a URL to a
            deployed Inference Endpoint. If not provided, the default recommended model for object detection (DETR)
            will be used.
        threshold (`float`, *optional*):
            The probability necessary to make a prediction.

    Returns:
        `List[ObjectDetectionOutputElement]`: A list of [`ObjectDetectionOutputElement`] items containing the
        bounding boxes and associated attributes.

    Raises:
        [`InferenceTimeoutError`]:
            If the model is unavailable or the request times out.
        `aiohttp.ClientResponseError`:
            If the request fails with an HTTP error status code other than HTTP 503.
        `ValueError`:
            If the request output is not a List.

    Example:
    ```py
    # Must be run in an async context
    >>> from huggingface_hub import AsyncInferenceClient
    >>> client = AsyncInferenceClient()
    >>> await client.object_detection("people.jpg")
    [ObjectDetectionOutputElement(score=0.9486683011054993, label='person', box=ObjectDetectionBoundingBox(xmin=59, ymin=39, xmax=420, ymax=510)), ...]
    ```
    """
    # A per-call `model` wins over the model configured on the client.
    target_model = model or self.model
    helper = get_provider_helper(self.provider, task="object-detection", model=target_model)
    detection_options = {"threshold": threshold}
    request = helper.prepare_request(
        inputs=image,
        parameters=detection_options,
        headers=self.headers,
        model=target_model,
        api_key=self.token,
    )
    raw_response = await self._inner_post(request)
    return ObjectDetectionOutputElement.parse_obj_as_list(raw_response)
async def question_answering(
    self,
    question: str,
    context: str,
    *,
    model: Optional[str] = None,
    align_to_words: Optional[bool] = None,
    doc_stride: Optional[int] = None,
    handle_impossible_answer: Optional[bool] = None,
    max_answer_len: Optional[int] = None,
    max_question_len: Optional[int] = None,
    max_seq_len: Optional[int] = None,
    top_k: Optional[int] = None,
) -> Union[QuestionAnsweringOutputElement, List[QuestionAnsweringOutputElement]]:
    """
    Extract the answer to a question from a given text.

    Args:
        question (`str`):
            Question to be answered.
        context (`str`):
            The context of the question.
        model (`str`, *optional*):
            The model to use for the question answering task. Can be a model ID hosted on the Hugging Face Hub or a
            URL to a deployed Inference Endpoint.
        align_to_words (`bool`, *optional*):
            Attempts to align the answer to real words. Improves quality on space separated languages. Might hurt
            on non-space-separated languages (like Japanese or Chinese).
        doc_stride (`int`, *optional*):
            If the context is too long to fit with the question for the model, it will be split into several chunks
            with some overlap. This argument controls the size of that overlap.
        handle_impossible_answer (`bool`, *optional*):
            Whether to accept "impossible" as an answer.
        max_answer_len (`int`, *optional*):
            The maximum length of predicted answers (e.g., only answers with a shorter length are considered).
        max_question_len (`int`, *optional*):
            The maximum length of the question after tokenization. It will be truncated if needed.
        max_seq_len (`int`, *optional*):
            The maximum length of the total sentence (context + question) in tokens of each chunk passed to the
            model. The context will be split into several chunks (using `doc_stride` as overlap) if needed.
        top_k (`int`, *optional*):
            The number of answers to return (chosen by order of likelihood). Fewer than `top_k` answers are
            returned if there are not enough options available within the context.

    Returns:
        Union[`QuestionAnsweringOutputElement`, List[`QuestionAnsweringOutputElement`]]:
            When top_k is 1 or not provided, it returns a single `QuestionAnsweringOutputElement`.
            When top_k is greater than 1, it returns a list of `QuestionAnsweringOutputElement`.

    Raises:
        [`InferenceTimeoutError`]:
            If the model is unavailable or the request times out.
        `aiohttp.ClientResponseError`:
            If the request fails with an HTTP error status code other than HTTP 503.

    Example:
    ```py
    # Must be run in an async context
    >>> from huggingface_hub import AsyncInferenceClient
    >>> client = AsyncInferenceClient()
    >>> await client.question_answering(question="What's my name?", context="My name is Clara and I live in Berkeley.")
    QuestionAnsweringOutputElement(answer='Clara', end=16, score=0.9326565265655518, start=11)
    ```
    """
    # A per-call `model` wins over the model configured on the client.
    target_model = model or self.model
    helper = get_provider_helper(self.provider, task="question-answering", model=target_model)
    payload_inputs = {"question": question, "context": context}
    answer_options = {
        "align_to_words": align_to_words,
        "doc_stride": doc_stride,
        "handle_impossible_answer": handle_impossible_answer,
        "max_answer_len": max_answer_len,
        "max_question_len": max_question_len,
        "max_seq_len": max_seq_len,
        "top_k": top_k,
    }
    request = helper.prepare_request(
        inputs=payload_inputs,
        parameters=answer_options,
        headers=self.headers,
        model=target_model,
        api_key=self.token,
    )
    raw_response = await self._inner_post(request)
    # `parse_obj` is used (not `parse_obj_as_list`) so the result is a single element when top_k is 1 or
    # not provided, or a list otherwise, to ensure backward compatibility.
    return QuestionAnsweringOutputElement.parse_obj(raw_response)
async def sentence_similarity(
    self, sentence: str, other_sentences: List[str], *, model: Optional[str] = None
) -> List[float]:
    """
    Score how semantically close a sentence is to each sentence of a list, by comparing their embeddings.

    Args:
        sentence (`str`):
            The main sentence to compare to others.
        other_sentences (`List[str]`):
            The list of sentences to compare to.
        model (`str`, *optional*):
            The model to use for the sentence similarity task. Can be a model ID hosted on the Hugging Face Hub or
            a URL to a deployed Inference Endpoint. If not provided, the default recommended sentence similarity
            model will be used. Defaults to None.

    Returns:
        `List[float]`: The similarity scores between the main sentence and the given comparison sentences.

    Raises:
        [`InferenceTimeoutError`]:
            If the model is unavailable or the request times out.
        `aiohttp.ClientResponseError`:
            If the request fails with an HTTP error status code other than HTTP 503.

    Example:
    ```py
    # Must be run in an async context
    >>> from huggingface_hub import AsyncInferenceClient
    >>> client = AsyncInferenceClient()
    >>> await client.sentence_similarity(
    ...     "Machine learning is so easy.",
    ...     other_sentences=[
    ...         "Deep learning is so straightforward.",
    ...         "This is so difficult, like rocket science.",
    ...         "I can't believe how much I struggled with this.",
    ...     ],
    ... )
    [0.7785726189613342, 0.45876261591911316, 0.2906220555305481]
    ```
    """
    # A per-call `model` wins over the model configured on the client.
    target_model = model or self.model
    helper = get_provider_helper(self.provider, task="sentence-similarity", model=target_model)
    payload_inputs = {"source_sentence": sentence, "sentences": other_sentences}
    request = helper.prepare_request(
        inputs=payload_inputs,
        parameters={},
        extra_payload={},
        headers=self.headers,
        model=target_model,
        api_key=self.token,
    )
    raw_response = await self._inner_post(request)
    return _bytes_to_list(raw_response)
async def summarization(
    self,
    text: str,
    *,
    model: Optional[str] = None,
    clean_up_tokenization_spaces: Optional[bool] = None,
    generate_parameters: Optional[Dict[str, Any]] = None,
    truncation: Optional["SummarizationTruncationStrategy"] = None,
) -> SummarizationOutput:
    """
    Summarize a text using a specified model.

    Args:
        text (`str`):
            The input text to summarize.
        model (`str`, *optional*):
            The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
            Inference Endpoint. If not provided, the default recommended model for summarization will be used.
        clean_up_tokenization_spaces (`bool`, *optional*):
            Whether to clean up the potential extra spaces in the text output.
        generate_parameters (`Dict[str, Any]`, *optional*):
            Additional parametrization of the text generation algorithm.
        truncation (`"SummarizationTruncationStrategy"`, *optional*):
            The truncation strategy to use.

    Returns:
        [`SummarizationOutput`]: The generated summary text.

    Raises:
        [`InferenceTimeoutError`]:
            If the model is unavailable or the request times out.
        `aiohttp.ClientResponseError`:
            If the request fails with an HTTP error status code other than HTTP 503.

    Example:
    ```py
    # Must be run in an async context
    >>> from huggingface_hub import AsyncInferenceClient
    >>> client = AsyncInferenceClient()
    >>> await client.summarization("The Eiffel tower...")
    SummarizationOutput(generated_text="The Eiffel tower is one of the most famous landmarks in the world....")
    ```
    """
    # A per-call `model` wins over the model configured on the client.
    target_model = model or self.model
    helper = get_provider_helper(self.provider, task="summarization", model=target_model)
    request = helper.prepare_request(
        inputs=text,
        parameters={
            "clean_up_tokenization_spaces": clean_up_tokenization_spaces,
            "generate_parameters": generate_parameters,
            "truncation": truncation,
        },
        headers=self.headers,
        model=target_model,
        api_key=self.token,
    )
    raw_response = await self._inner_post(request)
    # The response is parsed as a list; only its first element is returned.
    summaries = SummarizationOutput.parse_obj_as_list(raw_response)
    return summaries[0]
+ clean_up_tokenization_spaces (`bool`, *optional*): + Whether to clean up the potential extra spaces in the text output. + generate_parameters (`Dict[str, Any]`, *optional*): + Additional parametrization of the text generation algorithm. + truncation (`"SummarizationTruncationStrategy"`, *optional*): + The truncation strategy to use. + Returns: + [`SummarizationOutput`]: The generated summary text. + + Raises: + [`InferenceTimeoutError`]: + If the model is unavailable or the request times out. + `aiohttp.ClientResponseError`: + If the request fails with an HTTP error status code other than HTTP 503. + + Example: + ```py + # Must be run in an async context + >>> from huggingface_hub import AsyncInferenceClient + >>> client = AsyncInferenceClient() + >>> await client.summarization("The Eiffel tower...") + SummarizationOutput(generated_text="The Eiffel tower is one of the most famous landmarks in the world....") + ``` + """ + parameters = { + "clean_up_tokenization_spaces": clean_up_tokenization_spaces, + "generate_parameters": generate_parameters, + "truncation": truncation, + } + model_id = model or self.model + provider_helper = get_provider_helper(self.provider, task="summarization", model=model_id) + request_parameters = provider_helper.prepare_request( + inputs=text, + parameters=parameters, + headers=self.headers, + model=model_id, + api_key=self.token, + ) + response = await self._inner_post(request_parameters) + return SummarizationOutput.parse_obj_as_list(response)[0] + + async def table_question_answering( + self, + table: Dict[str, Any], + query: str, + *, + model: Optional[str] = None, + padding: Optional["Padding"] = None, + sequential: Optional[bool] = None, + truncation: Optional[bool] = None, + ) -> TableQuestionAnsweringOutputElement: + """ + Retrieve the answer to a question from information given in a table. 
+ + Args: + table (`Dict[str, Any]`): + A table of data represented as a dict of lists where entries are headers and the lists are all the + values. All lists must have the same size. + query (`str`): + The query in plain text that you want to ask the table. + model (`str`, *optional*): + The model to use for the table-question-answering task. Can be a model ID hosted on the Hugging Face + Hub or a URL to a deployed Inference Endpoint. + padding (`"Padding"`, *optional*): + Activates and controls padding. + sequential (`bool`, *optional*): + Whether to do inference sequentially or as a batch. Batching is faster, but models like SQA require the + inference to be done sequentially to extract relations within sequences, given their conversational + nature. + truncation (`bool`, *optional*): + Activates and controls truncation. + + Returns: + [`TableQuestionAnsweringOutputElement`]: a table question answering output containing the answer, coordinates, cells and the aggregator used. + + Raises: + [`InferenceTimeoutError`]: + If the model is unavailable or the request times out. + `aiohttp.ClientResponseError`: + If the request fails with an HTTP error status code other than HTTP 503. + + Example: + ```py + # Must be run in an async context + >>> from huggingface_hub import AsyncInferenceClient + >>> client = AsyncInferenceClient() + >>> query = "How many stars does the transformers repository have?"
+ >>> table = {"Repository": ["Transformers", "Datasets", "Tokenizers"], "Stars": ["36542", "4512", "3934"]} + >>> await client.table_question_answering(table, query, model="google/tapas-base-finetuned-wtq") + TableQuestionAnsweringOutputElement(answer='36542', coordinates=[[0, 1]], cells=['36542'], aggregator='AVERAGE') + ``` + """ + model_id = model or self.model + provider_helper = get_provider_helper(self.provider, task="table-question-answering", model=model_id) + request_parameters = provider_helper.prepare_request( + inputs={"query": query, "table": table}, + parameters={"model": model, "padding": padding, "sequential": sequential, "truncation": truncation}, + headers=self.headers, + model=model_id, + api_key=self.token, + ) + response = await self._inner_post(request_parameters) + return TableQuestionAnsweringOutputElement.parse_obj_as_instance(response) + + async def tabular_classification(self, table: Dict[str, Any], *, model: Optional[str] = None) -> List[str]: + """ + Classifying a target category (a group) based on a set of attributes. + + Args: + table (`Dict[str, Any]`): + Set of attributes to classify. + model (`str`, *optional*): + The model to use for the tabular classification task. Can be a model ID hosted on the Hugging Face Hub or a URL to + a deployed Inference Endpoint. If not provided, the default recommended tabular classification model will be used. + Defaults to None. + + Returns: + `List`: a list of labels, one per row in the initial table. + + Raises: + [`InferenceTimeoutError`]: + If the model is unavailable or the request times out. + `aiohttp.ClientResponseError`: + If the request fails with an HTTP error status code other than HTTP 503. + + Example: + ```py + # Must be run in an async context + >>> from huggingface_hub import AsyncInferenceClient + >>> client = AsyncInferenceClient() + >>> table = { + ... "fixed_acidity": ["7.4", "7.8", "10.3"], + ... "volatile_acidity": ["0.7", "0.88", "0.32"], + ... 
"citric_acid": ["0", "0", "0.45"], + ... "residual_sugar": ["1.9", "2.6", "6.4"], + ... "chlorides": ["0.076", "0.098", "0.073"], + ... "free_sulfur_dioxide": ["11", "25", "5"], + ... "total_sulfur_dioxide": ["34", "67", "13"], + ... "density": ["0.9978", "0.9968", "0.9976"], + ... "pH": ["3.51", "3.2", "3.23"], + ... "sulphates": ["0.56", "0.68", "0.82"], + ... "alcohol": ["9.4", "9.8", "12.6"], + ... } + >>> await client.tabular_classification(table=table, model="julien-c/wine-quality") + ["5", "5", "5"] + ``` + """ + model_id = model or self.model + provider_helper = get_provider_helper(self.provider, task="tabular-classification", model=model_id) + request_parameters = provider_helper.prepare_request( + inputs=None, + extra_payload={"table": table}, + parameters={}, + headers=self.headers, + model=model_id, + api_key=self.token, + ) + response = await self._inner_post(request_parameters) + return _bytes_to_list(response) + + async def tabular_regression(self, table: Dict[str, Any], *, model: Optional[str] = None) -> List[float]: + """ + Predicting a numerical target value given a set of attributes/features in a table. + + Args: + table (`Dict[str, Any]`): + Set of attributes stored in a table. The attributes used to predict the target can be both numerical and categorical. + model (`str`, *optional*): + The model to use for the tabular regression task. Can be a model ID hosted on the Hugging Face Hub or a URL to + a deployed Inference Endpoint. If not provided, the default recommended tabular regression model will be used. + Defaults to None. + + Returns: + `List`: a list of predicted numerical target values. + + Raises: + [`InferenceTimeoutError`]: + If the model is unavailable or the request times out. + `aiohttp.ClientResponseError`: + If the request fails with an HTTP error status code other than HTTP 503. 
+ + Example: + ```py + # Must be run in an async context + >>> from huggingface_hub import AsyncInferenceClient + >>> client = AsyncInferenceClient() + >>> table = { + ... "Height": ["11.52", "12.48", "12.3778"], + ... "Length1": ["23.2", "24", "23.9"], + ... "Length2": ["25.4", "26.3", "26.5"], + ... "Length3": ["30", "31.2", "31.1"], + ... "Species": ["Bream", "Bream", "Bream"], + ... "Width": ["4.02", "4.3056", "4.6961"], + ... } + >>> await client.tabular_regression(table, model="scikit-learn/Fish-Weight") + [110, 120, 130] + ``` + """ + model_id = model or self.model + provider_helper = get_provider_helper(self.provider, task="tabular-regression", model=model_id) + request_parameters = provider_helper.prepare_request( + inputs=None, + parameters={}, + extra_payload={"table": table}, + headers=self.headers, + model=model_id, + api_key=self.token, + ) + response = await self._inner_post(request_parameters) + return _bytes_to_list(response) + + async def text_classification( + self, + text: str, + *, + model: Optional[str] = None, + top_k: Optional[int] = None, + function_to_apply: Optional["TextClassificationOutputTransform"] = None, + ) -> List[TextClassificationOutputElement]: + """ + Perform text classification (e.g. sentiment-analysis) on the given text. + + Args: + text (`str`): + A string to be classified. + model (`str`, *optional*): + The model to use for the text classification task. Can be a model ID hosted on the Hugging Face Hub or a URL to + a deployed Inference Endpoint. If not provided, the default recommended text classification model will be used. + Defaults to None. + top_k (`int`, *optional*): + When specified, limits the output to the top K most probable classes. + function_to_apply (`"TextClassificationOutputTransform"`, *optional*): + The function to apply to the model outputs in order to retrieve the scores. 
+ + Returns: + `List[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability. + + Raises: + [`InferenceTimeoutError`]: + If the model is unavailable or the request times out. + `aiohttp.ClientResponseError`: + If the request fails with an HTTP error status code other than HTTP 503. + + Example: + ```py + # Must be run in an async context + >>> from huggingface_hub import AsyncInferenceClient + >>> client = AsyncInferenceClient() + >>> await client.text_classification("I like you") + [ + TextClassificationOutputElement(label='POSITIVE', score=0.9998695850372314), + TextClassificationOutputElement(label='NEGATIVE', score=0.0001304351753788069), + ] + ``` + """ + model_id = model or self.model + provider_helper = get_provider_helper(self.provider, task="text-classification", model=model_id) + request_parameters = provider_helper.prepare_request( + inputs=text, + parameters={ + "function_to_apply": function_to_apply, + "top_k": top_k, + }, + headers=self.headers, + model=model_id, + api_key=self.token, + ) + response = await self._inner_post(request_parameters) + return TextClassificationOutputElement.parse_obj_as_list(response)[0] # type: ignore [return-value] + + @overload + async def text_generation( + self, + prompt: str, + *, + details: Literal[True], + stream: Literal[True], + model: Optional[str] = None, + # Parameters from `TextGenerationInputGenerateParameters` (maintained manually) + adapter_id: Optional[str] = None, + best_of: Optional[int] = None, + decoder_input_details: Optional[bool] = None, + do_sample: Optional[bool] = None, + frequency_penalty: Optional[float] = None, + grammar: Optional[TextGenerationInputGrammarType] = None, + max_new_tokens: Optional[int] = None, + repetition_penalty: Optional[float] = None, + return_full_text: Optional[bool] = None, + seed: Optional[int] = None, + stop: Optional[List[str]] = None, + stop_sequences: Optional[List[str]] = None, 
# Deprecated, use `stop` instead + temperature: Optional[float] = None, + top_k: Optional[int] = None, + top_n_tokens: Optional[int] = None, + top_p: Optional[float] = None, + truncate: Optional[int] = None, + typical_p: Optional[float] = None, + watermark: Optional[bool] = None, + ) -> AsyncIterable[TextGenerationStreamOutput]: ... + + @overload + async def text_generation( + self, + prompt: str, + *, + details: Literal[True], + stream: Optional[Literal[False]] = None, + model: Optional[str] = None, + # Parameters from `TextGenerationInputGenerateParameters` (maintained manually) + adapter_id: Optional[str] = None, + best_of: Optional[int] = None, + decoder_input_details: Optional[bool] = None, + do_sample: Optional[bool] = None, + frequency_penalty: Optional[float] = None, + grammar: Optional[TextGenerationInputGrammarType] = None, + max_new_tokens: Optional[int] = None, + repetition_penalty: Optional[float] = None, + return_full_text: Optional[bool] = None, + seed: Optional[int] = None, + stop: Optional[List[str]] = None, + stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead + temperature: Optional[float] = None, + top_k: Optional[int] = None, + top_n_tokens: Optional[int] = None, + top_p: Optional[float] = None, + truncate: Optional[int] = None, + typical_p: Optional[float] = None, + watermark: Optional[bool] = None, + ) -> TextGenerationOutput: ... 
# Typing-only overload: `stream=True` with `details` omitted, `None` or `False`.
# Type checkers resolve such calls to a return type of `AsyncIterable[str]`.
@overload
async def text_generation(
    self,
    prompt: str,
    *,
    details: Optional[Literal[False]] = None,
    stream: Literal[True],
    model: Optional[str] = None,
    # Parameters from `TextGenerationInputGenerateParameters` (maintained manually)
    adapter_id: Optional[str] = None,
    best_of: Optional[int] = None,
    decoder_input_details: Optional[bool] = None,
    do_sample: Optional[bool] = None,
    frequency_penalty: Optional[float] = None,
    grammar: Optional[TextGenerationInputGrammarType] = None,
    max_new_tokens: Optional[int] = None,
    repetition_penalty: Optional[float] = None,
    return_full_text: Optional[bool] = None,  # Manual default value
    seed: Optional[int] = None,
    stop: Optional[List[str]] = None,
    stop_sequences: Optional[List[str]] = None,  # Deprecated, use `stop` instead
    temperature: Optional[float] = None,
    top_k: Optional[int] = None,
    top_n_tokens: Optional[int] = None,
    top_p: Optional[float] = None,
    truncate: Optional[int] = None,
    typical_p: Optional[float] = None,
    watermark: Optional[bool] = None,
) -> AsyncIterable[str]: ...
# Typing-only overload: both `details` and `stream` omitted, `None` or `False`.
# Type checkers resolve such calls to a return type of `str`.
@overload
async def text_generation(
    self,
    prompt: str,
    *,
    details: Optional[Literal[False]] = None,
    stream: Optional[Literal[False]] = None,
    model: Optional[str] = None,
    # Parameters from `TextGenerationInputGenerateParameters` (maintained manually)
    adapter_id: Optional[str] = None,
    best_of: Optional[int] = None,
    decoder_input_details: Optional[bool] = None,
    do_sample: Optional[bool] = None,
    frequency_penalty: Optional[float] = None,
    grammar: Optional[TextGenerationInputGrammarType] = None,
    max_new_tokens: Optional[int] = None,
    repetition_penalty: Optional[float] = None,
    return_full_text: Optional[bool] = None,
    seed: Optional[int] = None,
    stop: Optional[List[str]] = None,
    stop_sequences: Optional[List[str]] = None,  # Deprecated, use `stop` instead
    temperature: Optional[float] = None,
    top_k: Optional[int] = None,
    top_n_tokens: Optional[int] = None,
    top_p: Optional[float] = None,
    truncate: Optional[int] = None,
    typical_p: Optional[float] = None,
    watermark: Optional[bool] = None,
) -> str: ...
# Typing-only catch-all overload: `details` / `stream` typed as plain `Optional[bool]`
# rather than a `Literal`, so the return type is the union of every possible output shape.
@overload
async def text_generation(
    self,
    prompt: str,
    *,
    details: Optional[bool] = None,
    stream: Optional[bool] = None,
    model: Optional[str] = None,
    # Parameters from `TextGenerationInputGenerateParameters` (maintained manually)
    adapter_id: Optional[str] = None,
    best_of: Optional[int] = None,
    decoder_input_details: Optional[bool] = None,
    do_sample: Optional[bool] = None,
    frequency_penalty: Optional[float] = None,
    grammar: Optional[TextGenerationInputGrammarType] = None,
    max_new_tokens: Optional[int] = None,
    repetition_penalty: Optional[float] = None,
    return_full_text: Optional[bool] = None,
    seed: Optional[int] = None,
    stop: Optional[List[str]] = None,
    stop_sequences: Optional[List[str]] = None,  # Deprecated, use `stop` instead
    temperature: Optional[float] = None,
    top_k: Optional[int] = None,
    top_n_tokens: Optional[int] = None,
    top_p: Optional[float] = None,
    truncate: Optional[int] = None,
    typical_p: Optional[float] = None,
    watermark: Optional[bool] = None,
) -> Union[str, TextGenerationOutput, AsyncIterable[str], AsyncIterable[TextGenerationStreamOutput]]: ...
async def text_generation(
    self,
    prompt: str,
    *,
    details: Optional[bool] = None,
    stream: Optional[bool] = None,
    model: Optional[str] = None,
    # Parameters from `TextGenerationInputGenerateParameters` (maintained manually)
    adapter_id: Optional[str] = None,
    best_of: Optional[int] = None,
    decoder_input_details: Optional[bool] = None,
    do_sample: Optional[bool] = None,
    frequency_penalty: Optional[float] = None,
    grammar: Optional[TextGenerationInputGrammarType] = None,
    max_new_tokens: Optional[int] = None,
    repetition_penalty: Optional[float] = None,
    return_full_text: Optional[bool] = None,
    seed: Optional[int] = None,
    stop: Optional[List[str]] = None,
    stop_sequences: Optional[List[str]] = None,  # Deprecated, use `stop` instead
    temperature: Optional[float] = None,
    top_k: Optional[int] = None,
    top_n_tokens: Optional[int] = None,
    top_p: Optional[float] = None,
    truncate: Optional[int] = None,
    typical_p: Optional[float] = None,
    watermark: Optional[bool] = None,
) -> Union[str, TextGenerationOutput, AsyncIterable[str], AsyncIterable[TextGenerationStreamOutput]]:
    """
    Given a prompt, generate the following text.

    > [!TIP]
    > If you want to generate a response from chat messages, you should use the [`InferenceClient.chat_completion`] method.
    > It accepts a list of messages instead of a single text prompt and handles the chat templating for you.

    Parameters that the target model has previously been recorded as not supporting are dropped from the
    request (with a `UserWarning`). When the server answers HTTP 400 with a message listing unused model
    kwargs, those kwargs are recorded for the resolved model and the request is retried once without them.

    Args:
        prompt (`str`):
            Input text.
        details (`bool`, *optional*):
            By default, text_generation returns a string. Pass `details=True` if you want a detailed output (tokens,
            probabilities, seed, finish reason, etc.). Only available for models running with the
            `text-generation-inference` backend.
        stream (`bool`, *optional*):
            By default, text_generation returns the full generated text. Pass `stream=True` if you want a stream of
            tokens to be returned. Only available for models running with the `text-generation-inference`
            backend.
        model (`str`, *optional*):
            The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
            Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.
        adapter_id (`str`, *optional*):
            Lora adapter id.
        best_of (`int`, *optional*):
            Generate best_of sequences and return the one with the highest token logprobs.
        decoder_input_details (`bool`, *optional*):
            Return the decoder input token logprobs and ids. You must set `details=True` as well for it to be taken
            into account. Defaults to `False`.
        do_sample (`bool`, *optional*):
            Activate logits sampling
        frequency_penalty (`float`, *optional*):
            Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in
            the text so far, decreasing the model's likelihood to repeat the same line verbatim.
        grammar ([`TextGenerationInputGrammarType`], *optional*):
            Grammar constraints. Can be either a JSONSchema or a regex.
        max_new_tokens (`int`, *optional*):
            Maximum number of generated tokens. Defaults to 100.
        repetition_penalty (`float`, *optional*):
            The parameter for repetition penalty. 1.0 means no penalty. See [this
            paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
        return_full_text (`bool`, *optional*):
            Whether to prepend the prompt to the generated text
        seed (`int`, *optional*):
            Random sampling seed
        stop (`List[str]`, *optional*):
            Stop generating tokens if a member of `stop` is generated.
        stop_sequences (`List[str]`, *optional*):
            Deprecated argument. Use `stop` instead. Only used when `stop` is not provided.
        temperature (`float`, *optional*):
            The value used to modulate the logits distribution.
        top_n_tokens (`int`, *optional*):
            Return information about the `top_n_tokens` most likely tokens at each generation step, instead of
            just the sampled token.
        top_k (`int`, *optional*):
            The number of highest probability vocabulary tokens to keep for top-k-filtering.
        top_p (`float`, *optional*):
            If set to < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or
            higher are kept for generation.
        truncate (`int`, *optional*):
            Truncate inputs tokens to the given size.
        typical_p (`float`, *optional*):
            Typical Decoding mass
            See [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666) for more information
        watermark (`bool`, *optional*):
            Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226)

    Returns:
        `Union[str, TextGenerationOutput, AsyncIterable[str], AsyncIterable[TextGenerationStreamOutput]]`:
        Generated text returned from the server:
        - if `stream=False` and `details=False`, the generated text is returned as a `str` (default)
        - if `stream=True` and `details=False`, the generated text is returned token by token as a `AsyncIterable[str]`
        - if `stream=False` and `details=True`, the generated text is returned with more details as a [`~huggingface_hub.TextGenerationOutput`]
        - if `details=True` and `stream=True`, the generated text is returned token by token as a async iterable of [`~huggingface_hub.TextGenerationStreamOutput`]

    Raises:
        `ValidationError`:
            If input values are not valid. No HTTP call is made to the server.
        `ValueError`:
            If `stream=True` is requested for a model already recorded as not supporting some text-generation
            parameters (i.e. not served via TGI).
        [`InferenceTimeoutError`]:
            If the model is unavailable or the request times out.
        `aiohttp.ClientResponseError`:
            If the request fails with an HTTP error status code other than HTTP 503.

    Example:
    ```py
    # Must be run in an async context
    >>> from huggingface_hub import AsyncInferenceClient
    >>> client = AsyncInferenceClient()

    # Case 1: generate text
    >>> await client.text_generation("The huggingface_hub library is ", max_new_tokens=12)
    '100% open source and built to be easy to use.'

    # Case 2: iterate over the generated tokens. Useful for large generation.
    >>> async for token in await client.text_generation("The huggingface_hub library is ", max_new_tokens=12, stream=True):
    ...     print(token)
    100
    %
    open
    source
    and
    built
    to
    be
    easy
    to
    use
    .

    # Case 3: get more details about the generation process.
    >>> await client.text_generation("The huggingface_hub library is ", max_new_tokens=12, details=True)
    TextGenerationOutput(
        generated_text='100% open source and built to be easy to use.',
        details=TextGenerationDetails(
            finish_reason='length',
            generated_tokens=12,
            seed=None,
            prefill=[
                TextGenerationPrefillOutputToken(id=487, text='The', logprob=None),
                TextGenerationPrefillOutputToken(id=53789, text=' hugging', logprob=-13.171875),
                (...)
                TextGenerationPrefillOutputToken(id=204, text=' ', logprob=-7.0390625)
            ],
            tokens=[
                TokenElement(id=1425, text='100', logprob=-1.0175781, special=False),
                TokenElement(id=16, text='%', logprob=-0.0463562, special=False),
                (...)
                TokenElement(id=25, text='.', logprob=-0.5703125, special=False)
            ],
            best_of_sequences=None
        )
    )

    # Case 4: iterate over the generated tokens with more details.
    # Last object is more complete, containing the full generated text and the finish reason.
    >>> async for details in await client.text_generation("The huggingface_hub library is ", max_new_tokens=12, details=True, stream=True):
    ...     print(details)
    ...
    TextGenerationStreamOutput(token=TokenElement(id=1425, text='100', logprob=-1.0175781, special=False), generated_text=None, details=None)
    TextGenerationStreamOutput(token=TokenElement(id=16, text='%', logprob=-0.0463562, special=False), generated_text=None, details=None)
    TextGenerationStreamOutput(token=TokenElement(id=1314, text=' open', logprob=-1.3359375, special=False), generated_text=None, details=None)
    TextGenerationStreamOutput(token=TokenElement(id=3178, text=' source', logprob=-0.28100586, special=False), generated_text=None, details=None)
    TextGenerationStreamOutput(token=TokenElement(id=273, text=' and', logprob=-0.5961914, special=False), generated_text=None, details=None)
    TextGenerationStreamOutput(token=TokenElement(id=3426, text=' built', logprob=-1.9423828, special=False), generated_text=None, details=None)
    TextGenerationStreamOutput(token=TokenElement(id=271, text=' to', logprob=-1.4121094, special=False), generated_text=None, details=None)
    TextGenerationStreamOutput(token=TokenElement(id=314, text=' be', logprob=-1.5224609, special=False), generated_text=None, details=None)
    TextGenerationStreamOutput(token=TokenElement(id=1833, text=' easy', logprob=-2.1132812, special=False), generated_text=None, details=None)
    TextGenerationStreamOutput(token=TokenElement(id=271, text=' to', logprob=-0.08520508, special=False), generated_text=None, details=None)
    TextGenerationStreamOutput(token=TokenElement(id=745, text=' use', logprob=-0.39453125, special=False), generated_text=None, details=None)
    TextGenerationStreamOutput(token=TokenElement(
        id=25,
        text='.',
        logprob=-0.5703125,
        special=False),
        generated_text='100% open source and built to be easy to use.',
        details=TextGenerationStreamOutputStreamDetails(finish_reason='length', generated_tokens=12, seed=None)
    )

    # Case 5: generate constrained output using grammar
    >>> response = await client.text_generation(
    ...     prompt="I saw a puppy a cat and a raccoon during my bike ride in the park",
    ...     model="HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1",
    ...     max_new_tokens=100,
    ...     repetition_penalty=1.3,
    ...     grammar={
    ...         "type": "json",
    ...         "value": {
    ...             "properties": {
    ...                 "location": {"type": "string"},
    ...                 "activity": {"type": "string"},
    ...                 "animals_seen": {"type": "integer", "minimum": 1, "maximum": 5},
    ...                 "animals": {"type": "array", "items": {"type": "string"}},
    ...             },
    ...             "required": ["location", "activity", "animals_seen", "animals"],
    ...         },
    ...     },
    ... )
    >>> json.loads(response)
    {
        "activity": "bike riding",
        "animals": ["puppy", "cat", "raccoon"],
        "animals_seen": 3,
        "location": "park"
    }
    ```
    """
    if decoder_input_details and not details:
        warnings.warn(
            "`decoder_input_details=True` has been passed to the server but `details=False` is set meaning that"
            " the output from the server will be truncated."
        )
        decoder_input_details = False

    if stop_sequences is not None:
        warnings.warn(
            "`stop_sequences` is a deprecated argument for `text_generation` task"
            " and will be removed in version '0.28.0'. Use `stop` instead.",
            FutureWarning,
        )
    if stop is None:
        stop = stop_sequences  # use deprecated arg if provided

    # Resolve the effective model once, up front. It keys the "unsupported kwargs" cache below AND is the value
    # forwarded to the retry call, so the entry written after an HTTP 400 is exactly the one the retry reads.
    # Keying on the raw `model` argument would store the entry under `None` whenever the model comes from
    # `self.model`, which the retry (called with `model=model_id`) would then miss.
    model_id = model or self.model

    # Build payload
    parameters = {
        "adapter_id": adapter_id,
        "best_of": best_of,
        "decoder_input_details": decoder_input_details,
        "details": details,
        "do_sample": do_sample,
        "frequency_penalty": frequency_penalty,
        "grammar": grammar,
        "max_new_tokens": max_new_tokens,
        "repetition_penalty": repetition_penalty,
        "return_full_text": return_full_text,
        "seed": seed,
        "stop": stop,
        "temperature": temperature,
        "top_k": top_k,
        "top_n_tokens": top_n_tokens,
        "top_p": top_p,
        "truncate": truncate,
        "typical_p": typical_p,
        "watermark": watermark,
    }

    # Remove some parameters if not a TGI server
    unsupported_kwargs = _get_unsupported_text_generation_kwargs(model_id)
    if unsupported_kwargs:
        # The server does not support some parameters
        # => means it is not a TGI server
        # => remove unsupported parameters and warn the user

        ignored_parameters = []
        for key in unsupported_kwargs:
            # Only warn about parameters the caller actually set; drop the key either way.
            if parameters.get(key):
                ignored_parameters.append(key)
            parameters.pop(key, None)
        if ignored_parameters:
            warnings.warn(
                "API endpoint/model for text-generation is not served via TGI. Ignoring following parameters:"
                f" {', '.join(ignored_parameters)}.",
                UserWarning,
            )
        if details:
            warnings.warn(
                "API endpoint/model for text-generation is not served via TGI. Parameter `details=True` will"
                " be ignored meaning only the generated text will be returned.",
                UserWarning,
            )
            details = False
        if stream:
            raise ValueError(
                "API endpoint/model for text-generation is not served via TGI. Cannot return output as a stream."
                " Please pass `stream=False` as input."
            )

    provider_helper = get_provider_helper(self.provider, task="text-generation", model=model_id)
    request_parameters = provider_helper.prepare_request(
        inputs=prompt,
        parameters=parameters,
        extra_payload={"stream": stream},
        headers=self.headers,
        model=model_id,
        api_key=self.token,
    )

    # Handle errors separately for more precise error messages
    try:
        bytes_output = await self._inner_post(request_parameters, stream=stream or False)
    except _import_aiohttp().ClientResponseError as e:
        # NOTE(review): `response_error_payload` is presumably attached to the exception by `_inner_post`
        # (confirm). Read it defensively: a missing, non-dict or non-string payload must not raise here and
        # mask the original HTTP error.
        payload = getattr(e, "response_error_payload", None)
        error_message = payload.get("error") if isinstance(payload, dict) else None
        match = MODEL_KWARGS_NOT_USED_REGEX.search(error_message) if isinstance(error_message, str) else None
        if e.status == 400 and match:
            unused_params = [kwarg.strip("' ") for kwarg in match.group(1).split(",")]
            _set_unsupported_text_generation_kwargs(model_id, unused_params)
            # Retry: the recursive call reads the entry just recorded and strips the rejected parameters.
            return await self.text_generation(  # type: ignore
                prompt=prompt,
                details=details,
                stream=stream,
                model=model_id,
                adapter_id=adapter_id,
                best_of=best_of,
                decoder_input_details=decoder_input_details,
                do_sample=do_sample,
                frequency_penalty=frequency_penalty,
                grammar=grammar,
                max_new_tokens=max_new_tokens,
                repetition_penalty=repetition_penalty,
                return_full_text=return_full_text,
                seed=seed,
                stop=stop,
                temperature=temperature,
                top_k=top_k,
                top_n_tokens=top_n_tokens,
                top_p=top_p,
                truncate=truncate,
                typical_p=typical_p,
                watermark=watermark,
            )
        raise_text_generation_error(e)

    # Parse output
    if stream:
        return _async_stream_text_generation_response(bytes_output, details)  # type: ignore

    data = _bytes_to_dict(bytes_output)  # type: ignore[arg-type]

    # Data can be a single element (dict) or an iterable of dicts where we select the first element of.
async def text_to_image(
    self,
    prompt: str,
    *,
    negative_prompt: Optional[str] = None,
    height: Optional[int] = None,
    width: Optional[int] = None,
    num_inference_steps: Optional[int] = None,
    guidance_scale: Optional[float] = None,
    model: Optional[str] = None,
    scheduler: Optional[str] = None,
    seed: Optional[int] = None,
    extra_body: Optional[Dict[str, Any]] = None,
) -> "Image":
    """
    Generate an image from a text prompt using the selected model.

    > [!WARNING]
    > You must have `PIL` installed if you want to work with images (`pip install Pillow`).

    > [!TIP]
    > Provider-specific parameters can be forwarded to the model through the `extra_body` argument.

    Args:
        prompt (`str`):
            The prompt to generate an image from.
        negative_prompt (`str`, *optional*):
            A prompt describing what should NOT appear in the generated image.
        height (`int`, *optional*):
            Height of the output image, in pixels.
        width (`int`, *optional*):
            Width of the output image, in pixels.
        num_inference_steps (`int`, *optional*):
            Number of denoising steps. More steps usually give a higher quality image at the expense of
            slower inference.
        guidance_scale (`float`, *optional*):
            Higher values push the model to follow the prompt more closely, but values that are too high may
            cause saturation and other artifacts.
        model (`str`, *optional*):
            The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a
            deployed Inference Endpoint. Falls back to the model set on the client when omitted.
            Defaults to None.
        scheduler (`str`, *optional*):
            Override the scheduler with a compatible one.
        seed (`int`, *optional*):
            Seed for the random number generator.
        extra_body (`Dict[str, Any]`, *optional*):
            Additional provider-specific parameters. They are merged after the named parameters, so an entry
            with the same key replaces the named value. Refer to the provider's documentation for supported
            parameters.

    Returns:
        `Image`: The generated image.

    Raises:
        [`InferenceTimeoutError`]:
            If the model is unavailable or the request times out.
        `aiohttp.ClientResponseError`:
            If the request fails with an HTTP error status code other than HTTP 503.

    Example:
    ```py
    # Must be run in an async context
    >>> from huggingface_hub import AsyncInferenceClient
    >>> client = AsyncInferenceClient()

    >>> image = await client.text_to_image("An astronaut riding a horse on the moon.")
    >>> image.save("astronaut.png")

    >>> image = await client.text_to_image(
    ...     "An astronaut riding a horse on the moon.",
    ...     negative_prompt="low resolution, blurry",
    ...     model="stabilityai/stable-diffusion-2-1",
    ... )
    >>> image.save("better_astronaut.png")
    ```

    Example using a third-party provider directly. Usage will be billed on your fal.ai account.
    ```py
    >>> from huggingface_hub import AsyncInferenceClient
    >>> client = AsyncInferenceClient(
    ...     provider="fal-ai",  # Use fal.ai provider
    ...     api_key="fal-ai-api-key",  # Pass your fal.ai API key
    ... )
    >>> image = await client.text_to_image(
    ...     "A majestic lion in a fantasy forest",
    ...     model="black-forest-labs/FLUX.1-schnell",
    ... )
    >>> image.save("lion.png")
    ```

    Example using a third-party provider through Hugging Face Routing, with extra parameters. Usage will be
    billed on your Hugging Face account.
    ```py
    >>> from huggingface_hub import AsyncInferenceClient
    >>> client = AsyncInferenceClient(
    ...     provider="replicate",  # Use replicate provider
    ...     api_key="hf_...",  # Pass your HF token
    ... )
    >>> image = await client.text_to_image(
    ...     "An astronaut riding a horse on the moon.",
    ...     model="black-forest-labs/FLUX.1-schnell",
    ...     extra_body={"output_quality": 100},
    ... )
    >>> image.save("astronaut.png")
    ```
    """
    model_id = model or self.model
    provider_helper = get_provider_helper(self.provider, task="text-to-image", model=model_id)

    # Named generation options; `extra_body` is unpacked after them so its keys take precedence.
    named_params = {
        "negative_prompt": negative_prompt,
        "height": height,
        "width": width,
        "num_inference_steps": num_inference_steps,
        "guidance_scale": guidance_scale,
        "scheduler": scheduler,
        "seed": seed,
    }
    request_parameters = provider_helper.prepare_request(
        inputs=prompt,
        parameters={**named_params, **(extra_body or {})},
        headers=self.headers,
        model=model_id,
        api_key=self.token,
    )

    raw_response = await self._inner_post(request_parameters)
    image_payload = provider_helper.get_response(raw_response)
    return _bytes_to_image(image_payload)
+ Defaults to None. + guidance_scale (`float`, *optional*): + A higher guidance scale value encourages the model to generate videos closely linked to the text + prompt, but values too high may cause saturation and other artifacts. + negative_prompt (`List[str]`, *optional*): + One or several prompt to guide what NOT to include in video generation. + num_frames (`float`, *optional*): + The num_frames parameter determines how many video frames are generated. + num_inference_steps (`int`, *optional*): + The number of denoising steps. More denoising steps usually lead to a higher quality video at the + expense of slower inference. + seed (`int`, *optional*): + Seed for the random number generator. + extra_body (`Dict[str, Any]`, *optional*): + Additional provider-specific parameters to pass to the model. Refer to the provider's documentation + for supported parameters. + + Returns: + `bytes`: The generated video. + + Example: + + Example using a third-party provider directly. Usage will be billed on your fal.ai account. + ```py + >>> from huggingface_hub import InferenceClient + >>> client = InferenceClient( + ... provider="fal-ai", # Using fal.ai provider + ... api_key="fal-ai-api-key", # Pass your fal.ai API key + ... ) + >>> video = client.text_to_video( + ... "A majestic lion running in a fantasy forest", + ... model="tencent/HunyuanVideo", + ... ) + >>> with open("lion.mp4", "wb") as file: + ... file.write(video) + ``` + + Example using a third-party provider through Hugging Face Routing. Usage will be billed on your Hugging Face account. + ```py + >>> from huggingface_hub import InferenceClient + >>> client = InferenceClient( + ... provider="replicate", # Using replicate provider + ... api_key="hf_...", # Pass your HF token + ... ) + >>> video = client.text_to_video( + ... "A cat running in a park", + ... model="genmo/mochi-1-preview", + ... ) + >>> with open("cat.mp4", "wb") as file: + ... 
async def text_to_speech(
    self,
    text: str,
    *,
    model: Optional[str] = None,
    do_sample: Optional[bool] = None,
    early_stopping: Optional[Union[bool, "TextToSpeechEarlyStoppingEnum"]] = None,
    epsilon_cutoff: Optional[float] = None,
    eta_cutoff: Optional[float] = None,
    max_length: Optional[int] = None,
    max_new_tokens: Optional[int] = None,
    min_length: Optional[int] = None,
    min_new_tokens: Optional[int] = None,
    num_beam_groups: Optional[int] = None,
    num_beams: Optional[int] = None,
    penalty_alpha: Optional[float] = None,
    temperature: Optional[float] = None,
    top_k: Optional[int] = None,
    top_p: Optional[float] = None,
    typical_p: Optional[float] = None,
    use_cache: Optional[bool] = None,
    extra_body: Optional[Dict[str, Any]] = None,
) -> bytes:
    """
    Synthesize audio of a voice pronouncing the given text.

    > [!TIP]
    > Provider-specific parameters can be forwarded to the model through the `extra_body` argument.

    Args:
        text (`str`):
            The text to synthesize.
        model (`str`, *optional*):
            The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a
            deployed Inference Endpoint. Falls back to the model set on the client when omitted.
            Defaults to None.
        do_sample (`bool`, *optional*):
            Whether to use sampling instead of greedy decoding when generating new tokens.
        early_stopping (`Union[bool, "TextToSpeechEarlyStoppingEnum"]`, *optional*):
            Controls the stopping condition for beam-based methods.
        epsilon_cutoff (`float`, *optional*):
            If set to a float strictly between 0 and 1, only tokens with a conditional probability greater
            than epsilon_cutoff will be sampled. In the paper, suggested values range from 3e-4 to 9e-4,
            depending on the size of the model. See [Truncation Sampling as Language Model
            Desmoothing](https://hf.co/papers/2210.15191) for more details.
        eta_cutoff (`float`, *optional*):
            Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to a float
            strictly between 0 and 1, a token is only considered if it is greater than either eta_cutoff or
            sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits))). The latter term is intuitively the
            expected next token probability, scaled by sqrt(eta_cutoff). In the paper, suggested values range
            from 3e-4 to 2e-3, depending on the size of the model. See [Truncation Sampling as Language Model
            Desmoothing](https://hf.co/papers/2210.15191) for more details.
        max_length (`int`, *optional*):
            The maximum length (in tokens) of the generated text, including the input.
        max_new_tokens (`int`, *optional*):
            The maximum number of tokens to generate. Takes precedence over max_length.
        min_length (`int`, *optional*):
            The minimum length (in tokens) of the generated text, including the input.
        min_new_tokens (`int`, *optional*):
            The minimum number of tokens to generate. Takes precedence over min_length.
        num_beam_groups (`int`, *optional*):
            Number of groups to divide num_beams into in order to ensure diversity among different groups of
            beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
        num_beams (`int`, *optional*):
            Number of beams to use for beam search.
        penalty_alpha (`float`, *optional*):
            Balances the model confidence and the degeneration penalty in contrastive search decoding.
        temperature (`float`, *optional*):
            The value used to modulate the next token probabilities.
        top_k (`int`, *optional*):
            The number of highest probability vocabulary tokens to keep for top-k-filtering.
        top_p (`float`, *optional*):
            If set to float < 1, only the smallest set of most probable tokens with probabilities that add up
            to top_p or higher are kept for generation.
        typical_p (`float`, *optional*):
            Local typicality measures how similar the conditional probability of predicting a target token
            next is to the expected conditional probability of predicting a random token next, given the
            partial text already generated. If set to float < 1, the smallest set of the most locally typical
            tokens with probabilities that add up to typical_p or higher are kept for generation. See [this
            paper](https://hf.co/papers/2202.00666) for more details.
        use_cache (`bool`, *optional*):
            Whether the model should use the past last key/values attentions to speed up decoding.
        extra_body (`Dict[str, Any]`, *optional*):
            Additional provider-specific parameters. They are merged after the named parameters, so an entry
            with the same key replaces the named value. Refer to the provider's documentation for supported
            parameters.

    Returns:
        `bytes`: The generated audio.

    Raises:
        [`InferenceTimeoutError`]:
            If the model is unavailable or the request times out.
        `aiohttp.ClientResponseError`:
            If the request fails with an HTTP error status code other than HTTP 503.

    Example:
    ```py
    # Must be run in an async context
    >>> from pathlib import Path
    >>> from huggingface_hub import AsyncInferenceClient
    >>> client = AsyncInferenceClient()

    >>> audio = await client.text_to_speech("Hello world")
    >>> Path("hello_world.flac").write_bytes(audio)
    ```

    Example using a third-party provider directly. Usage will be billed on your Replicate account.
    ```py
    >>> from huggingface_hub import AsyncInferenceClient
    >>> client = AsyncInferenceClient(
    ...     provider="replicate",
    ...     api_key="your-replicate-api-key",  # Pass your Replicate API key directly
    ... )
    >>> audio = await client.text_to_speech(
    ...     text="Hello world",
    ...     model="OuteAI/OuteTTS-0.3-500M",
    ... )
    >>> Path("hello_world.flac").write_bytes(audio)
    ```

    Example using a third-party provider through Hugging Face Routing, with extra parameters. Usage will be
    billed on your Hugging Face account.
    ```py
    >>> from huggingface_hub import AsyncInferenceClient
    >>> client = AsyncInferenceClient(
    ...     provider="replicate",  # Use replicate provider
    ...     api_key="hf_...",  # Pass your HF token
    ... )
    >>> audio = await client.text_to_speech(
    ...     "Hello, my name is Kororo, an awesome text-to-speech model.",
    ...     model="hexgrad/Kokoro-82M",
    ...     extra_body={"voice": "af_nicole"},
    ... )
    >>> Path("hello.flac").write_bytes(audio)
    ```

    Example music-gen using "YuE-s1-7B-anneal-en-cot" on fal.ai
    ```py
    >>> from huggingface_hub import AsyncInferenceClient
    >>> lyrics = '''
    ... [verse]
    ... In the town where I was born
    ... Lived a man who sailed to sea
    ...
    ... [chorus]
    ... We all live in a yellow submarine
    ... Yellow submarine, yellow submarine
    ... '''
    >>> genres = "pavarotti-style tenor voice"
    >>> client = AsyncInferenceClient(
    ...     provider="fal-ai",
    ...     model="m-a-p/YuE-s1-7B-anneal-en-cot",
    ...     api_key=...,
    ... )
    >>> audio = await client.text_to_speech(lyrics, extra_body={"genres": genres})
    >>> with open("output.mp3", "wb") as f:
    ...     f.write(audio)
    ```
    """
    model_id = model or self.model
    provider_helper = get_provider_helper(self.provider, task="text-to-speech", model=model_id)

    # Named generation options; `extra_body` is unpacked after them so its keys take precedence.
    named_params = {
        "do_sample": do_sample,
        "early_stopping": early_stopping,
        "epsilon_cutoff": epsilon_cutoff,
        "eta_cutoff": eta_cutoff,
        "max_length": max_length,
        "max_new_tokens": max_new_tokens,
        "min_length": min_length,
        "min_new_tokens": min_new_tokens,
        "num_beam_groups": num_beam_groups,
        "num_beams": num_beams,
        "penalty_alpha": penalty_alpha,
        "temperature": temperature,
        "top_k": top_k,
        "top_p": top_p,
        "typical_p": typical_p,
        "use_cache": use_cache,
    }
    request_parameters = provider_helper.prepare_request(
        inputs=text,
        parameters={**named_params, **(extra_body or {})},
        headers=self.headers,
        model=model_id,
        api_key=self.token,
    )

    raw_response = await self._inner_post(request_parameters)
    audio = provider_helper.get_response(raw_response)
    return audio
async def translation(
    self,
    text: str,
    *,
    model: Optional[str] = None,
    src_lang: Optional[str] = None,
    tgt_lang: Optional[str] = None,
    clean_up_tokenization_spaces: Optional[bool] = None,
    truncation: Optional["TranslationTruncationStrategy"] = None,
    generate_parameters: Optional[Dict[str, Any]] = None,
) -> TranslationOutput:
    """
    Translate text from one language to another.

    Check out https://huggingface.co/tasks/translation for more information on how to choose the best model
    for your specific use case. Source and target languages usually depend on the model. Some models accept
    explicit languages; for those, pass both `src_lang` and `tgt_lang`.

    Args:
        text (`str`):
            A string to be translated.
        model (`str`, *optional*):
            The model to use for the translation task. Can be a model ID hosted on the Hugging Face Hub or a
            URL to a deployed Inference Endpoint. Falls back to the model set on the client when omitted.
            Defaults to None.
        src_lang (`str`, *optional*):
            The source language of the text. Required for models that can translate from multiple languages.
        tgt_lang (`str`, *optional*):
            Target language to translate to. Required for models that can translate to multiple languages.
        clean_up_tokenization_spaces (`bool`, *optional*):
            Whether to clean up the potential extra spaces in the text output.
        truncation (`"TranslationTruncationStrategy"`, *optional*):
            The truncation strategy to use.
        generate_parameters (`Dict[str, Any]`, *optional*):
            Additional parametrization of the text generation algorithm.

    Returns:
        [`TranslationOutput`]: The generated translated text (first element of the parsed response list).

    Raises:
        [`InferenceTimeoutError`]:
            If the model is unavailable or the request times out.
        `aiohttp.ClientResponseError`:
            If the request fails with an HTTP error status code other than HTTP 503.
        `ValueError`:
            If only one of the `src_lang` and `tgt_lang` arguments is provided.

    Example:
    ```py
    # Must be run in an async context
    >>> from huggingface_hub import AsyncInferenceClient
    >>> client = AsyncInferenceClient()
    >>> await client.translation("My name is Wolfgang and I live in Berlin")
    TranslationOutput(translation_text="Mein Name ist Wolfgang und ich lebe in Berlin.")
    >>> await client.translation("My name is Wolfgang and I live in Berlin", model="Helsinki-NLP/opus-mt-en-fr")
    TranslationOutput(translation_text="Je m'appelle Wolfgang et je vis à Berlin.")
    ```

    Specifying languages:
    ```py
    >>> await client.translation(
    ...     "My name is Sarah Jessica Parker but you can call me Jessica",
    ...     model="facebook/mbart-large-50-many-to-many-mmt",
    ...     src_lang="en_XX",
    ...     tgt_lang="fr_XX",
    ... )
    TranslationOutput(translation_text="Mon nom est Sarah Jessica Parker mais vous pouvez m'appeler Jessica")
    ```
    """
    # `src_lang` and `tgt_lang` only make sense as a pair: reject calls that supply exactly one of them.
    if src_lang is None:
        if tgt_lang is not None:
            raise ValueError("You cannot specify `tgt_lang` without specifying `src_lang`.")
    elif tgt_lang is None:
        raise ValueError("You cannot specify `src_lang` without specifying `tgt_lang`.")

    model_id = model or self.model
    provider_helper = get_provider_helper(self.provider, task="translation", model=model_id)

    translation_params = {
        "src_lang": src_lang,
        "tgt_lang": tgt_lang,
        "clean_up_tokenization_spaces": clean_up_tokenization_spaces,
        "truncation": truncation,
        "generate_parameters": generate_parameters,
    }
    request_parameters = provider_helper.prepare_request(
        inputs=text,
        parameters=translation_params,
        headers=self.headers,
        model=model_id,
        api_key=self.token,
    )

    raw_response = await self._inner_post(request_parameters)
    candidates = TranslationOutput.parse_obj_as_list(raw_response)
    return candidates[0]
+ + Args: + image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`): + The input image for the context. It can be raw bytes, an image file, a URL to an online image, or a PIL Image. + question (`str`): + Question to be answered. + model (`str`, *optional*): + The model to use for the visual question answering task. Can be a model ID hosted on the Hugging Face Hub or a URL to + a deployed Inference Endpoint. If not provided, the default recommended visual question answering model will be used. + Defaults to None. + top_k (`int`, *optional*): + The number of answers to return (will be chosen by order of likelihood). Note that we return less than + topk answers if there are not enough options available within the context. + Returns: + `List[VisualQuestionAnsweringOutputElement]`: a list of [`VisualQuestionAnsweringOutputElement`] items containing the predicted label and associated probability. + + Raises: + `InferenceTimeoutError`: + If the model is unavailable or the request times out. + `aiohttp.ClientResponseError`: + If the request fails with an HTTP error status code other than HTTP 503. + + Example: + ```py + # Must be run in an async context + >>> from huggingface_hub import AsyncInferenceClient + >>> client = AsyncInferenceClient() + >>> await client.visual_question_answering( + ... image="https://huggingface.co/datasets/mishig/sample_images/resolve/main/tiger.jpg", + ... question="What is the animal doing?" + ... 
async def zero_shot_classification(
    self,
    text: str,
    candidate_labels: List[str],
    *,
    multi_label: Optional[bool] = False,
    hypothesis_template: Optional[str] = None,
    model: Optional[str] = None,
) -> List[ZeroShotClassificationOutputElement]:
    """
    Classify a text against a set of candidate labels supplied at call time.

    Args:
        text (`str`):
            The input text to classify.
        candidate_labels (`List[str]`):
            The set of possible class labels to classify the text into.
        multi_label (`bool`, *optional*):
            Whether multiple candidate labels can be true. If false, the scores are normalized such that the
            sum of the label likelihoods for each sequence is 1. If true, the labels are considered
            independent and probabilities are normalized for each candidate. Defaults to `False`.
        hypothesis_template (`str`, *optional*):
            The sentence used in conjunction with `candidate_labels` to attempt the text classification by
            replacing the placeholder with the candidate labels.
        model (`str`, *optional*):
            The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a
            deployed Inference Endpoint. This parameter overrides the model defined at the instance level.

    Returns:
        `List[ZeroShotClassificationOutputElement]`: One [`ZeroShotClassificationOutputElement`] per label
        returned by the server, pairing each entry of the response's `"labels"` with the entry of `"scores"`
        at the same position.

    Raises:
        [`InferenceTimeoutError`]:
            If the model is unavailable or the request times out.
        `aiohttp.ClientResponseError`:
            If the request fails with an HTTP error status code other than HTTP 503.

    Example with `multi_label=False`:
    ```py
    # Must be run in an async context
    >>> from huggingface_hub import AsyncInferenceClient
    >>> client = AsyncInferenceClient()
    >>> text = (
    ...     "A new model offers an explanation for how the Galilean satellites formed around the solar system's"
    ...     "largest world. Konstantin Batygin did not set out to solve one of the solar system's most puzzling"
    ...     " mysteries when he went for a run up a hill in Nice, France."
    ... )
    >>> labels = ["space & cosmos", "scientific discovery", "microbiology", "robots", "archeology"]
    >>> await client.zero_shot_classification(text, labels)
    [
        ZeroShotClassificationOutputElement(label='scientific discovery', score=0.7961668968200684),
        ZeroShotClassificationOutputElement(label='space & cosmos', score=0.18570658564567566),
        ZeroShotClassificationOutputElement(label='microbiology', score=0.00730885099619627),
        ZeroShotClassificationOutputElement(label='archeology', score=0.006258360575884581),
        ZeroShotClassificationOutputElement(label='robots', score=0.004559356719255447),
    ]
    >>> await client.zero_shot_classification(text, labels, multi_label=True)
    [
        ZeroShotClassificationOutputElement(label='scientific discovery', score=0.9829297661781311),
        ZeroShotClassificationOutputElement(label='space & cosmos', score=0.755190908908844),
        ZeroShotClassificationOutputElement(label='microbiology', score=0.0005462635890580714),
        ZeroShotClassificationOutputElement(label='archeology', score=0.00047131875180639327),
        ZeroShotClassificationOutputElement(label='robots', score=0.00030448526376858354),
    ]
    ```

    Example with `multi_label=True` and a custom `hypothesis_template`:
    ```py
    # Must be run in an async context
    >>> from huggingface_hub import AsyncInferenceClient
    >>> client = AsyncInferenceClient()
    >>> await client.zero_shot_classification(
    ...     text="I really like our dinner and I'm very happy. I don't like the weather though.",
    ...     candidate_labels=["positive", "negative", "pessimistic", "optimistic"],
    ...     multi_label=True,
    ...     hypothesis_template="This text is {} towards the weather"
    ... )
    [
        ZeroShotClassificationOutputElement(label='negative', score=0.9231801629066467),
        ZeroShotClassificationOutputElement(label='pessimistic', score=0.8760990500450134),
        ZeroShotClassificationOutputElement(label='optimistic', score=0.0008674879791215062),
        ZeroShotClassificationOutputElement(label='positive', score=0.0005250611575320363)
    ]
    ```
    """
    model_id = model or self.model
    provider_helper = get_provider_helper(self.provider, task="zero-shot-classification", model=model_id)

    classification_params = {
        "candidate_labels": candidate_labels,
        "multi_label": multi_label,
        "hypothesis_template": hypothesis_template,
    }
    request_parameters = provider_helper.prepare_request(
        inputs=text,
        parameters=classification_params,
        headers=self.headers,
        model=model_id,
        api_key=self.token,
    )

    raw_response = await self._inner_post(request_parameters)
    payload = _bytes_to_dict(raw_response)

    # The response carries two parallel lists; pair them up position by position.
    results = []
    for label, score in zip(payload["labels"], payload["scores"]):
        element = ZeroShotClassificationOutputElement.parse_obj_as_instance({"label": label, "score": score})
        results.append(element)
    return results
This parameter overrides the model defined at the instance level. If not provided, the default recommended zero-shot image classification model will be used. + hypothesis_template (`str`, *optional*): + The sentence used in conjunction with `candidate_labels` to attempt the image classification by + replacing the placeholder with the candidate labels. + + Returns: + `List[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence. + + Raises: + [`InferenceTimeoutError`]: + If the model is unavailable or the request times out. + `aiohttp.ClientResponseError`: + If the request fails with an HTTP error status code other than HTTP 503. + + Example: + ```py + # Must be run in an async context + >>> from huggingface_hub import AsyncInferenceClient + >>> client = AsyncInferenceClient() + + >>> await client.zero_shot_image_classification( + ... "https://upload.wikimedia.org/wikipedia/commons/thumb/4/43/Cute_dog.jpg/320px-Cute_dog.jpg", + ... labels=["dog", "cat", "horse"], + ... ) + [ZeroShotImageClassificationOutputElement(label='dog', score=0.956),...] 
def _get_client_session(self, headers: Optional[Dict] = None) -> "ClientSession":
    """
    Create an `aiohttp.ClientSession` configured from this client and register it for later cleanup.

    The session is built with the client's headers (optionally overridden by `headers`), cookies, timeout
    and `trust_env` setting. Every response obtained through the session is recorded in `self._sessions`
    so that closing the session also closes its responses.

    Args:
        headers (`Dict`, *optional*):
            Extra headers merged on top of a copy of `self.headers`; `self.headers` itself is not modified.

    Returns:
        `ClientSession`: The new session. Its `close()` method is replaced by a wrapper that closes the
        tracked responses, closes the underlying session, then removes the session from `self._sessions`.
        The wrapper tolerates being called again after the session has been deregistered.
    """
    aiohttp = _import_aiohttp()
    client_headers = self.headers.copy()
    if headers is not None:
        client_headers.update(headers)

    # Return a new aiohttp ClientSession with correct settings.
    session = aiohttp.ClientSession(
        headers=client_headers,
        cookies=self.cookies,
        timeout=aiohttp.ClientTimeout(self.timeout),
        trust_env=self.trust_env,
    )

    # Keep track of sessions to close them later
    self._sessions[session] = set()

    # Override the `._request` method to register responses to be closed
    session._wrapped_request = session._request

    async def _request(method, url, **kwargs):
        response = await session._wrapped_request(method, url, **kwargs)
        self._sessions[session].add(response)
        return response

    session._request = _request

    # Override the 'close' method to
    # 1. close ongoing responses
    # 2. deregister the session when closed
    session._close = session.close

    async def close_session():
        # Use a tolerant lookup: once the session has been deregistered (e.g. `close()` is called a second
        # time), `self._sessions[session]` would raise KeyError. The final `pop` already allows a missing
        # entry, so the lookup is made consistent with it.
        for response in self._sessions.get(session, ()):
            response.close()
        await session._close()
        self._sessions.pop(session, None)

    session.close = close_session
    return session
model.startswith(("http://", "https://")): + url = model.rstrip("/") + "/info" + else: + url = f"{constants.INFERENCE_ENDPOINT}/models/{model}/info" + + async with self._get_client_session(headers=build_hf_headers(token=self.token)) as client: + response = await client.get(url, proxy=self.proxies) + response.raise_for_status() + return await response.json() + + async def health_check(self, model: Optional[str] = None) -> bool: + """ + Check the health of the deployed endpoint. + + Health check is only available with Inference Endpoints powered by Text-Generation-Inference (TGI) or Text-Embedding-Inference (TEI). + + Args: + model (`str`, *optional*): + URL of the Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None. + + Returns: + `bool`: True if everything is working fine. + + Example: + ```py + # Must be run in an async context + >>> from huggingface_hub import AsyncInferenceClient + >>> client = AsyncInferenceClient("https://jzgu0buei5.us-east-1.aws.endpoints.huggingface.cloud") + >>> await client.health_check() + True + ``` + """ + if self.provider != "hf-inference": + raise ValueError(f"Health check is not supported on '{self.provider}'.") + + model = model or self.model + if model is None: + raise ValueError("Model id not provided.") + if not model.startswith(("http://", "https://")): + raise ValueError("Model must be an Inference Endpoint URL.") + url = model.rstrip("/") + "/health" + + async with self._get_client_session(headers=build_hf_headers(token=self.token)) as client: + response = await client.get(url, proxy=self.proxies) + return response.status == 200 + + @property + def chat(self) -> "ProxyClientChat": + return ProxyClientChat(self) + + +class _ProxyClient: + """Proxy class to be able to call `client.chat.completion.create(...)` as OpenAI client.""" + + def __init__(self, client: AsyncInferenceClient): + self._client = client + + +class ProxyClientChat(_ProxyClient): + """Proxy class to be able to 
call `client.chat.completion.create(...)` as OpenAI client.""" + + @property + def completions(self) -> "ProxyClientChatCompletions": + return ProxyClientChatCompletions(self._client) + + +class ProxyClientChatCompletions(_ProxyClient): + """Proxy class to be able to call `client.chat.completion.create(...)` as OpenAI client.""" + + @property + def create(self): + return self._client.chat_completion diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/audio_classification.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/audio_classification.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f1351a30e924c9664cc75300de3e7e6edc6c3ec6 Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/audio_classification.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/base.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/base.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ee6e244f122769d79b6a107e0e6f54e8d344861c Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/base.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/chat_completion.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/chat_completion.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..eede5f84cb91f2a5187fbeac6931b06dc336ba35 
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/chat_completion.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/fill_mask.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/fill_mask.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f6e59bae79fc76582e130c168edae97db0b63b5a Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/fill_mask.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/image_segmentation.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/image_segmentation.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8768d3b24e68079ec83bd01ed245f706f7d472a4 Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/image_segmentation.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/image_to_image.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/image_to_image.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6b4a44dff18f9b3bf50f195d9951412368ba8117 Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/image_to_image.cpython-312.pyc differ diff --git 
a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/image_to_text.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/image_to_text.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3b0c018614efab0b7b80ee35e37f94485925fa22 Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/image_to_text.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/image_to_video.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/image_to_video.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..484be40c705ce15e6892eb5c0dc47040ebb8316e Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/image_to_video.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/question_answering.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/question_answering.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6914093cb61ac7b84c2e29b1801ab1c8f8e7a7a0 Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/question_answering.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/sentence_similarity.cpython-312.pyc 
b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/sentence_similarity.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..864a15a5698651ec725046ed37e63e93ee859875 Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/sentence_similarity.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/text2text_generation.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/text2text_generation.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d76d5a5ba87fa40b9c8e69371803bf02ba7d470f Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/text2text_generation.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/text_generation.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/text_generation.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a0ea4177822a2f7f9a2489dbefc699442fabb9db Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/text_generation.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/text_to_image.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/text_to_image.cpython-312.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..9c89bf5df4af5e1dba2ce3f44c951ca6df3a8fb6 Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/text_to_image.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/text_to_speech.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/text_to_speech.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cb59f81e4d6e32e850770f760ea35efb0a5423b6 Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/text_to_speech.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/text_to_video.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/text_to_video.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ed4218d6b8b0fba1ed6b58ab18cb39fedbad8394 Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/text_to_video.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/token_classification.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/token_classification.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4f13f3249af937cd88ba3502056d18fa0989062d Binary files /dev/null and 
b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/token_classification.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/translation.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/translation.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..23c067a8e9547ec34952efd08ff79f982256676e Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/translation.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/video_classification.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/video_classification.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c76599deac4bfeff896008cd5381cea4908f533b Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/video_classification.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/zero_shot_image_classification.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/zero_shot_image_classification.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cbd13197c69afa50710fd63c31a8b094cdb244c6 Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/zero_shot_image_classification.cpython-312.pyc 
differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/zero_shot_object_detection.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/zero_shot_object_detection.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3806eda431d4882acbed0af31af0c0475ef2e371 Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__pycache__/zero_shot_object_detection.cpython-312.pyc differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_mcp/__init__.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_mcp/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_mcp/_cli_hacks.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_mcp/_cli_hacks.py new file mode 100644 index 0000000000000000000000000000000000000000..64251bbb745dc3b4b561f0eb249be65108b20d82 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_mcp/_cli_hacks.py @@ -0,0 +1,88 @@ +import asyncio +import sys +from functools import partial + +import typer + + +def _patch_anyio_open_process(): + """ + Patch anyio.open_process to allow detached processes on Windows and Unix-like systems. + + This is necessary to prevent the MCP client from being interrupted by Ctrl+C when running in the CLI. 
+ """ + import subprocess + + import anyio + + if getattr(anyio, "_tiny_agents_patched", False): + return + anyio._tiny_agents_patched = True # ty: ignore[invalid-assignment] + + original_open_process = anyio.open_process + + if sys.platform == "win32": + # On Windows, we need to set the creation flags to create a new process group + + async def open_process_in_new_group(*args, **kwargs): + """ + Wrapper for open_process to handle Windows-specific process creation flags. + """ + # Ensure we pass the creation flags for Windows + kwargs.setdefault("creationflags", subprocess.CREATE_NEW_PROCESS_GROUP) + return await original_open_process(*args, **kwargs) + + anyio.open_process = open_process_in_new_group # ty: ignore[invalid-assignment] + else: + # For Unix-like systems, we can use setsid to create a new session + async def open_process_in_new_group(*args, **kwargs): + """ + Wrapper for open_process to handle Unix-like systems with start_new_session=True. + """ + kwargs.setdefault("start_new_session", True) + return await original_open_process(*args, **kwargs) + + anyio.open_process = open_process_in_new_group # ty: ignore[invalid-assignment] + + +async def _async_prompt(exit_event: asyncio.Event, prompt: str = "» ") -> str: + """ + Asynchronous prompt function that reads input from stdin without blocking. + + This function is designed to work in an asynchronous context, allowing the event loop to gracefully stop it (e.g. on Ctrl+C). + + Alternatively, we could use https://github.com/vxgmichel/aioconsole but that would be an additional dependency. 
+ """ + loop = asyncio.get_event_loop() + + if sys.platform == "win32": + # Windows: Use run_in_executor to avoid blocking the event loop + # Degraded solution: this is not ideal as user will have to CTRL+C once more to stop the prompt (and it'll not be graceful) + return await loop.run_in_executor(None, partial(typer.prompt, prompt, prompt_suffix=" ")) + else: + # UNIX-like: Use loop.add_reader for non-blocking stdin read + future = loop.create_future() + + def on_input(): + line = sys.stdin.readline() + loop.remove_reader(sys.stdin) + future.set_result(line) + + print(prompt, end=" ", flush=True) + loop.add_reader(sys.stdin, on_input) # not supported on Windows + + # Wait for user input or exit event + # Wait until either the user hits enter or exit_event is set + exit_task = asyncio.create_task(exit_event.wait()) + await asyncio.wait( + [future, exit_task], + return_when=asyncio.FIRST_COMPLETED, + ) + + # Check which one has been triggered + if exit_event.is_set(): + future.cancel() + return "" + + line = await future + return line.strip() diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_mcp/agent.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_mcp/agent.py new file mode 100644 index 0000000000000000000000000000000000000000..b9eb347ed60a7178caecc8d54d4b6b2593d80884 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_mcp/agent.py @@ -0,0 +1,100 @@ +from __future__ import annotations + +import asyncio +from typing import AsyncGenerator, Dict, Iterable, List, Optional, Union + +from huggingface_hub import ChatCompletionInputMessage, ChatCompletionStreamOutput, MCPClient + +from .._providers import PROVIDER_OR_POLICY_T +from .constants import DEFAULT_SYSTEM_PROMPT, EXIT_LOOP_TOOLS, MAX_NUM_TURNS +from .types import ServerConfig + + +class Agent(MCPClient): + """ + Implementation of a Simple Agent, which is a simple while 
loop built right on top of an [`MCPClient`]. + + > [!WARNING] + > This class is experimental and might be subject to breaking changes in the future without prior notice. + + Args: + model (`str`, *optional*): + The model to run inference with. Can be a model id hosted on the Hugging Face Hub, e.g. `meta-llama/Meta-Llama-3-8B-Instruct` + or a URL to a deployed Inference Endpoint or other local or remote endpoint. + servers (`Iterable[Dict]`): + MCP servers to connect to. Each server is a dictionary containing a `type` key and a `config` key. The `type` key can be `"stdio"` or `"sse"`, and the `config` key is a dictionary of arguments for the server. + provider (`str`, *optional*): + Name of the provider to use for inference. Defaults to "auto" i.e. the first of the providers available for the model, sorted by the user's order in https://hf.co/settings/inference-providers. + If model is a URL or `base_url` is passed, then `provider` is not used. + base_url (`str`, *optional*): + The base URL to run inference. Defaults to None. + api_key (`str`, *optional*): + Token to use for authentication. Will default to the locally Hugging Face saved token if not provided. You can also use your own provider API key to interact directly with the provider's service. + prompt (`str`, *optional*): + The system prompt to use for the agent. Defaults to the default system prompt in `constants.py`. 
+ """ + + def __init__( + self, + *, + model: Optional[str] = None, + servers: Iterable[ServerConfig], + provider: Optional[PROVIDER_OR_POLICY_T] = None, + base_url: Optional[str] = None, + api_key: Optional[str] = None, + prompt: Optional[str] = None, + ): + super().__init__(model=model, provider=provider, base_url=base_url, api_key=api_key) + self._servers_cfg = list(servers) + self.messages: List[Union[Dict, ChatCompletionInputMessage]] = [ + {"role": "system", "content": prompt or DEFAULT_SYSTEM_PROMPT} + ] + + async def load_tools(self) -> None: + for cfg in self._servers_cfg: + await self.add_mcp_server(**cfg) + + async def run( + self, + user_input: str, + *, + abort_event: Optional[asyncio.Event] = None, + ) -> AsyncGenerator[Union[ChatCompletionStreamOutput, ChatCompletionInputMessage], None]: + """ + Run the agent with the given user input. + + Args: + user_input (`str`): + The user input to run the agent with. + abort_event (`asyncio.Event`, *optional*): + An event that can be used to abort the agent. If the event is set, the agent will stop running. 
+ """ + self.messages.append({"role": "user", "content": user_input}) + + num_turns: int = 0 + next_turn_should_call_tools = True + + while True: + if abort_event and abort_event.is_set(): + return + + async for item in self.process_single_turn_with_tools( + self.messages, + exit_loop_tools=EXIT_LOOP_TOOLS, + exit_if_first_chunk_no_tool=(num_turns > 0 and next_turn_should_call_tools), + ): + yield item + + num_turns += 1 + last = self.messages[-1] + + if last.get("role") == "tool" and last.get("name") in {t.function.name for t in EXIT_LOOP_TOOLS}: + return + + if last.get("role") != "tool" and num_turns > MAX_NUM_TURNS: + return + + if last.get("role") != "tool" and next_turn_should_call_tools: + return + + next_turn_should_call_tools = last.get("role") != "tool" diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_mcp/cli.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_mcp/cli.py new file mode 100644 index 0000000000000000000000000000000000000000..a8aaea687a2b372e5379f09dffc219e5ea5b38b8 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_mcp/cli.py @@ -0,0 +1,247 @@ +import asyncio +import os +import signal +import traceback +from typing import Optional + +import typer +from rich import print + +from ._cli_hacks import _async_prompt, _patch_anyio_open_process +from .agent import Agent +from .utils import _load_agent_config + + +app = typer.Typer( + rich_markup_mode="rich", + help="A squad of lightweight composable AI applications built on Hugging Face's Inference Client and MCP stack.", +) + +run_cli = typer.Typer( + name="run", + help="Run the Agent in the CLI", + invoke_without_command=True, +) +app.add_typer(run_cli, name="run") + + +async def run_agent( + agent_path: Optional[str], +) -> None: + """ + Tiny Agent loop. 
+ + Args: + agent_path (`str`, *optional*): + Path to a local folder containing an `agent.json` and optionally a custom `PROMPT.md` or `AGENTS.md` file or a built-in agent stored in a Hugging Face dataset. + + """ + _patch_anyio_open_process() # Hacky way to prevent stdio connections to be stopped by Ctrl+C + + config, prompt = _load_agent_config(agent_path) + + inputs = config.get("inputs", []) + servers = config.get("servers", []) + + abort_event = asyncio.Event() + exit_event = asyncio.Event() + first_sigint = True + + loop = asyncio.get_running_loop() + original_sigint_handler = signal.getsignal(signal.SIGINT) + + def _sigint_handler() -> None: + nonlocal first_sigint + if first_sigint: + first_sigint = False + abort_event.set() + print("\n[red]Interrupted. Press Ctrl+C again to quit.[/red]", flush=True) + return + + print("\n[red]Exiting...[/red]", flush=True) + exit_event.set() + + try: + sigint_registered_in_loop = False + try: + loop.add_signal_handler(signal.SIGINT, _sigint_handler) + sigint_registered_in_loop = True + except (AttributeError, NotImplementedError): + # Windows (or any loop that doesn't support it) : fall back to sync + signal.signal(signal.SIGINT, lambda *_: _sigint_handler()) + + # Handle inputs (i.e. env variables injection) + resolved_inputs: dict[str, str] = {} + + if len(inputs) > 0: + print( + "[bold blue]Some initial inputs are required by the agent. 
" + "Please provide a value or leave empty to load from env.[/bold blue]" + ) + for input_item in inputs: + input_id = input_item["id"] + description = input_item["description"] + env_special_value = f"${{input:{input_id}}}" + + # Check if the input is used by any server or as an apiKey + input_usages = set() + for server in servers: + # Check stdio's "env" and http/sse's "headers" mappings + env_or_headers = server.get("env", {}) if server["type"] == "stdio" else server.get("headers", {}) + for key, value in env_or_headers.items(): + if env_special_value in value: + input_usages.add(key) + + raw_api_key = config.get("apiKey") + if isinstance(raw_api_key, str) and env_special_value in raw_api_key: + input_usages.add("apiKey") + + if not input_usages: + print( + f"[yellow]Input '{input_id}' defined in config but not used by any server or as an API key." + " Skipping.[/yellow]" + ) + continue + + # Prompt user for input + env_variable_key = input_id.replace("-", "_").upper() + print( + f"[blue] • {input_id}[/blue]: {description}. (default: load from {env_variable_key}).", + end=" ", + ) + user_input = (await _async_prompt(exit_event=exit_event)).strip() + if exit_event.is_set(): + return + + # Fallback to environment variable when user left blank + final_value = user_input + if not final_value: + final_value = os.getenv(env_variable_key, "") + if final_value: + print(f"[green]Value successfully loaded from '{env_variable_key}'[/green]") + else: + print( + f"[yellow]No value found for '{env_variable_key}' in environment variables. 
Continuing.[/yellow]" + ) + resolved_inputs[input_id] = final_value + + # Inject resolved value (can be empty) into stdio's env or http/sse's headers + for server in servers: + env_or_headers = server.get("env", {}) if server["type"] == "stdio" else server.get("headers", {}) + for key, value in env_or_headers.items(): + if env_special_value in value: + env_or_headers[key] = env_or_headers[key].replace(env_special_value, final_value) + + print() + + raw_api_key = config.get("apiKey") + if isinstance(raw_api_key, str): + substituted_api_key = raw_api_key + for input_id, val in resolved_inputs.items(): + substituted_api_key = substituted_api_key.replace(f"${{input:{input_id}}}", val) + config["apiKey"] = substituted_api_key + # Main agent loop + async with Agent( + provider=config.get("provider"), # type: ignore[arg-type] + model=config.get("model"), + base_url=config.get("endpointUrl"), # type: ignore[arg-type] + api_key=config.get("apiKey"), + servers=servers, # type: ignore[arg-type] + prompt=prompt, + ) as agent: + await agent.load_tools() + print(f"[bold blue]Agent loaded with {len(agent.available_tools)} tools:[/bold blue]") + for t in agent.available_tools: + print(f"[blue] • {t.function.name}[/blue]") + + while True: + abort_event.clear() + + # Check if we should exit + if exit_event.is_set(): + return + + try: + user_input = await _async_prompt(exit_event=exit_event) + first_sigint = True + except EOFError: + print("\n[red]EOF received, exiting.[/red]", flush=True) + break + except KeyboardInterrupt: + if not first_sigint and abort_event.is_set(): + continue + else: + print("\n[red]Keyboard interrupt during input processing.[/red]", flush=True) + break + + try: + async for chunk in agent.run(user_input, abort_event=abort_event): + if abort_event.is_set() and not first_sigint: + break + if exit_event.is_set(): + return + + if hasattr(chunk, "choices"): + delta = chunk.choices[0].delta + if delta.content: + print(delta.content, end="", flush=True) + if 
delta.tool_calls: + for call in delta.tool_calls: + if call.id: + print(f"", end="") + if call.function.name: + print(f"{call.function.name}", end=" ") + if call.function.arguments: + print(f"{call.function.arguments}", end="") + else: + print( + f"\n\n[green]Tool[{chunk.name}] {chunk.tool_call_id}\n{chunk.content}[/green]\n", + flush=True, + ) + + print() + + except Exception as e: + tb_str = traceback.format_exc() + print(f"\n[bold red]Error during agent run: {e}\n{tb_str}[/bold red]", flush=True) + first_sigint = True # Allow graceful interrupt for the next command + + except Exception as e: + tb_str = traceback.format_exc() + print(f"\n[bold red]An unexpected error occurred: {e}\n{tb_str}[/bold red]", flush=True) + raise e + + finally: + if sigint_registered_in_loop: + try: + loop.remove_signal_handler(signal.SIGINT) + except (AttributeError, NotImplementedError): + pass + else: + signal.signal(signal.SIGINT, original_sigint_handler) + + +@run_cli.callback() +def run( + path: Optional[str] = typer.Argument( + None, + help=( + "Path to a local folder containing an agent.json file or a built-in agent " + "stored in the 'tiny-agents/tiny-agents' Hugging Face dataset " + "(https://huggingface.co/datasets/tiny-agents/tiny-agents)" + ), + show_default=False, + ), +): + try: + asyncio.run(run_agent(path)) + except KeyboardInterrupt: + print("\n[red]Application terminated by KeyboardInterrupt.[/red]", flush=True) + raise typer.Exit(code=130) + except Exception as e: + print(f"\n[bold red]An unexpected error occurred: {e}[/bold red]", flush=True) + raise e + + +if __name__ == "__main__": + app() diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_mcp/constants.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_mcp/constants.py new file mode 100644 index 0000000000000000000000000000000000000000..1ccade43b151cc9650bfd8cb43d7e907c92447ef --- /dev/null +++ 
b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_mcp/constants.py @@ -0,0 +1,82 @@ +from __future__ import annotations + +import sys +from pathlib import Path +from typing import List + +from huggingface_hub import ChatCompletionInputTool + + +FILENAME_CONFIG = "agent.json" +PROMPT_FILENAMES = ("PROMPT.md", "AGENTS.md") + +DEFAULT_AGENT = { + "model": "Qwen/Qwen2.5-72B-Instruct", + "provider": "nebius", + "servers": [ + { + "type": "stdio", + "command": "npx", + "args": [ + "-y", + "@modelcontextprotocol/server-filesystem", + str(Path.home() / ("Desktop" if sys.platform == "darwin" else "")), + ], + }, + { + "type": "stdio", + "command": "npx", + "args": ["@playwright/mcp@latest"], + }, + ], +} + + +DEFAULT_SYSTEM_PROMPT = """ +You are an agent - please keep going until the user’s query is completely +resolved, before ending your turn and yielding back to the user. Only terminate +your turn when you are sure that the problem is solved, or if you need more +info from the user to solve the problem. +If you are not sure about anything pertaining to the user’s request, use your +tools to read files and gather the relevant information: do NOT guess or make +up an answer. +You MUST plan extensively before each function call, and reflect extensively +on the outcomes of the previous function calls. DO NOT do this entire process +by making function calls only, as this can impair your ability to solve the +problem and think insightfully. 
+""".strip() + +MAX_NUM_TURNS = 10 + +TASK_COMPLETE_TOOL: ChatCompletionInputTool = ChatCompletionInputTool.parse_obj( # type: ignore[assignment] + { + "type": "function", + "function": { + "name": "task_complete", + "description": "Call this tool when the task given by the user is complete", + "parameters": { + "type": "object", + "properties": {}, + }, + }, + } +) + +ASK_QUESTION_TOOL: ChatCompletionInputTool = ChatCompletionInputTool.parse_obj( # type: ignore[assignment] + { + "type": "function", + "function": { + "name": "ask_question", + "description": "Ask the user for more info required to solve or clarify their problem.", + "parameters": { + "type": "object", + "properties": {}, + }, + }, + } +) + +EXIT_LOOP_TOOLS: List[ChatCompletionInputTool] = [TASK_COMPLETE_TOOL, ASK_QUESTION_TOOL] + + +DEFAULT_REPO_ID = "tiny-agents/tiny-agents" diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_mcp/mcp_client.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_mcp/mcp_client.py new file mode 100644 index 0000000000000000000000000000000000000000..67d1fc5d15c898a4130f341e62e60d32c7663d28 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_mcp/mcp_client.py @@ -0,0 +1,384 @@ +import json +import logging +from contextlib import AsyncExitStack +from datetime import timedelta +from pathlib import Path +from typing import TYPE_CHECKING, Any, AsyncIterable, Dict, List, Literal, Optional, Union, overload + +from typing_extensions import NotRequired, TypeAlias, TypedDict, Unpack + +from ...utils._runtime import get_hf_hub_version +from .._generated._async_client import AsyncInferenceClient +from .._generated.types import ( + ChatCompletionInputMessage, + ChatCompletionInputTool, + ChatCompletionStreamOutput, + ChatCompletionStreamOutputDeltaToolCall, +) +from .._providers import PROVIDER_OR_POLICY_T +from .utils import format_result + + 
if TYPE_CHECKING:
    from mcp import ClientSession

logger = logging.getLogger(__name__)

# Type alias for tool names
ToolName: TypeAlias = str

ServerType: TypeAlias = Literal["stdio", "sse", "http"]


class StdioServerParameters_T(TypedDict):
    # Keyword arguments accepted by `MCPClient.add_mcp_server(type="stdio", ...)`.
    command: str
    args: NotRequired[List[str]]
    env: NotRequired[Dict[str, str]]
    cwd: NotRequired[Union[str, Path, None]]


class SSEServerParameters_T(TypedDict):
    # Keyword arguments accepted by `MCPClient.add_mcp_server(type="sse", ...)`.
    url: str
    headers: NotRequired[Dict[str, Any]]
    timeout: NotRequired[float]
    sse_read_timeout: NotRequired[float]


class StreamableHTTPParameters_T(TypedDict):
    # Keyword arguments accepted by `MCPClient.add_mcp_server(type="http", ...)`.
    url: str
    headers: NotRequired[Dict[str, Any]]
    timeout: NotRequired[timedelta]
    sse_read_timeout: NotRequired[timedelta]
    terminate_on_close: NotRequired[bool]


class MCPClient:
    """
    Client for connecting to one or more MCP servers and processing chat completions with tools.

    > [!WARNING]
    > This class is experimental and might be subject to breaking changes in the future without prior notice.

    Args:
        model (`str`, `optional`):
            The model to run inference with. Can be a model id hosted on the Hugging Face Hub, e.g. `meta-llama/Meta-Llama-3-8B-Instruct`
            or a URL to a deployed Inference Endpoint or other local or remote endpoint.
        provider (`str`, *optional*):
            Name of the provider to use for inference. Defaults to "auto" i.e. the first of the providers available for the model, sorted by the user's order in https://hf.co/settings/inference-providers.
            If model is a URL or `base_url` is passed, then `provider` is not used.
        base_url (`str`, *optional*):
            The base URL to run inference. Defaults to None.
        api_key (`str`, `optional`):
            Token to use for authentication. Will default to the locally Hugging Face saved token if not provided. You can also use your own provider API key to interact directly with the provider's service.

    Raises:
        ValueError: If neither `model` nor `base_url` is provided.
    """

    def __init__(
        self,
        *,
        model: Optional[str] = None,
        provider: Optional[PROVIDER_OR_POLICY_T] = None,
        base_url: Optional[str] = None,
        api_key: Optional[str] = None,
    ):
        # Initialize MCP sessions as a dictionary of ClientSession objects
        self.sessions: Dict[ToolName, "ClientSession"] = {}
        self.exit_stack = AsyncExitStack()
        self.available_tools: List[ChatCompletionInputTool] = []
        # To be able to send the model in the payload if `base_url` is provided
        if model is None and base_url is None:
            raise ValueError("At least one of `model` or `base_url` should be set in `MCPClient`.")
        self.payload_model = model
        self.client = AsyncInferenceClient(
            model=None if base_url is not None else model,
            provider=provider,
            api_key=api_key,
            base_url=base_url,
        )

    async def __aenter__(self):
        """Enter the context manager"""
        await self.client.__aenter__()
        await self.exit_stack.__aenter__()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Exit the context manager"""
        await self.client.__aexit__(exc_type, exc_val, exc_tb)
        await self.cleanup()

    async def cleanup(self):
        """Clean up resources"""
        await self.client.close()
        await self.exit_stack.aclose()

    @overload
    async def add_mcp_server(self, type: Literal["stdio"], **params: Unpack[StdioServerParameters_T]): ...

    @overload
    async def add_mcp_server(self, type: Literal["sse"], **params: Unpack[SSEServerParameters_T]): ...

    @overload
    async def add_mcp_server(self, type: Literal["http"], **params: Unpack[StreamableHTTPParameters_T]): ...

    async def add_mcp_server(self, type: ServerType, **params: Any):
        """Connect to an MCP server

        Args:
            type (`str`):
                Type of the server to connect to. Can be one of:
                - "stdio": Standard input/output server (local)
                - "sse": Server-sent events (SSE) server
                - "http": StreamableHTTP server
            **params (`Dict[str, Any]`):
                Server parameters that can be either:
                    - For stdio servers:
                        - command (str): The command to run the MCP server
                        - args (List[str], optional): Arguments for the command
                        - env (Dict[str, str], optional): Environment variables for the command
                        - cwd (Union[str, Path, None], optional): Working directory for the command
                        - allowed_tools (List[str], optional): List of tool names to allow from this server
                    - For SSE servers:
                        - url (str): The URL of the SSE server
                        - headers (Dict[str, Any], optional): Headers for the SSE connection
                        - timeout (float, optional): Connection timeout
                        - sse_read_timeout (float, optional): SSE read timeout
                        - allowed_tools (List[str], optional): List of tool names to allow from this server
                    - For StreamableHTTP servers:
                        - url (str): The URL of the StreamableHTTP server
                        - headers (Dict[str, Any], optional): Headers for the StreamableHTTP connection
                        - timeout (timedelta, optional): Connection timeout
                        - sse_read_timeout (timedelta, optional): SSE read timeout
                        - terminate_on_close (bool, optional): Whether to terminate on close
                        - allowed_tools (List[str], optional): List of tool names to allow from this server

        Raises:
            ValueError: If `type` is not one of "stdio", "sse" or "http".
        """
        # `mcp` is imported lazily so that it is only required when a server is actually added.
        from mcp import ClientSession, StdioServerParameters
        from mcp import types as mcp_types

        # Extract allowed_tools configuration if provided
        allowed_tools = params.pop("allowed_tools", None)

        # Determine server type and create appropriate parameters
        if type == "stdio":
            # Handle stdio server
            from mcp.client.stdio import stdio_client

            logger.info(f"Connecting to stdio MCP server with command: {params['command']} {params.get('args', [])}")

            # Only forward optional parameters that were explicitly set.
            client_kwargs = {"command": params["command"]}
            for key in ["args", "env", "cwd"]:
                if params.get(key) is not None:
                    client_kwargs[key] = params[key]
            server_params = StdioServerParameters(**client_kwargs)
            read, write = await self.exit_stack.enter_async_context(stdio_client(server_params))
        elif type == "sse":
            # Handle SSE server
            from mcp.client.sse import sse_client

            logger.info(f"Connecting to SSE MCP server at: {params['url']}")

            client_kwargs = {"url": params["url"]}
            for key in ["headers", "timeout", "sse_read_timeout"]:
                if params.get(key) is not None:
                    client_kwargs[key] = params[key]
            read, write = await self.exit_stack.enter_async_context(sse_client(**client_kwargs))
        elif type == "http":
            # Handle StreamableHTTP server
            from mcp.client.streamable_http import streamablehttp_client

            logger.info(f"Connecting to StreamableHTTP MCP server at: {params['url']}")

            client_kwargs = {"url": params["url"]}
            for key in ["headers", "timeout", "sse_read_timeout", "terminate_on_close"]:
                if params.get(key) is not None:
                    client_kwargs[key] = params[key]
            read, write, _ = await self.exit_stack.enter_async_context(streamablehttp_client(**client_kwargs))
            # ^ TODO: should we handle `get_session_id_callback`? (function to retrieve the current session ID)
        else:
            raise ValueError(f"Unsupported server type: {type}")

        # The session is registered on the exit stack so that `cleanup()` closes it.
        session = await self.exit_stack.enter_async_context(
            ClientSession(
                read_stream=read,
                write_stream=write,
                client_info=mcp_types.Implementation(
                    name="huggingface_hub.MCPClient",
                    version=get_hf_hub_version(),
                ),
            )
        )

        logger.debug("Initializing session...")
        await session.initialize()

        # List available tools
        response = await session.list_tools()
        # A `%s` placeholder is required: `logging` formats the message as `msg % args`.
        logger.debug("Connected to server with tools: %s", [tool.name for tool in response.tools])

        # Filter tools based on allowed_tools configuration
        filtered_tools = response.tools

        if allowed_tools is not None:
            filtered_tools = [tool for tool in response.tools if tool.name in allowed_tools]
            logger.debug(
                f"Tool filtering applied. Using {len(filtered_tools)} of {len(response.tools)} available tools: {[tool.name for tool in filtered_tools]}"
            )

        for tool in filtered_tools:
            # First server to declare a tool name wins; later duplicates are ignored.
            if tool.name in self.sessions:
                logger.warning(f"Tool '{tool.name}' already defined by another server. Skipping.")
                continue

            # Map tool names to their server for later lookup
            self.sessions[tool.name] = session

            # Add tool to the list of available tools (for use in chat completions)
            self.available_tools.append(
                ChatCompletionInputTool.parse_obj_as_instance(
                    {
                        "type": "function",
                        "function": {
                            "name": tool.name,
                            "description": tool.description,
                            "parameters": tool.inputSchema,
                        },
                    }
                )
            )

    async def process_single_turn_with_tools(
        self,
        messages: List[Union[Dict, ChatCompletionInputMessage]],
        exit_loop_tools: Optional[List[ChatCompletionInputTool]] = None,
        exit_if_first_chunk_no_tool: bool = False,
    ) -> AsyncIterable[Union[ChatCompletionStreamOutput, ChatCompletionInputMessage]]:
        """Process a query using `self.model` and available tools, yielding chunks and tool outputs.

        The assistant message and every tool message produced during the turn are appended to
        `messages` in place.

        Args:
            messages (`List[Dict]`):
                List of message objects representing the conversation history
            exit_loop_tools (`List[ChatCompletionInputTool]`, *optional*):
                List of tools that should exit the generator when called
            exit_if_first_chunk_no_tool (`bool`, *optional*):
                Exit if no tool is present in the first chunks. Default to False.

        Yields:
            [`ChatCompletionStreamOutput`] chunks or [`ChatCompletionInputMessage`] objects
        """
        # Prepare tools list based on options
        tools = self.available_tools
        if exit_loop_tools is not None:
            tools = [*exit_loop_tools, *self.available_tools]

        # Create the streaming request
        response = await self.client.chat.completions.create(
            model=self.payload_model,
            messages=messages,
            tools=tools,
            tool_choice="auto",
            stream=True,
        )

        message: Dict[str, Any] = {"role": "unknown", "content": ""}
        # Tool calls arrive in fragments; they are reassembled here, keyed by their stream index.
        final_tool_calls: Dict[int, ChatCompletionStreamOutputDeltaToolCall] = {}
        num_of_chunks = 0

        # Read from stream
        async for chunk in response:
            num_of_chunks += 1
            delta = chunk.choices[0].delta if chunk.choices and len(chunk.choices) > 0 else None
            if not delta:
                continue

            # Process message
            if delta.role:
                message["role"] = delta.role
            if delta.content:
                message["content"] += delta.content

            # Process tool calls
            if delta.tool_calls:
                for tool_call in delta.tool_calls:
                    idx = tool_call.index
                    # first chunk for this tool call
                    if idx not in final_tool_calls:
                        final_tool_calls[idx] = tool_call
                        if final_tool_calls[idx].function.arguments is None:
                            final_tool_calls[idx].function.arguments = ""
                        continue
                    # safety before concatenating text to .function.arguments
                    if final_tool_calls[idx].function.arguments is None:
                        final_tool_calls[idx].function.arguments = ""

                    if tool_call.function.arguments:
                        final_tool_calls[idx].function.arguments += tool_call.function.arguments

            # Optionally exit early if no tools in first chunks
            if exit_if_first_chunk_no_tool and num_of_chunks <= 2 and len(final_tool_calls) == 0:
                return

            # Yield each chunk to caller
            yield chunk

        # Add the assistant message with tool calls (if any) to messages
        if message["content"] or final_tool_calls:
            # if the role is unknown, set it to assistant
            if message.get("role") == "unknown":
                message["role"] = "assistant"
            # Convert final_tool_calls to the format expected by OpenAI
            if final_tool_calls:
                tool_calls_list: List[Dict[str, Any]] = []
                for tc in final_tool_calls.values():
                    tool_calls_list.append(
                        {
                            "id": tc.id,
                            "type": "function",
                            "function": {
                                "name": tc.function.name,
                                "arguments": tc.function.arguments or "{}",
                            },
                        }
                    )
                message["tool_calls"] = tool_calls_list
            messages.append(message)

        # Process tool calls one by one
        for tool_call in final_tool_calls.values():
            function_name = tool_call.function.name
            try:
                function_args = json.loads(tool_call.function.arguments or "{}")
            except json.JSONDecodeError as err:
                # Report the malformed arguments back to the model instead of raising.
                tool_message = {
                    "role": "tool",
                    "tool_call_id": tool_call.id,
                    "name": function_name,
                    "content": f"Invalid JSON generated by the model: {err}",
                }
                tool_message_as_obj = ChatCompletionInputMessage.parse_obj_as_instance(tool_message)
                messages.append(tool_message_as_obj)
                yield tool_message_as_obj
                continue  # move to next tool call

            tool_message = {"role": "tool", "tool_call_id": tool_call.id, "content": "", "name": function_name}

            # Check if this is an exit loop tool
            if exit_loop_tools and function_name in [t.function.name for t in exit_loop_tools]:
                tool_message_as_obj = ChatCompletionInputMessage.parse_obj_as_instance(tool_message)
                messages.append(tool_message_as_obj)
                yield tool_message_as_obj
                return

            # Execute tool call with the appropriate session
            session = self.sessions.get(function_name)
            if session is not None:
                try:
                    result = await session.call_tool(function_name, function_args)
                    tool_message["content"] = format_result(result)
                except Exception as err:
                    # Best effort: a failing tool is reported to the model, it does not abort the turn.
                    tool_message["content"] = f"Error: MCP tool call failed with error message: {err}"
            else:
                tool_message["content"] = f"Error: No session found for tool: {function_name}"

            # Yield tool message
            tool_message_as_obj = ChatCompletionInputMessage.parse_obj_as_instance(tool_message)
            messages.append(tool_message_as_obj)
            yield tool_message_as_obj
from typing import Dict, List, Literal, TypedDict, Union

from typing_extensions import NotRequired


# Description of one input entry of an agent config.
# `total=False` makes every key optional.
class InputConfig(TypedDict, total=False):
    id: str
    description: str
    type: str
    password: bool


# Config entry for an MCP server with `type == "stdio"`.
# `allowed_tools` may be omitted; all other keys are required by this schema.
class StdioServerConfig(TypedDict):
    type: Literal["stdio"]
    command: str
    args: List[str]
    env: Dict[str, str]
    cwd: str
    allowed_tools: NotRequired[List[str]]


# Config entry for an MCP server with `type == "http"`.
class HTTPServerConfig(TypedDict):
    type: Literal["http"]
    url: str
    headers: Dict[str, str]
    allowed_tools: NotRequired[List[str]]


# Config entry for an MCP server with `type == "sse"`.
class SSEServerConfig(TypedDict):
    type: Literal["sse"]
    url: str
    headers: Dict[str, str]
    allowed_tools: NotRequired[List[str]]


# A server entry is one of the three shapes above, discriminated by its `type` key.
ServerConfig = Union[StdioServerConfig, HTTPServerConfig, SSEServerConfig]


# AgentConfig root object
class AgentConfig(TypedDict):
    model: str
    provider: str
    apiKey: NotRequired[str]
    inputs: List[InputConfig]
    servers: List[ServerConfig]
"""
Utility functions for MCPClient and Tiny Agents.

Formatting utilities taken from the JS SDK: https://github.com/huggingface/huggingface.js/blob/main/packages/mcp-client/src/ResultFormatter.ts.
"""

import json
from pathlib import Path
from typing import TYPE_CHECKING, List, Optional, Tuple

from huggingface_hub import snapshot_download
from huggingface_hub.errors import EntryNotFoundError

from .constants import DEFAULT_AGENT, DEFAULT_REPO_ID, FILENAME_CONFIG, PROMPT_FILENAMES
from .types import AgentConfig


if TYPE_CHECKING:
    from mcp import types as mcp_types


def format_result(result: "mcp_types.CallToolResult") -> str:
    """
    Render the content of a mcp.types.CallToolResult as a human-readable string.

    Text items are passed through; image, audio and binary resource items are replaced
    by a one-line placeholder stating their mime type and estimated size. Items of any
    other type are ignored.

    Args:
        result (CallToolResult)
            Object returned by mcp.ClientSession.call_tool.

    Returns:
        str
            The rendered parts joined with newlines, or "[No content]" if the result is empty.
    """
    entries = result.content
    if len(entries) == 0:
        return "[No content]"

    rendered: List[str] = []
    for entry in entries:
        kind = entry.type

        if kind == "text":
            rendered.append(entry.text)
            continue

        if kind == "image":
            rendered.append(
                f"[Binary Content: Image {entry.mimeType}, {_get_base64_size(entry.data)} bytes]\n"
                f"The task is complete and the content accessible to the User"
            )
            continue

        if kind == "audio":
            rendered.append(
                f"[Binary Content: Audio {entry.mimeType}, {_get_base64_size(entry.data)} bytes]\n"
                f"The task is complete and the content accessible to the User"
            )
            continue

        if kind == "resource":
            embedded = entry.resource
            # Text takes precedence over blob when a resource exposes both attributes.
            if hasattr(embedded, "text"):
                rendered.append(embedded.text)
            elif hasattr(embedded, "blob"):
                rendered.append(
                    f"[Binary Content ({embedded.uri}): {embedded.mimeType}, {_get_base64_size(embedded.blob)} bytes]\n"
                    f"The task is complete and the content accessible to the User"
                )

    return "\n".join(rendered)


def _get_base64_size(base64_str: str) -> int:
    """Estimate the number of decoded bytes represented by a base64-encoded string."""
    # Drop any prefix like "data:image/png;base64,"
    payload = base64_str.split(",")[1] if "," in base64_str else base64_str

    # One or two trailing "=" mark bytes that are padding only.
    if payload.endswith("=="):
        pad = 2
    else:
        pad = 1 if payload.endswith("=") else 0

    return (len(payload) * 3) // 4 - pad


def _load_agent_config(agent_path: Optional[str]) -> Tuple[AgentConfig, Optional[str]]:
    """Load an agent config and its optional prompt.

    `agent_path` is resolved in this order: `None` (built-in default agent, no prompt),
    a local JSON file (no prompt), a local directory, and finally a folder of the
    `DEFAULT_REPO_ID` dataset on the Hub.

    Raises:
        FileNotFoundError: If a local directory is given but has no config file.
        EntryNotFoundError: If loading the agent from the Hub fails for any reason.
    """

    def _read_dir(directory: Path) -> Tuple[AgentConfig, Optional[str]]:
        config_path = directory / FILENAME_CONFIG
        if not config_path.exists():
            raise FileNotFoundError(f" Config file not found in {directory}! Please make sure it exists locally")
        config: AgentConfig = json.loads(config_path.read_text(encoding="utf-8"))

        # The first prompt file that exists (in PROMPT_FILENAMES order) is used.
        existing_prompt = next(
            (directory / name for name in PROMPT_FILENAMES if (directory / name).exists()),
            None,
        )
        prompt: Optional[str] = None
        if existing_prompt is not None:
            prompt = existing_prompt.read_text(encoding="utf-8")
        return config, prompt

    if agent_path is None:
        return DEFAULT_AGENT, None  # type: ignore[return-value]

    local_path = Path(agent_path).expanduser()
    if local_path.is_file():
        return json.loads(local_path.read_text(encoding="utf-8")), None
    if local_path.is_dir():
        return _read_dir(local_path)

    # fetch from the Hub
    try:
        snapshot_dir = snapshot_download(
            repo_id=DEFAULT_REPO_ID,
            allow_patterns=f"{agent_path}/*",
            repo_type="dataset",
        )
        return _read_dir(Path(snapshot_dir) / agent_path)
    except Exception as err:
        raise EntryNotFoundError(
            f" Agent {agent_path} not found in tiny-agents/tiny-agents! Please make sure it exists in https://huggingface.co/datasets/tiny-agents/tiny-agents."
        ) from err
from functools import lru_cache
from typing import Any, Dict, List, Optional, Union, overload

from huggingface_hub import constants
from huggingface_hub.hf_api import InferenceProviderMapping
from huggingface_hub.inference._common import MimeBytes, RequestParameters
from huggingface_hub.inference._generated.types.chat_completion import ChatCompletionInputMessage
from huggingface_hub.utils import build_hf_headers, get_token, logging


logger = logging.get_logger(__name__)


# Dev purposes only.
# If you want to try to run inference for a new model locally before it's registered on huggingface.co
# for a given Inference Provider, you can add it to the following dictionary.
HARDCODED_MODEL_INFERENCE_MAPPING: Dict[str, Dict[str, InferenceProviderMapping]] = {
    # "HF model ID" => InferenceProviderMapping object initialized with "Model ID on Inference Provider's side"
    #
    # Example:
    # "Qwen/Qwen2.5-Coder-32B-Instruct": InferenceProviderMapping(hf_model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
    #                                    provider_id="Qwen2.5-Coder-32B-Instruct",
    #                                    task="conversational",
    #                                    status="live")
    "cerebras": {},
    "cohere": {},
    "clarifai": {},
    "fal-ai": {},
    "fireworks-ai": {},
    "groq": {},
    "hf-inference": {},
    "hyperbolic": {},
    "nebius": {},
    "nscale": {},
    "replicate": {},
    "sambanova": {},
    "scaleway": {},
    "together": {},
    "zai-org": {},
}


@overload
def filter_none(obj: Dict[str, Any]) -> Dict[str, Any]: ...
@overload
def filter_none(obj: List[Any]) -> List[Any]: ...


def filter_none(obj: Union[Dict[str, Any], List[Any]]) -> Union[Dict[str, Any], List[Any]]:
    """Return a copy of `obj` with `None`-valued dict entries removed, recursively.

    Nested dicts and lists are cleaned as well. `None` items that sit directly in a
    list are kept: only dict entries are dropped.

    Raises:
        ValueError: If `obj` is neither a dict nor a list.
    """
    if isinstance(obj, dict):
        cleaned: Dict[str, Any] = {}
        for k, v in obj.items():
            if v is None:
                continue
            if isinstance(v, (dict, list)):
                v = filter_none(v)
            cleaned[k] = v
        return cleaned

    if isinstance(obj, list):
        return [filter_none(v) if isinstance(v, (dict, list)) else v for v in obj]

    raise ValueError(f"Expected dict or list, got {type(obj)}")


class TaskProviderHelper:
    """Base class for task-specific provider helpers."""

    def __init__(self, provider: str, base_url: str, task: str) -> None:
        self.provider = provider
        self.task = task
        self.base_url = base_url

    def prepare_request(
        self,
        *,
        inputs: Any,
        parameters: Dict[str, Any],
        headers: Dict,
        model: Optional[str],
        api_key: Optional[str],
        extra_payload: Optional[Dict[str, Any]] = None,
    ) -> RequestParameters:
        """
        Prepare the request to be sent to the provider.

        Each step (api_key, model, headers, url, payload) can be customized in subclasses.

        Raises:
            ValueError: If both or neither of the dict payload and the bytes body are produced,
                or if one of the preparation steps rejects its input.
        """
        # api_key from user, or local token, or raise error
        api_key = self._prepare_api_key(api_key)

        # mapped model from HF model ID
        provider_mapping_info = self._prepare_mapping_info(model)

        # default HF headers + user headers (to customize in subclasses)
        headers = self._prepare_headers(headers, api_key)

        # routed URL if HF token, or direct URL (to customize in '_prepare_route' in subclasses)
        url = self._prepare_url(api_key, provider_mapping_info.provider_id)

        # prepare payload (to customize in subclasses)
        payload = self._prepare_payload_as_dict(inputs, parameters, provider_mapping_info=provider_mapping_info)
        if payload is not None:
            # values from `extra_payload` take precedence over the prepared payload
            payload = recursive_merge(payload, filter_none(extra_payload or {}))

        # body data (to customize in subclasses)
        data = self._prepare_payload_as_bytes(inputs, parameters, provider_mapping_info, extra_payload)

        # check if both payload and data are set and return
        if payload is not None and data is not None:
            raise ValueError("Both payload and data cannot be set in the same request.")
        if payload is None and data is None:
            raise ValueError("Either payload or data must be set in the request.")

        # normalize headers to lowercase and add content-type if not present
        normalized_headers = self._normalize_headers(headers, payload, data)

        return RequestParameters(
            url=url,
            task=self.task,
            model=provider_mapping_info.provider_id,
            json=payload,
            data=data,
            headers=normalized_headers,
        )

    def get_response(
        self,
        response: Union[bytes, Dict],
        request_params: Optional[RequestParameters] = None,
    ) -> Any:
        """
        Return the response in the expected format.

        Override this method in subclasses for customized response handling."""
        return response

    def _prepare_api_key(self, api_key: Optional[str]) -> str:
        """Return the API key to use for the request.

        Falls back to the token returned by `get_token()` when `api_key` is None and
        raises `ValueError` if that is None as well.

        Usually not overwritten in subclasses."""
        if api_key is None:
            api_key = get_token()
        if api_key is None:
            raise ValueError(
                f"You must provide an api_key to work with {self.provider} API or log in with `hf auth login`."
            )
        return api_key

    def _prepare_mapping_info(self, model: Optional[str]) -> InferenceProviderMapping:
        """Return the mapped model ID to use for the request.

        Raises `ValueError` if `model` is None, if no mapping exists for this provider,
        or if the mapping's task differs from `self.task`.

        Usually not overwritten in subclasses."""
        if model is None:
            raise ValueError(f"Please provide an HF model ID supported by {self.provider}.")

        # hardcoded mapping for local testing
        if HARDCODED_MODEL_INFERENCE_MAPPING.get(self.provider, {}).get(model):
            return HARDCODED_MODEL_INFERENCE_MAPPING[self.provider][model]

        # otherwise use the first fetched mapping that belongs to this provider
        provider_mapping = None
        for mapping in _fetch_inference_provider_mapping(model):
            if mapping.provider == self.provider:
                provider_mapping = mapping
                break

        if provider_mapping is None:
            raise ValueError(f"Model {model} is not supported by provider {self.provider}.")

        if provider_mapping.task != self.task:
            raise ValueError(
                f"Model {model} is not supported for task {self.task} and provider {self.provider}. "
                f"Supported task: {provider_mapping.task}."
            )

        # "staging" and "error" statuses only warn; the mapping is still returned
        if provider_mapping.status == "staging":
            logger.warning(
                f"Model {model} is in staging mode for provider {self.provider}. Meant for test purposes only."
            )
        if provider_mapping.status == "error":
            logger.warning(
                f"Our latest automated health check on model '{model}' for provider '{self.provider}' did not complete successfully.  "
                "Inference call might fail."
            )
        return provider_mapping

    def _normalize_headers(
        self, headers: Dict[str, Any], payload: Optional[Dict[str, Any]], data: Optional[MimeBytes]
    ) -> Dict[str, Any]:
        """Normalize the headers to use for the request.

        Header names are lowercased and headers whose value is None are dropped. If no
        content type is set, it is taken from `data.mime_type` when available, otherwise
        it is "application/json" when a dict payload is sent.

        Override this method in subclasses for customized headers.
        """
        normalized_headers = {key.lower(): value for key, value in headers.items() if value is not None}
        if normalized_headers.get("content-type") is None:
            if data is not None and data.mime_type is not None:
                normalized_headers["content-type"] = data.mime_type
            elif payload is not None:
                normalized_headers["content-type"] = "application/json"
        return normalized_headers

    def _prepare_headers(self, headers: Dict, api_key: str) -> Dict[str, Any]:
        """Return the headers to use for the request.

        User-provided `headers` override the ones built by `build_hf_headers`.

        Override this method in subclasses for customized headers.
        """
        return {**build_hf_headers(token=api_key), **headers}

    def _prepare_url(self, api_key: str, mapped_model: str) -> str:
        """Return the URL to use for the request.

        Usually not overwritten in subclasses."""
        base_url = self._prepare_base_url(api_key)
        route = self._prepare_route(mapped_model, api_key)
        # join with exactly one "/" between base URL and route
        return f"{base_url.rstrip('/')}/{route.lstrip('/')}"

    def _prepare_base_url(self, api_key: str) -> str:
        """Return the base URL to use for the request.

        Usually not overwritten in subclasses."""
        # Route to the proxy if the api_key is a HF TOKEN
        if api_key.startswith("hf_"):
            logger.info(f"Calling '{self.provider}' provider through Hugging Face router.")
            return constants.INFERENCE_PROXY_TEMPLATE.format(provider=self.provider)
        else:
            logger.info(f"Calling '{self.provider}' provider directly.")
            return self.base_url

    def _prepare_route(self, mapped_model: str, api_key: str) -> str:
        """Return the route to use for the request.

        Override this method in subclasses for customized routes.
        """
        return ""

    def _prepare_payload_as_dict(
        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
    ) -> Optional[Dict]:
        """Return the payload to use for the request, as a dict.

        Override this method in subclasses for customized payloads.
        Only one of `_prepare_payload_as_dict` and `_prepare_payload_as_bytes` should return a value.
        """
        return None

    def _prepare_payload_as_bytes(
        self,
        inputs: Any,
        parameters: Dict,
        provider_mapping_info: InferenceProviderMapping,
        extra_payload: Optional[Dict],
    ) -> Optional[MimeBytes]:
        """Return the body to use for the request, as bytes.

        Override this method in subclasses for customized body data.
        Only one of `_prepare_payload_as_dict` and `_prepare_payload_as_bytes` should return a value.
        """
        return None


class BaseConversationalTask(TaskProviderHelper):
    """
    Base class for conversational (chat completion) tasks.
    The schema follows the OpenAI API format defined here: https://platform.openai.com/docs/api-reference/chat
    """

    def __init__(self, provider: str, base_url: str):
        super().__init__(provider=provider, base_url=base_url, task="conversational")

    def _prepare_route(self, mapped_model: str, api_key: str) -> str:
        return "/v1/chat/completions"

    def _prepare_payload_as_dict(
        self,
        inputs: List[Union[Dict, ChatCompletionInputMessage]],
        parameters: Dict,
        provider_mapping_info: InferenceProviderMapping,
    ) -> Optional[Dict]:
        # "model" comes last so the provider-side model ID overrides any "model" key in `parameters`
        return filter_none({"messages": inputs, **parameters, "model": provider_mapping_info.provider_id})


class BaseTextGenerationTask(TaskProviderHelper):
    """
    Base class for text-generation (completion) tasks.
    The schema follows the OpenAI API format defined here: https://platform.openai.com/docs/api-reference/completions
    """

    def __init__(self, provider: str, base_url: str):
        super().__init__(provider=provider, base_url=base_url, task="text-generation")

    def _prepare_route(self, mapped_model: str, api_key: str) -> str:
        return "/v1/completions"

    def _prepare_payload_as_dict(
        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
    ) -> Optional[Dict]:
        # "model" comes last so the provider-side model ID overrides any "model" key in `parameters`
        return filter_none({"prompt": inputs, **parameters, "model": provider_mapping_info.provider_id})


# Results are memoized per model ID for the lifetime of the process (unbounded cache).
@lru_cache(maxsize=None)
def _fetch_inference_provider_mapping(model: str) -> List["InferenceProviderMapping"]:
    """
    Fetch provider mappings for a model from the Hub.

    Raises:
        ValueError: If the model info has no inference provider mapping.
    """
    from huggingface_hub.hf_api import HfApi

    info = HfApi().model_info(model, expand=["inferenceProviderMapping"])
    provider_mapping = info.inference_provider_mapping
    if provider_mapping is None:
        raise ValueError(f"No provider mapping found for model {model}")
    return provider_mapping


def recursive_merge(dict1: Dict, dict2: Dict) -> Dict:
    """Return a new dict holding `dict1` updated with `dict2`.

    When a key is present in both and both values are dicts, the two values are merged
    recursively; otherwise the value from `dict2` wins.
    """
    return {
        **dict1,
        **{
            key: recursive_merge(dict1[key], value)
            if (key in dict1 and isinstance(dict1[key], dict) and isinstance(value, dict))
            else value
            for key, value in dict2.items()
        },
    }
"""Functions for splitting a network into two communities (finding a bipartition)."""

import random
from copy import deepcopy
from itertools import count

import networkx as nx

__all__ = [
    "kernighan_lin_bisection",
    "spectral_modularity_bipartition",
    "greedy_node_swap_bipartition",
]


def _kernighan_lin_sweep(edge_info, side):
    """
    This is a modified form of Kernighan-Lin, which moves single nodes at a
    time, alternating between sides to keep the bisection balanced.  We keep
    two min-heaps of swap costs to make optimal-next-move selection fast.

    Yields ``(totcost, i, (u, v))`` after each pair of moves, where `totcost`
    is the accumulated cost, `i` the 1-based number of pairs moved so far, and
    `u`, `v` the nodes popped from side 0 and side 1 respectively.  Nothing is
    yielded when either side is empty.
    """
    heap0, heap1 = cost_heaps = nx.utils.BinaryHeap(), nx.utils.BinaryHeap()
    # we use heap methods insert, pop, and get
    for u, nbrs in edge_info.items():
        cost_u = sum(wt if side[v] else -wt for v, wt in nbrs.items())
        if side[u]:
            heap1.insert(u, cost_u)
        else:
            heap0.insert(u, -cost_u)

    def _update_heap_values(node):
        # Adjust the cost of every neighbor of `node` that is still on a heap.
        side_node = side[node]
        for nbr, wt in edge_info[node].items():
            side_nbr = side[nbr]
            if side_nbr == side_node:
                wt = -wt
            heap_nbr = cost_heaps[side_nbr]
            if nbr in heap_nbr:
                cost_nbr = heap_nbr.get(nbr) + 2 * wt
                # allow_increase lets us update a value already on the heap
                heap_nbr.insert(nbr, cost_nbr, allow_increase=True)

    i = 0
    totcost = 0
    while heap0 and heap1:
        u, cost_u = heap0.pop()
        _update_heap_values(u)
        v, cost_v = heap1.pop()
        _update_heap_values(v)
        totcost += cost_u + cost_v
        i += 1
        yield totcost, i, (u, v)


@nx.utils.not_implemented_for("directed")
@nx.utils.py_random_state(4)
@nx._dispatchable(edge_attrs="weight")
def kernighan_lin_bisection(G, partition=None, max_iter=10, weight="weight", seed=None):
    """Partition a graph into two blocks using the Kernighan–Lin algorithm.

    This algorithm partitions a network into two sets by iteratively
    swapping pairs of nodes to reduce the edge cut between the two sets.  The
    pairs are chosen according to a modified form of Kernighan-Lin [1]_, which
    moves nodes individually, alternating between sides to keep the bisection
    balanced.

    Kernighan-Lin is an approximate algorithm for maximal modularity bisection.
    In [2]_ they suggest that fine-tuned improvements can be made using
    greedy node swapping, (see `greedy_node_swap_bipartition`).
    The improvements are typically only a few percent of the modularity value.
    But they claim that can make a difference between a good and excellent method.
    This function does not perform any improvements. But you can do that yourself.

    Parameters
    ----------
    G : NetworkX graph
        Graph must be undirected.

    partition : tuple
        Pair of iterables containing an initial partition. If not
        specified, a random balanced partition is used.

    max_iter : int
        Maximum number of times to attempt swaps to find an
        improvement before giving up.

    weight : string or function (default: "weight")
        If this is a string, then edge weights will be accessed via the
        edge attribute with this key (that is, the weight of the edge
        joining `u` to `v` will be ``G.edges[u, v][weight]``). If no
        such edge attribute exists, the weight of the edge is assumed to
        be one.

        If this is a function, the weight of an edge is the value
        returned by the function. The function must accept exactly three
        positional arguments: the two endpoints of an edge and the
        dictionary of edge attributes for that edge. The function must
        return a number or None to indicate a hidden edge.

    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.
        Only used if partition is None

    Returns
    -------
    partition : tuple
        A pair of sets of nodes representing the bipartition.
        If no swap is possible (fewer than two nodes, or one side of the
        initial partition is empty) the initial partition is returned.

    Raises
    ------
    NetworkXError
        If `partition` is not a valid partition of the nodes of the graph.

    References
    ----------
    .. [1] Kernighan, B. W.; Lin, Shen (1970).
       "An efficient heuristic procedure for partitioning graphs."
       *Bell Systems Technical Journal* 49: 291--307.
       Oxford University Press 2011.
    .. [2] M. E. J. Newman,
       "Modularity and community structure in networks",
       PNAS, 103 (23), p. 8577-8582,
       https://doi.org/10.1073/pnas.0601602103

    """
    nodes = list(G)

    if partition is None:
        seed.shuffle(nodes)
        mid = len(nodes) // 2
        A, B = nodes[:mid], nodes[mid:]
    else:
        try:
            A, B = partition
        except (TypeError, ValueError) as err:
            raise nx.NetworkXError("partition must be two sets") from err
        if not nx.community.is_partition(G, [A, B]):
            raise nx.NetworkXError("partition invalid")

    # Build the membership set once: `node in A` on a list would be O(n) per node.
    in_first_block = set(A)
    side = {node: (node in in_first_block) for node in nodes}

    # ruff: noqa: E731 skips check for no lambda functions
    # Using shortest_paths _weight_function with sum instead of min on multiedges
    if callable(weight):
        sum_weight = weight
    elif G.is_multigraph():
        sum_weight = lambda u, v, d: sum(dd.get(weight, 1) for dd in d.values())
    else:
        sum_weight = lambda u, v, d: d.get(weight, 1)

    # Hidden edges (weight function returned None) are left out of edge_info.
    edge_info = {
        u: {v: wt for v, d in nbrs.items() if (wt := sum_weight(u, v, d)) is not None}
        for u, nbrs in G._adj.items()
    }

    for _ in range(max_iter):
        costs = list(_kernighan_lin_sweep(edge_info, side))
        if not costs:
            # One side is empty (e.g. a graph with fewer than two nodes), so the
            # sweep produced no candidate swaps; `min` would raise on an empty list.
            break
        # find out how many edges to update: min_i
        min_cost, min_i, _ = min(costs)
        if min_cost >= 0:
            break

        for _, _, (u, v) in costs[:min_i]:
            side[u] = 1
            side[v] = 0

    part1 = {u for u, s in side.items() if s == 0}
    part2 = {u for u, s in side.items() if s == 1}
    return part1, part2
Nodes whose + corresponding values in the eigenvector are negative are placed in + one block, nodes whose values are nonnegative are placed in another + block. + + Parameters + ---------- + G : NetworkX graph + + Returns + ------- + C : tuple + Pair of communities as two sets of nodes of ``G``, partitioned + according to second largest eigenvalue of the modularity matrix. + + Examples + -------- + >>> G = nx.karate_club_graph() + >>> MrHi, Officer = nx.community.spectral_modularity_bipartition(G) + >>> MrHi, Officer = sorted([sorted(MrHi), sorted(Officer)]) + >>> MrHi + [0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 16, 17, 19, 21] + >>> Officer + [8, 9, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33] + + References + ---------- + .. [1] M. E. J. Newman *Networks: An Introduction*, pages 373--378 + Oxford University Press 2011. + .. [2] M. E. J. Newman, + "Modularity and community structure in networks", + PNAS, 103 (23), p. 8577-8582, + https://doi.org/10.1073/pnas.0601602103 + + """ + import numpy as np + + B = nx.linalg.modularity_matrix(G) + eigenvalues, eigenvectors = np.linalg.eig(B) + index = np.argsort(eigenvalues)[-1] + v2 = zip(np.real(eigenvectors[:, index]), G) + left, right = set(), set() + for u, n in v2: + if u < 0: + left.add(n) + else: + right.add(n) + return left, right + + +@nx.utils.not_implemented_for("multigraph") +def greedy_node_swap_bipartition(G, *, init_split=None, max_iter=10): + """Split the nodes into two communities based on greedy + modularity maximization. + + The algorithm works by selecting a node to change communities which + will maximize the modularity. The swap is made and the community + structure with the highest modularity is kept. + + Parameters + ---------- + G : NetworkX graph + + init_split : 2-tuple of sets of nodes + Pair of sets of nodes in ``G`` providing an initial bipartition + for the algorithm. If not specified, a random balanced partition + is used. 
If this pair of sets is not a partition of the nodes of `G`, + :exc:`NetworkXException` is raised. + + max_iter : int + Maximum number of iterations of attempting swaps to find an improvement. + + Returns + ------- + max_split : 2-tuple of sets of nodes + Pair of sets of nodes of ``G``, partitioned according to a + node swap greedy modularity maximization algorithm. + + Raises + ------ + NetworkXError + if init_split is not a valid partition of the + graph into two communities or if G is a MultiGraph + + Examples + -------- + >>> G = nx.barbell_graph(3, 0) + >>> left, right = nx.community.greedy_node_swap_bipartition(G) + >>> # Sort the communities so the nodes appear in increasing order. + >>> left, right = sorted([sorted(left), sorted(right)]) + >>> sorted(left) + [0, 1, 2] + >>> sorted(right) + [3, 4, 5] + + Notes + ----- + This function is not implemented for multigraphs. + + References + ---------- + .. [1] M. E. J. Newman "Networks: An Introduction", pages 373--375. + Oxford University Press 2011. 
+ + """ + if init_split is None: + m1 = len(G) // 2 + m2 = len(G) - m1 + some_nodes = set(random.sample(list(G), m1)) + other_nodes = {n for n in G if n not in some_nodes} + best_split_so_far = (some_nodes, other_nodes) + else: + if not nx.community.is_partition(G, init_split): + raise nx.NetworkXError("init_split is not a partition of G") + if not len(init_split) == 2: + raise nx.NetworkXError("init_split must be a bipartition") + best_split_so_far = deepcopy(init_split) + + best_mod = nx.community.modularity(G, best_split_so_far) + + max_split, max_mod = best_split_so_far, best_mod + its = 0 + m = G.number_of_edges() + G_degree = dict(G.degree) + + while max_mod >= best_mod and its < max_iter: + best_split_so_far = max_split + best_mod = max_mod + next_split = deepcopy(max_split) + next_mod = max_mod + nodes = set(G) + while nodes: + max_swap = -1 + max_node = None + max_node_comm = None + left, right = next_split + leftd = sum(G_degree[n] for n in left) + rightd = sum(G_degree[n] for n in right) + for n in nodes: + if n in left: + in_comm, out_comm = left, right + in_deg, out_deg = leftd, rightd + else: + in_comm, out_comm = right, left + in_deg, out_deg = rightd, leftd + + d_eii = -len(G[n].keys() & in_comm) / m + d_ejj = len(G[n].keys() & out_comm) / m + deg = G_degree[n] + d_sum_ai = (deg / (2 * m**2)) * (in_deg - out_deg - deg) + swap_change = d_eii + d_ejj + d_sum_ai + + if swap_change > max_swap: + max_swap = swap_change + max_node = n + max_node_comm = in_comm + non_max_node_comm = out_comm + # swap the node from one comm to the other + max_node_comm.remove(max_node) + non_max_node_comm.add(max_node) + next_mod += max_swap + # deepcopy next_split each time it reaches a high (might go lower later) + if next_mod > max_mod: + max_split, max_mod = deepcopy(next_split), next_mod + nodes.remove(max_node) + its += 1 + return best_split_so_far diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/networkx/algorithms/community/centrality.py 
b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/networkx/algorithms/community/centrality.py new file mode 100644 index 0000000000000000000000000000000000000000..43281701d2b630710acba8f3cef6693356aa461a --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/networkx/algorithms/community/centrality.py @@ -0,0 +1,171 @@ +"""Functions for computing communities based on centrality notions.""" + +import networkx as nx + +__all__ = ["girvan_newman"] + + +@nx._dispatchable(preserve_edge_attrs="most_valuable_edge") +def girvan_newman(G, most_valuable_edge=None): + """Finds communities in a graph using the Girvan–Newman method. + + Parameters + ---------- + G : NetworkX graph + + most_valuable_edge : function + Function that takes a graph as input and outputs an edge. The + edge returned by this function will be recomputed and removed at + each iteration of the algorithm. + + If not specified, the edge with the highest + :func:`networkx.edge_betweenness_centrality` will be used. + + Returns + ------- + iterator + Iterator over tuples of sets of nodes in `G`. Each set of node + is a community, each tuple is a sequence of communities at a + particular level of the algorithm. + + Examples + -------- + To get the first pair of communities:: + + >>> G = nx.path_graph(10) + >>> comp = nx.community.girvan_newman(G) + >>> tuple(sorted(c) for c in next(comp)) + ([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]) + + To get only the first *k* tuples of communities, use + :func:`itertools.islice`:: + + >>> import itertools + >>> G = nx.path_graph(8) + >>> k = 2 + >>> comp = nx.community.girvan_newman(G) + >>> for communities in itertools.islice(comp, k): + ... print(tuple(sorted(c) for c in communities)) + ... 
+ ([0, 1, 2, 3], [4, 5, 6, 7]) + ([0, 1], [2, 3], [4, 5, 6, 7]) + + To stop getting tuples of communities once the number of communities + is greater than *k*, use :func:`itertools.takewhile`:: + + >>> import itertools + >>> G = nx.path_graph(8) + >>> k = 4 + >>> comp = nx.community.girvan_newman(G) + >>> limited = itertools.takewhile(lambda c: len(c) <= k, comp) + >>> for communities in limited: + ... print(tuple(sorted(c) for c in communities)) + ... + ([0, 1, 2, 3], [4, 5, 6, 7]) + ([0, 1], [2, 3], [4, 5, 6, 7]) + ([0, 1], [2, 3], [4, 5], [6, 7]) + + To just choose an edge to remove based on the weight:: + + >>> from operator import itemgetter + >>> G = nx.path_graph(10) + >>> edges = G.edges() + >>> nx.set_edge_attributes(G, {(u, v): v for u, v in edges}, "weight") + >>> def heaviest(G): + ... u, v, w = max(G.edges(data="weight"), key=itemgetter(2)) + ... return (u, v) + ... + >>> comp = nx.community.girvan_newman(G, most_valuable_edge=heaviest) + >>> tuple(sorted(c) for c in next(comp)) + ([0, 1, 2, 3, 4, 5, 6, 7, 8], [9]) + + To utilize edge weights when choosing an edge with, for example, the + highest betweenness centrality:: + + >>> from networkx import edge_betweenness_centrality as betweenness + >>> def most_central_edge(G): + ... centrality = betweenness(G, weight="weight") + ... return max(centrality, key=centrality.get) + ... + >>> G = nx.path_graph(10) + >>> comp = nx.community.girvan_newman(G, most_valuable_edge=most_central_edge) + >>> tuple(sorted(c) for c in next(comp)) + ([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]) + + To specify a different ranking algorithm for edges, use the + `most_valuable_edge` keyword argument:: + + >>> from networkx import edge_betweenness_centrality + >>> from random import random + >>> def most_central_edge(G): + ... centrality = edge_betweenness_centrality(G) + ... max_cent = max(centrality.values()) + ... # Scale the centrality values so they are between 0 and 1, + ... # and add some random noise. + ... 
centrality = {e: c / max_cent for e, c in centrality.items()} + ... # Add some random noise. + ... centrality = {e: c + random() for e, c in centrality.items()} + ... return max(centrality, key=centrality.get) + ... + >>> G = nx.path_graph(10) + >>> comp = nx.community.girvan_newman(G, most_valuable_edge=most_central_edge) + + Notes + ----- + The Girvan–Newman algorithm detects communities by progressively + removing edges from the original graph. The algorithm removes the + "most valuable" edge, traditionally the edge with the highest + betweenness centrality, at each step. As the graph breaks down into + pieces, the tightly knit community structure is exposed and the + result can be depicted as a dendrogram. + + """ + # If the graph is already empty, simply return its connected + # components. + if G.number_of_edges() == 0: + yield tuple(nx.connected_components(G)) + return + # If no function is provided for computing the most valuable edge, + # use the edge betweenness centrality. + if most_valuable_edge is None: + + def most_valuable_edge(G): + """Returns the edge with the highest betweenness centrality + in the graph `G`. + + """ + # We have guaranteed that the graph is non-empty, so this + # dictionary will never be empty. + betweenness = nx.edge_betweenness_centrality(G) + return max(betweenness, key=betweenness.get) + + # The copy of G here must include the edge weight data. + g = G.copy().to_undirected() + # Self-loops must be removed because their removal has no effect on + # the connected components of the graph. + g.remove_edges_from(nx.selfloop_edges(g)) + while g.number_of_edges() > 0: + yield _without_most_central_edges(g, most_valuable_edge) + + +def _without_most_central_edges(G, most_valuable_edge): + """Returns the connected components of the graph that results from + repeatedly removing the most "valuable" edge in the graph. + + `G` must be a non-empty graph. 
This function modifies the graph `G` + in-place; that is, it removes edges on the graph `G`. + + `most_valuable_edge` is a function that takes the graph `G` as input + (or a subgraph with one or more edges of `G` removed) and returns an + edge. That edge will be removed and this process will be repeated + until the number of connected components in the graph increases. + + """ + original_num_components = nx.number_connected_components(G) + num_new_components = original_num_components + while num_new_components <= original_num_components: + edge = most_valuable_edge(G) + G.remove_edge(*edge) + new_components = tuple(nx.connected_components(G)) + num_new_components = len(new_components) + return new_components diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/networkx/algorithms/community/leiden.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/networkx/algorithms/community/leiden.py new file mode 100644 index 0000000000000000000000000000000000000000..a37ad6591ceac8337db41ae99c5d55b144db3002 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/networkx/algorithms/community/leiden.py @@ -0,0 +1,162 @@ +"""Functions for detecting communities based on Leiden Community Detection +algorithm. + +These functions do not have NetworkX implementations. +They may only be run with an installable :doc:`backend ` +that supports them. 
+""" + +import itertools +from collections import deque + +import networkx as nx +from networkx.utils import not_implemented_for, py_random_state + +__all__ = ["leiden_communities", "leiden_partitions"] + + +@not_implemented_for("directed") +@py_random_state("seed") +@nx._dispatchable(edge_attrs="weight", implemented_by_nx=False) +def leiden_communities(G, weight="weight", resolution=1, max_level=None, seed=None): + r"""Find a best partition of `G` using Leiden Community Detection (backend required) + + Leiden Community Detection is an algorithm to extract the community structure + of a network based on modularity optimization. It is an improvement upon the + Louvain Community Detection algorithm. See :any:`louvain_communities`. + + Unlike the Louvain algorithm, it guarantees that communities are well connected in addition + to being faster and uncovering better partitions. [1]_ + + The algorithm works in 3 phases. On the first phase, it adds the nodes to a queue randomly + and assigns every node to be in its own community. For each node it tries to find the + maximum positive modularity gain by moving each node to all of its neighbor communities. + If a node is moved from its community, it adds to the rear of the queue all neighbors of + the node that do not belong to the node’s new community and that are not in the queue. + + The first phase continues until the queue is empty. + + The second phase consists in refining the partition $P$ obtained from the first phase. It starts + with a singleton partition $P_{refined}$. Then it merges nodes locally in $P_{refined}$ within + each community of the partition $P$. Nodes are merged with a community in $P_{refined}$ only if + both are sufficiently well connected to their community in $P$. This means that after the + refinement phase is concluded, communities in $P$ sometimes will have been split into multiple + communities. 
+ + The third phase consists of aggregating the network by building a new network whose nodes are + now the communities found in the second phase. However, the non-refined partition is used to create + an initial partition for the aggregate network. + + Once this phase is complete it is possible to reapply the first and second phases creating bigger + communities with increased modularity. + + The above three phases are executed until no modularity gain is achieved or `max_level` number + of iterations have been performed. + + Parameters + ---------- + G : NetworkX graph + weight : string or None, optional (default="weight") + The name of an edge attribute that holds the numerical value + used as a weight. If None then each edge has weight 1. + resolution : float, optional (default=1) + If resolution is less than 1, the algorithm favors larger communities. + Greater than 1 favors smaller communities. + max_level : int or None, optional (default=None) + The maximum number of levels (steps of the algorithm) to compute. + Must be a positive integer or None. If None, then there is no max + level and the algorithm will run until converged. + seed : integer, random_state, or None (default) + Indicator of random number generation state. + See :ref:`Randomness`. + + Returns + ------- + list + A list of disjoint sets (partition of `G`). Each set represents one community. + All communities together contain all the nodes in `G`. + + Examples + -------- + >>> import networkx as nx + >>> G = nx.petersen_graph() + >>> nx.community.leiden_communities(G, backend="example_backend") # doctest: +SKIP + [{2, 3, 5, 7, 8}, {0, 1, 4, 6, 9}] + + Notes + ----- + The order in which the nodes are considered can affect the final output. In the algorithm + the ordering happens using a random shuffle. + + References + ---------- + .. [1] Traag, V.A., Waltman, L. & van Eck, N.J. From Louvain to Leiden: guaranteeing + well-connected communities. Sci Rep 9, 5233 (2019). 
https://doi.org/10.1038/s41598-019-41695-z + + See Also + -------- + leiden_partitions + :any:`louvain_communities` + """ + partitions = leiden_partitions(G, weight, resolution, seed) + if max_level is not None: + if max_level <= 0: + raise ValueError("max_level argument must be a positive integer or None") + partitions = itertools.islice(partitions, max_level) + final_partition = deque(partitions, maxlen=1) + return final_partition.pop() + + +@not_implemented_for("directed") +@py_random_state("seed") +@nx._dispatchable(edge_attrs="weight", implemented_by_nx=False) +def leiden_partitions(G, weight="weight", resolution=1, seed=None): + """Yield partitions for each level of Leiden Community Detection (backend required) + + Leiden Community Detection is an algorithm to extract the community + structure of a network based on modularity optimization. + + The partitions across levels (steps of the algorithm) form a dendrogram + of communities. A dendrogram is a diagram representing a tree and each + level represents a partition of the G graph. The top level contains the + smallest communities and as you traverse to the bottom of the tree the + communities get bigger and the overall modularity increases making + the partition better. + + Each level is generated by executing the three phases of the Leiden Community + Detection algorithm. See :any:`leiden_communities`. + + Parameters + ---------- + G : NetworkX graph + weight : string or None, optional (default="weight") + The name of an edge attribute that holds the numerical value + used as a weight. If None then each edge has weight 1. + resolution : float, optional (default=1) + If resolution is less than 1, the algorithm favors larger communities. + Greater than 1 favors smaller communities. + seed : integer, random_state, or None (default) + Indicator of random number generation state. + See :ref:`Randomness`. + + Yields + ------ + list + A list of disjoint sets (partition of `G`). Each set represents one community. 
+ All communities together contain all the nodes in `G`. The yielded partitions + increase modularity with each iteration. + + References + ---------- + .. [1] Traag, V.A., Waltman, L. & van Eck, N.J. From Louvain to Leiden: guaranteeing + well-connected communities. Sci Rep 9, 5233 (2019). https://doi.org/10.1038/s41598-019-41695-z + + See Also + -------- + leiden_communities + :any:`louvain_partitions` + """ + raise NotImplementedError( + "'leiden_partitions' is not implemented by networkx. " + "Please try a different backend." + ) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/networkx/algorithms/community/quality.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/networkx/algorithms/community/quality.py new file mode 100644 index 0000000000000000000000000000000000000000..4b4dcbba1ce88f55f667cbe010fa89bf71cb99db --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/networkx/algorithms/community/quality.py @@ -0,0 +1,347 @@ +"""Functions for measuring the quality of a partition (into +communities). + +""" + +from itertools import combinations + +import networkx as nx +from networkx import NetworkXError +from networkx.algorithms.community.community_utils import is_partition +from networkx.utils.decorators import argmap + +__all__ = ["modularity", "partition_quality"] + + +class NotAPartition(NetworkXError): + """Raised if a given collection is not a partition.""" + + def __init__(self, G, collection): + msg = f"{collection} is not a valid partition of the graph {G}" + super().__init__(msg) + + +def _require_partition(G, partition): + """Decorator to check that a valid partition is input to a function + + Raises :exc:`networkx.NetworkXError` if the partition is not valid. + + This decorator should be used on functions whose first two arguments + are a graph and a partition of the nodes of that graph (in that + order):: + + >>> @require_partition + ... def foo(G, partition): + ... 
print("partition is valid!") + ... + >>> G = nx.complete_graph(5) + >>> partition = [{0, 1}, {2, 3}, {4}] + >>> foo(G, partition) + partition is valid! + >>> partition = [{0}, {2, 3}, {4}] + >>> foo(G, partition) + Traceback (most recent call last): + ... + networkx.exception.NetworkXError: `partition` is not a valid partition of the nodes of G + >>> partition = [{0, 1}, {1, 2, 3}, {4}] + >>> foo(G, partition) + Traceback (most recent call last): + ... + networkx.exception.NetworkXError: `partition` is not a valid partition of the nodes of G + + """ + if is_partition(G, partition): + return G, partition + raise nx.NetworkXError("`partition` is not a valid partition of the nodes of G") + + +require_partition = argmap(_require_partition, (0, 1)) + + +@nx._dispatchable +def intra_community_edges(G, partition): + """Returns the number of intra-community edges for a partition of `G`. + + Parameters + ---------- + G : NetworkX graph. + + partition : iterable of sets of nodes + This must be a partition of the nodes of `G`. + + The "intra-community edges" are those edges joining a pair of nodes + in the same block of the partition. + + """ + return sum(G.subgraph(block).size() for block in partition) + + +@nx._dispatchable +def inter_community_edges(G, partition): + """Returns the number of inter-community edges for a partition of `G`. + according to the given + partition of the nodes of `G`. + + Parameters + ---------- + G : NetworkX graph. + + partition : iterable of sets of nodes + This must be a partition of the nodes of `G`. + + The *inter-community edges* are those edges joining a pair of nodes + in different blocks of the partition. + + Implementation note: this function creates an intermediate graph + that may require the same amount of memory as that of `G`. 
+ + """ + # Alternate implementation that does not require constructing a new + # graph object (but does require constructing an affiliation + # dictionary): + # + # aff = dict(chain.from_iterable(((v, block) for v in block) + # for block in partition)) + # return sum(1 for u, v in G.edges() if aff[u] != aff[v]) + # + MG = nx.MultiDiGraph if G.is_directed() else nx.MultiGraph + return nx.quotient_graph(G, partition, create_using=MG).size() + + +@nx._dispatchable +def inter_community_non_edges(G, partition): + """Returns the number of inter-community non-edges according to the + given partition of the nodes of `G`. + + Parameters + ---------- + G : NetworkX graph. + + partition : iterable of sets of nodes + This must be a partition of the nodes of `G`. + + A *non-edge* is a pair of nodes (undirected if `G` is undirected) + that are not adjacent in `G`. The *inter-community non-edges* are + those non-edges on a pair of nodes in different blocks of the + partition. + + Implementation note: this function creates two intermediate graphs, + which may require up to twice the amount of memory as required to + store `G`. + + """ + # Alternate implementation that does not require constructing two + # new graph objects (but does require constructing an affiliation + # dictionary): + # + # aff = dict(chain.from_iterable(((v, block) for v in block) + # for block in partition)) + # return sum(1 for u, v in nx.non_edges(G) if aff[u] != aff[v]) + # + return inter_community_edges(nx.complement(G), partition) + + +@nx._dispatchable(edge_attrs="weight") +def modularity(G, communities, weight="weight", resolution=1): + r"""Returns the modularity of the given partition of the graph. + + Modularity is defined in [1]_ as + + .. 
math:: + Q = \frac{1}{2m} \sum_{ij} \left( A_{ij} - \gamma\frac{k_ik_j}{2m}\right) + \delta(c_i,c_j) + + where $m$ is the number of edges (or sum of all edge weights as in [5]_), + $A$ is the adjacency matrix of `G`, $k_i$ is the (weighted) degree of $i$, + $\gamma$ is the resolution parameter, and $\delta(c_i, c_j)$ is 1 if $i$ and + $j$ are in the same community else 0. + + According to [2]_ (and verified by some algebra) this can be reduced to + + .. math:: + Q = \sum_{c=1}^{n} + \left[ \frac{L_c}{m} - \gamma\left( \frac{k_c}{2m} \right) ^2 \right] + + where the sum iterates over all communities $c$, $m$ is the number of edges, + $L_c$ is the number of intra-community links for community $c$, + $k_c$ is the sum of degrees of the nodes in community $c$, + and $\gamma$ is the resolution parameter. + + The resolution parameter sets an arbitrary tradeoff between intra-group + edges and inter-group edges. More complex grouping patterns can be + discovered by analyzing the same network with multiple values of gamma + and then combining the results [3]_. That said, it is very common to + simply use gamma=1. More on the choice of gamma is in [4]_. + + The second formula is the one actually used in calculation of the modularity. + For directed graphs the second formula replaces $k_c$ with $k^{in}_c k^{out}_c$. + + Parameters + ---------- + G : NetworkX Graph + + communities : list or iterable of set of nodes + These node sets must represent a partition of G's nodes. + + weight : string or None, optional (default="weight") + The edge attribute that holds the numerical value used + as a weight. If None or an edge does not have that attribute, + then that edge has weight 1. + + resolution : float (default=1) + If resolution is less than 1, modularity favors larger communities. + Greater than 1 favors smaller communities. + + Returns + ------- + Q : float + The modularity of the partition. 
+ + Raises + ------ + NotAPartition + If `communities` is not a partition of the nodes of `G`. + + Examples + -------- + >>> G = nx.barbell_graph(3, 0) + >>> nx.community.modularity(G, [{0, 1, 2}, {3, 4, 5}]) + 0.35714285714285715 + >>> nx.community.modularity(G, nx.community.label_propagation_communities(G)) + 0.35714285714285715 + + References + ---------- + .. [1] M. E. J. Newman "Networks: An Introduction", page 224. + Oxford University Press, 2011. + .. [2] Clauset, Aaron, Mark EJ Newman, and Cristopher Moore. + "Finding community structure in very large networks." + Phys. Rev. E 70.6 (2004). + .. [3] Reichardt and Bornholdt "Statistical Mechanics of Community Detection" + Phys. Rev. E 74, 016110, 2006. https://doi.org/10.1103/PhysRevE.74.016110 + .. [4] M. E. J. Newman, "Equivalence between modularity optimization and + maximum likelihood methods for community detection" + Phys. Rev. E 94, 052315, 2016. https://doi.org/10.1103/PhysRevE.94.052315 + .. [5] Blondel, V.D. et al. "Fast unfolding of communities in large + networks" J. Stat. Mech 10008, 1-12 (2008). 
+ https://doi.org/10.1088/1742-5468/2008/10/P10008 + """ + if not isinstance(communities, list): + communities = list(communities) + if not is_partition(G, communities): + raise NotAPartition(G, communities) + + directed = G.is_directed() + if directed: + out_degree = dict(G.out_degree(weight=weight)) + in_degree = dict(G.in_degree(weight=weight)) + m = sum(out_degree.values()) + norm = 1 / m**2 + else: + out_degree = in_degree = dict(G.degree(weight=weight)) + deg_sum = sum(out_degree.values()) + m = deg_sum / 2 + norm = 1 / deg_sum**2 + + def community_contribution(community): + comm = set(community) + L_c = sum(wt for u, v, wt in G.edges(comm, data=weight, default=1) if v in comm) + + out_degree_sum = sum(out_degree[u] for u in comm) + in_degree_sum = sum(in_degree[u] for u in comm) if directed else out_degree_sum + + return L_c / m - resolution * out_degree_sum * in_degree_sum * norm + + return sum(map(community_contribution, communities)) + + +@require_partition +@nx._dispatchable +def partition_quality(G, partition): + """Returns the coverage and performance of a partition of G. + + The *coverage* of a partition is the ratio of the number of + intra-community edges to the total number of edges in the graph. + + The *performance* of a partition is the number of + intra-community edges plus inter-community non-edges divided by the total + number of potential edges. + + This algorithm has complexity $O(C^2 + L)$ where C is the number of + communities and L is the number of links. + + Parameters + ---------- + G : NetworkX graph + + partition : sequence + Partition of the nodes of `G`, represented as a sequence of + sets of nodes (blocks). Each block of the partition represents a + community. + + Returns + ------- + (float, float) + The (coverage, performance) tuple of the partition, as defined above. + + Raises + ------ + NetworkXError + If `partition` is not a valid partition of the nodes of `G`. 
+ + Notes + ----- + If `G` is a multigraph; + - for coverage, the multiplicity of edges is counted + - for performance, the result is -1 (total number of possible edges is not defined) + + References + ---------- + .. [1] Santo Fortunato. + "Community Detection in Graphs". + *Physical Reports*, Volume 486, Issue 3--5 pp. 75--174 + + """ + + node_community = {} + for i, community in enumerate(partition): + for node in community: + node_community[node] = i + + # `performance` is not defined for multigraphs + if not G.is_multigraph(): + # Iterate over the communities, quadratic, to calculate `possible_inter_community_edges` + possible_inter_community_edges = sum( + len(p1) * len(p2) for p1, p2 in combinations(partition, 2) + ) + + if G.is_directed(): + possible_inter_community_edges *= 2 + else: + possible_inter_community_edges = 0 + + # Compute the number of edges in the complete graph -- `n` nodes, + # directed or undirected, depending on `G` + n = len(G) + total_pairs = n * (n - 1) + if not G.is_directed(): + total_pairs //= 2 + + intra_community_edges = 0 + inter_community_non_edges = possible_inter_community_edges + + # Iterate over the links to count `intra_community_edges` and `inter_community_non_edges` + for e in G.edges(): + if node_community[e[0]] == node_community[e[1]]: + intra_community_edges += 1 + else: + inter_community_non_edges -= 1 + + coverage = intra_community_edges / len(G.edges) + + if G.is_multigraph(): + performance = -1.0 + else: + performance = (intra_community_edges + inter_community_non_edges) / total_pairs + + return coverage, performance diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/networkx/algorithms/tree/decomposition.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/networkx/algorithms/tree/decomposition.py new file mode 100644 index 0000000000000000000000000000000000000000..c8b8f2477b47581cd6010aba7e3329f5044e0da4 --- /dev/null +++ 
b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/networkx/algorithms/tree/decomposition.py @@ -0,0 +1,88 @@ +r"""Function for computing a junction tree of a graph.""" + +from itertools import combinations + +import networkx as nx +from networkx.algorithms import chordal_graph_cliques, complete_to_chordal_graph, moral +from networkx.utils import not_implemented_for + +__all__ = ["junction_tree"] + + +@not_implemented_for("multigraph") +@nx._dispatchable(returns_graph=True) +def junction_tree(G): + r"""Returns a junction tree of a given graph. + + A junction tree (or clique tree) is constructed from a (un)directed graph G. + The tree is constructed based on a moralized and triangulated version of G. + The tree's nodes consist of maximal cliques and sepsets of the revised graph. + The sepset of two cliques is the intersection of the nodes of these cliques, + e.g. the sepset of (A,B,C) and (A,C,E,F) is (A,C). These nodes are often called + "variables" in this literature. The tree is bipartite with each sepset + connected to its two cliques. + + Junction Trees are not unique as the order of clique consideration determines + which sepsets are included. + + The junction tree algorithm consists of five steps [1]_: + + 1. Moralize the graph + 2. Triangulate the graph + 3. Find maximal cliques + 4. Build the tree from cliques, connecting cliques with shared + nodes, set edge-weight to number of shared variables + 5. Find maximum spanning tree + + + Parameters + ---------- + G : networkx.Graph + Directed or undirected graph. + + Returns + ------- + junction_tree : networkx.Graph + The corresponding junction tree of `G`. + + Raises + ------ + NetworkXNotImplemented + Raised if `G` is an instance of `MultiGraph` or `MultiDiGraph`. + + References + ---------- + .. [1] Junction tree algorithm: + https://en.wikipedia.org/wiki/Junction_tree_algorithm + + .. [2] Finn V. Jensen and Frank Jensen. 1994. Optimal + junction trees. 
In Proceedings of the Tenth international + conference on Uncertainty in artificial intelligence (UAI’94). + Morgan Kaufmann Publishers Inc., San Francisco, CA, USA, 360–366. + """ + + clique_graph = nx.Graph() + + if G.is_directed(): + G = moral.moral_graph(G) + chordal_graph, _ = complete_to_chordal_graph(G) + + cliques = [tuple(sorted(i)) for i in chordal_graph_cliques(chordal_graph)] + clique_graph.add_nodes_from(cliques, type="clique") + + for edge in combinations(cliques, 2): + set_edge_0 = set(edge[0]) + set_edge_1 = set(edge[1]) + if not set_edge_0.isdisjoint(set_edge_1): + sepset = tuple(sorted(set_edge_0.intersection(set_edge_1))) + clique_graph.add_edge(edge[0], edge[1], weight=len(sepset), sepset=sepset) + + junction_tree = nx.maximum_spanning_tree(clique_graph) + + for edge in list(junction_tree.edges(data=True)): + junction_tree.add_node(edge[2]["sepset"], type="sepset") + junction_tree.add_edge(edge[0], edge[2]["sepset"]) + junction_tree.add_edge(edge[1], edge[2]["sepset"]) + junction_tree.remove_edge(edge[0], edge[1]) + + return junction_tree diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/MarkupSafe-3.0.2.dist-info/INSTALLER b/URSA/.venv_ursa/lib/python3.12/site-packages/MarkupSafe-3.0.2.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/MarkupSafe-3.0.2.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/MarkupSafe-3.0.2.dist-info/LICENSE.txt b/URSA/.venv_ursa/lib/python3.12/site-packages/MarkupSafe-3.0.2.dist-info/LICENSE.txt new file mode 100644 index 0000000000000000000000000000000000000000..9d227a0cc43c3268d15722b763bd94ad298645a1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/MarkupSafe-3.0.2.dist-info/LICENSE.txt @@ -0,0 +1,28 @@ +Copyright 2010 Pallets + +Redistribution and use in source and binary forms, with or without 
+modification, are permitted provided that the following conditions are +met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/MarkupSafe-3.0.2.dist-info/METADATA b/URSA/.venv_ursa/lib/python3.12/site-packages/MarkupSafe-3.0.2.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..82261f2a4657ed8938325e2f449c1d6cbd4ea7fc --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/MarkupSafe-3.0.2.dist-info/METADATA @@ -0,0 +1,92 @@ +Metadata-Version: 2.1 +Name: MarkupSafe +Version: 3.0.2 +Summary: Safely add untrusted strings to HTML/XML markup. 
+Maintainer-email: Pallets +License: Copyright 2010 Pallets + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +Project-URL: Donate, https://palletsprojects.com/donate +Project-URL: Documentation, https://markupsafe.palletsprojects.com/ +Project-URL: Changes, https://markupsafe.palletsprojects.com/changes/ +Project-URL: Source, https://github.com/pallets/markupsafe/ +Project-URL: Chat, https://discord.gg/pallets +Classifier: Development Status :: 5 - Production/Stable +Classifier: Environment :: Web Environment +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: BSD License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python +Classifier: Topic :: Internet :: WWW/HTTP :: Dynamic Content +Classifier: Topic :: Text Processing :: Markup :: HTML +Classifier: Typing :: Typed +Requires-Python: >=3.9 +Description-Content-Type: text/markdown +License-File: LICENSE.txt + +# MarkupSafe + +MarkupSafe implements a text object that escapes characters so it is +safe to use in HTML and XML. Characters that have special meanings are +replaced so that they display as the actual characters. This mitigates +injection attacks, meaning untrusted user input can safely be displayed +on a page. + + +## Examples + +```pycon +>>> from markupsafe import Markup, escape + +>>> # escape replaces special characters and wraps in Markup +>>> escape("<script>alert(document.cookie);</script>") +Markup('&lt;script&gt;alert(document.cookie);&lt;/script&gt;') + +>>> # wrap in Markup to mark text "safe" and prevent escaping +>>> Markup("<strong>Hello</strong>") +Markup('<strong>hello</strong>') + +>>> escape(Markup("<strong>Hello</strong>")) +Markup('<strong>hello</strong>') + +>>> # Markup is a str subclass +>>> # methods and operators escape their arguments +>>> template = Markup("Hello <em>{name}</em>") +>>> template.format(name='"World"') +Markup('Hello <em>&#34;World&#34;</em>') +``` + +## Donate + +The Pallets organization develops and supports MarkupSafe and other +popular packages. In order to grow the community of contributors and +users, and allow the maintainers to devote more time to the projects, +[please donate today][]. 
+ +[please donate today]: https://palletsprojects.com/donate diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/MarkupSafe-3.0.2.dist-info/RECORD b/URSA/.venv_ursa/lib/python3.12/site-packages/MarkupSafe-3.0.2.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..b7ba0b7bb80fd0875017e64854d731956e03a829 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/MarkupSafe-3.0.2.dist-info/RECORD @@ -0,0 +1,14 @@ +MarkupSafe-3.0.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +MarkupSafe-3.0.2.dist-info/LICENSE.txt,sha256=SJqOEQhQntmKN7uYPhHg9-HTHwvY-Zp5yESOf_N9B-o,1475 +MarkupSafe-3.0.2.dist-info/METADATA,sha256=aAwbZhSmXdfFuMM-rEHpeiHRkBOGESyVLJIuwzHP-nw,3975 +MarkupSafe-3.0.2.dist-info/RECORD,, +MarkupSafe-3.0.2.dist-info/WHEEL,sha256=OVgtqZzfzIXXtylXP90gxCZ6CKBCwKYyHM8PpMEjN1M,151 +MarkupSafe-3.0.2.dist-info/top_level.txt,sha256=qy0Plje5IJuvsCBjejJyhDCjEAdcDLK_2agVcex8Z6U,11 +markupsafe/__init__.py,sha256=sr-U6_27DfaSrj5jnHYxWN-pvhM27sjlDplMDPZKm7k,13214 +markupsafe/__pycache__/__init__.cpython-312.pyc,, +markupsafe/__pycache__/_native.cpython-312.pyc,, +markupsafe/_native.py,sha256=hSLs8Jmz5aqayuengJJ3kdT5PwNpBWpKrmQSdipndC8,210 +markupsafe/_speedups.c,sha256=O7XulmTo-epI6n2FtMVOrJXl8EAaIwD2iNYmBI5SEoQ,4149 +markupsafe/_speedups.cpython-312-x86_64-linux-gnu.so,sha256=t1DBZlpsjFA30BOOvXfXfT1wvO_4cS16VbHz1-49q5U,43432 +markupsafe/_speedups.pyi,sha256=ENd1bYe7gbBUf2ywyYWOGUpnXOHNJ-cgTNqetlW8h5k,41 +markupsafe/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/MarkupSafe-3.0.2.dist-info/WHEEL b/URSA/.venv_ursa/lib/python3.12/site-packages/MarkupSafe-3.0.2.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..057fef6790745425ef96bcdacf17bbdaec580d03 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/MarkupSafe-3.0.2.dist-info/WHEEL @@ -0,0 +1,6 @@ +Wheel-Version: 1.0 +Generator: 
setuptools (75.2.0) +Root-Is-Purelib: false +Tag: cp312-cp312-manylinux_2_17_x86_64 +Tag: cp312-cp312-manylinux2014_x86_64 + diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/MarkupSafe-3.0.2.dist-info/top_level.txt b/URSA/.venv_ursa/lib/python3.12/site-packages/MarkupSafe-3.0.2.dist-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..75bf729258f9daef77370b6df1a57940f90fc23f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/MarkupSafe-3.0.2.dist-info/top_level.txt @@ -0,0 +1 @@ +markupsafe diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/INSTALLER b/URSA/.venv_ursa/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/LICENSE b/URSA/.venv_ursa/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..2f1b8e15e5627d92f0521605c9870bc8e5505cb4 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/LICENSE @@ -0,0 +1,20 @@ +Copyright (c) 2017-2021 Ingy döt Net +Copyright (c) 2006-2016 Kirill Simonov + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/METADATA b/URSA/.venv_ursa/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..db029b770cd87a12086e70b1be9900c93d255f0b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/METADATA @@ -0,0 +1,46 @@ +Metadata-Version: 2.1 +Name: PyYAML +Version: 6.0.2 +Summary: YAML parser and emitter for Python +Home-page: https://pyyaml.org/ +Download-URL: https://pypi.org/project/PyYAML/ +Author: Kirill Simonov +Author-email: xi@resolvent.net +License: MIT +Project-URL: Bug Tracker, https://github.com/yaml/pyyaml/issues +Project-URL: CI, https://github.com/yaml/pyyaml/actions +Project-URL: Documentation, https://pyyaml.org/wiki/PyYAMLDocumentation +Project-URL: Mailing lists, http://lists.sourceforge.net/lists/listinfo/yaml-core +Project-URL: Source Code, https://github.com/yaml/pyyaml +Platform: Any +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: MIT License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Cython +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python 
:: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: Programming Language :: Python :: 3.13 +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Programming Language :: Python :: Implementation :: PyPy +Classifier: Topic :: Software Development :: Libraries :: Python Modules +Classifier: Topic :: Text Processing :: Markup +Requires-Python: >=3.8 +License-File: LICENSE + +YAML is a data serialization format designed for human readability +and interaction with scripting languages. PyYAML is a YAML parser +and emitter for Python. + +PyYAML features a complete YAML 1.1 parser, Unicode support, pickle +support, capable extension API, and sensible error messages. PyYAML +supports standard YAML tags and provides Python-specific tags that +allow to represent an arbitrary Python object. + +PyYAML is applicable for a broad range of tasks from complex +configuration files to object serialization and persistence. diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/RECORD b/URSA/.venv_ursa/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..f596c8e5493c4dc8b6fdce028ba1172d4dc14148 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/RECORD @@ -0,0 +1,43 @@ +PyYAML-6.0.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +PyYAML-6.0.2.dist-info/LICENSE,sha256=jTko-dxEkP1jVwfLiOsmvXZBAqcoKVQwfT5RZ6V36KQ,1101 +PyYAML-6.0.2.dist-info/METADATA,sha256=9-odFB5seu4pGPcEv7E8iyxNF51_uKnaNGjLAhz2lto,2060 +PyYAML-6.0.2.dist-info/RECORD,, +PyYAML-6.0.2.dist-info/WHEEL,sha256=1pP4yhrbipRtdbm4Rbg3aoTjzc7pDhpHKO0CEY24CNM,152 +PyYAML-6.0.2.dist-info/top_level.txt,sha256=rpj0IVMTisAjh_1vG3Ccf9v5jpCQwAz6cD1IVU5ZdhQ,11 +_yaml/__init__.py,sha256=04Ae_5osxahpJHa3XBZUAf4wi6XX32gR8D6X6p64GEA,1402 +_yaml/__pycache__/__init__.cpython-312.pyc,, 
+yaml/__init__.py,sha256=N35S01HMesFTe0aRRMWkPj0Pa8IEbHpE9FK7cr5Bdtw,12311 +yaml/__pycache__/__init__.cpython-312.pyc,, +yaml/__pycache__/composer.cpython-312.pyc,, +yaml/__pycache__/constructor.cpython-312.pyc,, +yaml/__pycache__/cyaml.cpython-312.pyc,, +yaml/__pycache__/dumper.cpython-312.pyc,, +yaml/__pycache__/emitter.cpython-312.pyc,, +yaml/__pycache__/error.cpython-312.pyc,, +yaml/__pycache__/events.cpython-312.pyc,, +yaml/__pycache__/loader.cpython-312.pyc,, +yaml/__pycache__/nodes.cpython-312.pyc,, +yaml/__pycache__/parser.cpython-312.pyc,, +yaml/__pycache__/reader.cpython-312.pyc,, +yaml/__pycache__/representer.cpython-312.pyc,, +yaml/__pycache__/resolver.cpython-312.pyc,, +yaml/__pycache__/scanner.cpython-312.pyc,, +yaml/__pycache__/serializer.cpython-312.pyc,, +yaml/__pycache__/tokens.cpython-312.pyc,, +yaml/_yaml.cpython-312-x86_64-linux-gnu.so,sha256=PJFgxnc0f5Dyde6WKmBm6fZWapawmWl7aBRruXjRA80,2481784 +yaml/composer.py,sha256=_Ko30Wr6eDWUeUpauUGT3Lcg9QPBnOPVlTnIMRGJ9FM,4883 +yaml/constructor.py,sha256=kNgkfaeLUkwQYY_Q6Ff1Tz2XVw_pG1xVE9Ak7z-viLA,28639 +yaml/cyaml.py,sha256=6ZrAG9fAYvdVe2FK_w0hmXoG7ZYsoYUwapG8CiC72H0,3851 +yaml/dumper.py,sha256=PLctZlYwZLp7XmeUdwRuv4nYOZ2UBnDIUy8-lKfLF-o,2837 +yaml/emitter.py,sha256=jghtaU7eFwg31bG0B7RZea_29Adi9CKmXq_QjgQpCkQ,43006 +yaml/error.py,sha256=Ah9z-toHJUbE9j-M8YpxgSRM5CgLCcwVzJgLLRF2Fxo,2533 +yaml/events.py,sha256=50_TksgQiE4up-lKo_V-nBy-tAIxkIPQxY5qDhKCeHw,2445 +yaml/loader.py,sha256=UVa-zIqmkFSCIYq_PgSGm4NSJttHY2Rf_zQ4_b1fHN0,2061 +yaml/nodes.py,sha256=gPKNj8pKCdh2d4gr3gIYINnPOaOxGhJAUiYhGRnPE84,1440 +yaml/parser.py,sha256=ilWp5vvgoHFGzvOZDItFoGjD6D42nhlZrZyjAwa0oJo,25495 +yaml/reader.py,sha256=0dmzirOiDG4Xo41RnuQS7K9rkY3xjHiVasfDMNTqCNw,6794 +yaml/representer.py,sha256=IuWP-cAW9sHKEnS0gCqSa894k1Bg4cgTxaDwIcbRQ-Y,14190 +yaml/resolver.py,sha256=9L-VYfm4mWHxUD1Vg4X7rjDRK_7VZd6b92wzq7Y2IKY,9004 +yaml/scanner.py,sha256=YEM3iLZSaQwXcQRg2l2R4MdT0zGP2F9eHkKGKnHyWQY,51279 
+yaml/serializer.py,sha256=ChuFgmhU01hj4xgI8GaKv6vfM2Bujwa9i7d2FAHj7cA,4165 +yaml/tokens.py,sha256=lTQIzSVw8Mg9wv459-TjiOQe6wVziqaRlqX2_89rp54,2573 diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/WHEEL b/URSA/.venv_ursa/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..56616a86d52ed931647827a07ddc48d04e1cc9ba --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/WHEEL @@ -0,0 +1,6 @@ +Wheel-Version: 1.0 +Generator: bdist_wheel (0.44.0) +Root-Is-Purelib: false +Tag: cp312-cp312-manylinux_2_17_x86_64 +Tag: cp312-cp312-manylinux2014_x86_64 + diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/top_level.txt b/URSA/.venv_ursa/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..e6475e911f628412049bc4090d86f23ac403adde --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/PyYAML-6.0.2.dist-info/top_level.txt @@ -0,0 +1,2 @@ +_yaml +yaml diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/__pycache__/_cuda_bindings_redirector.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/__pycache__/_cuda_bindings_redirector.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3191a6057c6e73a646264642696b634adc38dbca Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/__pycache__/_cuda_bindings_redirector.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/__pycache__/isympy.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/__pycache__/isympy.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1d17bd1195590d7531d8a3395c1657ab87736baa Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/__pycache__/isympy.cpython-312.pyc differ diff --git 
a/URSA/.venv_ursa/lib/python3.12/site-packages/_distutils_hack/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/_distutils_hack/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4d3f09b0ae97c9d614522eb9a44533b32543cb9b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/_distutils_hack/__init__.py @@ -0,0 +1,220 @@ +# don't import any costly modules +import sys +import os + + +def warn_distutils_present(): + if 'distutils' not in sys.modules: + return + import warnings + + warnings.warn( + "Distutils was imported before Setuptools, but importing Setuptools " + "also replaces the `distutils` module in `sys.modules`. This may lead " + "to undesirable behaviors or errors. To avoid these issues, avoid " + "using distutils directly, ensure that setuptools is installed in the " + "traditional way (e.g. not an editable install), and/or make sure " + "that setuptools is always imported before distutils." + ) + + +def clear_distutils(): + if 'distutils' not in sys.modules: + return + import warnings + + warnings.warn("Setuptools is replacing distutils.") + mods = [ + name + for name in sys.modules + if name == "distutils" or name.startswith("distutils.") + ] + for name in mods: + del sys.modules[name] + + +def enabled(): + """ + Allow selection of distutils by environment variable. + """ + which = os.environ.get('SETUPTOOLS_USE_DISTUTILS', 'local') + return which == 'local' + + +def ensure_local_distutils(): + import importlib + + clear_distutils() + + # With the DistutilsMetaFinder in place, + # perform an import to cause distutils to be + # loaded from setuptools._distutils. Ref #2906. 
+ with shim(): + importlib.import_module('distutils') + + # check that submodules load as expected + core = importlib.import_module('distutils.core') + assert '_distutils' in core.__file__, core.__file__ + assert 'setuptools._distutils.log' not in sys.modules + + +def do_override(): + """ + Ensure that the local copy of distutils is preferred over stdlib. + + See https://github.com/pypa/setuptools/issues/417#issuecomment-392298401 + for more motivation. + """ + if enabled(): + warn_distutils_present() + ensure_local_distutils() + + +class _TrivialRe: + def __init__(self, *patterns): + self._patterns = patterns + + def match(self, string): + return all(pat in string for pat in self._patterns) + + +class DistutilsMetaFinder: + def find_spec(self, fullname, path, target=None): + # optimization: only consider top level modules and those + # found in the CPython test suite. + if path is not None and not fullname.startswith('test.'): + return None + + method_name = 'spec_for_{fullname}'.format(**locals()) + method = getattr(self, method_name, lambda: None) + return method() + + def spec_for_distutils(self): + if self.is_cpython(): + return None + + import importlib + import importlib.abc + import importlib.util + + try: + mod = importlib.import_module('setuptools._distutils') + except Exception: + # There are a couple of cases where setuptools._distutils + # may not be present: + # - An older Setuptools without a local distutils is + # taking precedence. Ref #2957. + # - Path manipulation during sitecustomize removes + # setuptools from the path but only after the hook + # has been loaded. Ref #2980. + # In either case, fall back to stdlib behavior. 
+ return None + + class DistutilsLoader(importlib.abc.Loader): + def create_module(self, spec): + mod.__name__ = 'distutils' + return mod + + def exec_module(self, module): + pass + + return importlib.util.spec_from_loader( + 'distutils', DistutilsLoader(), origin=mod.__file__ + ) + + @staticmethod + def is_cpython(): + """ + Suppress supplying distutils for CPython (build and tests). + Ref #2965 and #3007. + """ + return os.path.isfile('pybuilddir.txt') + + def spec_for_pip(self): + """ + Ensure stdlib distutils when running under pip. + See pypa/pip#8761 for rationale. + """ + if sys.version_info >= (3, 12) or self.pip_imported_during_build(): + return + clear_distutils() + self.spec_for_distutils = lambda: None + + @classmethod + def pip_imported_during_build(cls): + """ + Detect if pip is being imported in a build script. Ref #2355. + """ + import traceback + + return any( + cls.frame_file_is_setup(frame) for frame, line in traceback.walk_stack(None) + ) + + @staticmethod + def frame_file_is_setup(frame): + """ + Return True if the indicated frame suggests a setup.py file. + """ + # some frames may not have __file__ (#2940) + return frame.f_globals.get('__file__', '').endswith('setup.py') + + def spec_for_sensitive_tests(self): + """ + Ensure stdlib distutils when running select tests under CPython. 
+ + python/cpython#91169 + """ + clear_distutils() + self.spec_for_distutils = lambda: None + + sensitive_tests = ( + [ + 'test.test_distutils', + 'test.test_peg_generator', + 'test.test_importlib', + ] + if sys.version_info < (3, 10) + else [ + 'test.test_distutils', + ] + ) + + +for name in DistutilsMetaFinder.sensitive_tests: + setattr( + DistutilsMetaFinder, + f'spec_for_{name}', + DistutilsMetaFinder.spec_for_sensitive_tests, + ) + + +DISTUTILS_FINDER = DistutilsMetaFinder() + + +def add_shim(): + DISTUTILS_FINDER in sys.meta_path or insert_shim() + + +class shim: + def __enter__(self): + insert_shim() + + def __exit__(self, exc, value, tb): + _remove_shim() + + +def insert_shim(): + sys.meta_path.insert(0, DISTUTILS_FINDER) + + +def _remove_shim(): + try: + sys.meta_path.remove(DISTUTILS_FINDER) + except ValueError: + pass + + +if sys.version_info < (3, 12): + # DistutilsMetaFinder can only be disabled in Python < 3.12 (PEP 632) + remove_shim = _remove_shim diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/_distutils_hack/override.py b/URSA/.venv_ursa/lib/python3.12/site-packages/_distutils_hack/override.py new file mode 100644 index 0000000000000000000000000000000000000000..2cc433a4a55e3b41fa31089918fb62096092f89f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/_distutils_hack/override.py @@ -0,0 +1 @@ +__import__('_distutils_hack').do_override() diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/_yaml/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/_yaml/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7baa8c4b68127d5cdf0be9a799429e61347c2694 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/_yaml/__init__.py @@ -0,0 +1,33 @@ +# This is a stub package designed to roughly emulate the _yaml +# extension module, which previously existed as a standalone module +# and has been moved into the `yaml` package namespace. 
+# It does not perfectly mimic its old counterpart, but should get +# close enough for anyone who's relying on it even when they shouldn't. +import yaml + +# in some circumstances, the yaml module we imported may be from a different version, so we need +# to tread carefully when poking at it here (it may not have the attributes we expect) +if not getattr(yaml, '__with_libyaml__', False): + from sys import version_info + + exc = ModuleNotFoundError if version_info >= (3, 6) else ImportError + raise exc("No module named '_yaml'") +else: + from yaml._yaml import * + import warnings + warnings.warn( + 'The _yaml extension module is now located at yaml._yaml' + ' and its location is subject to change. To use the' + ' LibYAML-based parser and emitter, import from `yaml`:' + ' `from yaml import CLoader as Loader, CDumper as Dumper`.', + DeprecationWarning + ) + del warnings + # Don't `del yaml` here because yaml is actually an existing + # namespace member of _yaml. + +__name__ = '_yaml' +# If the module is top-level (i.e. not a part of any specific package) +# then the attribute should be set to ''. +# https://docs.python.org/3.8/library/types.html +__package__ = '' diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c50e5655f5caef73ad92a683f349c54a5d09fe47 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/__init__.py @@ -0,0 +1,51 @@ +# Copyright 2020 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +__version__ = "1.13.0" + +from .accelerator import Accelerator +from .big_modeling import ( + cpu_offload, + cpu_offload_with_hook, + disk_offload, + dispatch_model, + init_empty_weights, + init_on_device, + load_checkpoint_and_dispatch, +) +from .data_loader import skip_first_batches +from .inference import prepare_pippy +from .launchers import debug_launcher, notebook_launcher +from .parallelism_config import ParallelismConfig +from .state import PartialState +from .utils import ( + AutocastKwargs, + DataLoaderConfiguration, + DDPCommunicationHookType, + DeepSpeedPlugin, + DistributedDataParallelKwargs, + DistributedType, + FullyShardedDataParallelPlugin, + GradScalerKwargs, + InitProcessGroupKwargs, + ProfileKwargs, + find_executable_batch_size, + infer_auto_device_map, + is_rich_available, + load_checkpoint_in_model, + synchronize_rng_states, +) + + +if is_rich_available(): + from .utils import rich diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/accelerator.py b/URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/accelerator.py new file mode 100644 index 0000000000000000000000000000000000000000..ddc20488f764b809605f5bf39ac45948ee5e340e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/accelerator.py @@ -0,0 +1,4342 @@ +# Copyright 2021 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import contextlib +import functools +import inspect +import json +import math +import os +import re +import shutil +import warnings +from collections import OrderedDict +from contextlib import contextmanager +from functools import partial +from types import MethodType +from typing import Any, Callable, Union + +import torch +import torch.utils.hooks as hooks + +from accelerate.utils.dataclasses import FP8BackendType + +from .big_modeling import _attach_context_parallel_hooks +from .checkpointing import load_accelerator_state, load_custom_state, save_accelerator_state, save_custom_state +from .data_loader import DataLoaderDispatcher, prepare_data_loader, skip_first_batches +from .logging import get_logger +from .optimizer import AcceleratedOptimizer +from .parallelism_config import ParallelismConfig +from .scheduler import AcceleratedScheduler +from .state import AcceleratorState, GradientState, PartialState +from .tracking import LOGGER_TYPE_TO_CLASS, GeneralTracker, filter_trackers +from .utils import ( + MODEL_NAME, + SAFE_WEIGHTS_INDEX_NAME, + SAFE_WEIGHTS_NAME, + SAFE_WEIGHTS_PATTERN_NAME, + WEIGHTS_INDEX_NAME, + WEIGHTS_NAME, + WEIGHTS_PATTERN_NAME, + AORecipeKwargs, + AutocastKwargs, + DataLoaderConfiguration, + DeepSpeedPlugin, + DistributedDataParallelKwargs, + DistributedType, + DynamoBackend, + FP8RecipeKwargs, + FullyShardedDataParallelPlugin, + GradientAccumulationPlugin, + GradScalerKwargs, + InitProcessGroupKwargs, + KwargsHandler, + LoggerType, + MegatronLMPlugin, + MSAMPRecipeKwargs, + PrecisionType, 
+ ProfileKwargs, + ProjectConfiguration, + RNGType, + TERecipeKwargs, + TorchDynamoPlugin, + TorchTensorParallelPlugin, + apply_fp8_autowrap, + check_os_kernel, + clean_state_dict_for_safetensors, + compare_versions, + convert_model, + convert_model_to_fp8_ao, + convert_outputs_to_fp32, + ensure_weights_retied, + extract_model_from_parallel, + fsdp2_apply_ac, + fsdp2_canonicalize_names, + fsdp2_prepare_model, + fsdp2_switch_optimizer_parameters, + gather, + gather_object, + get_fsdp2_grad_scaler, + get_grad_scaler, + get_mixed_precision_context_manager, + get_pretty_name, + has_offloaded_params, + is_bf16_available, + is_bitsandbytes_multi_backend_available, + is_deepspeed_available, + is_lomo_available, + is_megatron_lm_available, + is_mlu_available, + is_msamp_available, + is_musa_available, + is_npu_available, + is_torch_version, + is_torch_xla_available, + is_torchao_available, + is_transformer_engine_available, + is_xpu_available, + load_fsdp_model, + load_fsdp_optimizer, + model_has_dtensor, + pad_across_processes, + parse_choice_from_env, + recursively_apply, + reduce, + release_memory, + save, + save_fsdp_model, + save_fsdp_optimizer, + wait_for_everyone, +) +from .utils.constants import ( + DTENSOR_PYTORCH_VERSION, + FSDP2_PYTORCH_VERSION, + FSDP_PYTORCH_VERSION, + PROFILE_PATTERN_NAME, + SCALER_NAME, +) +from .utils.modeling import get_state_dict_offloaded_model +from .utils.other import compile_regions, compile_regions_deepspeed, is_compiled_module + + +if is_deepspeed_available(): + from .utils import ( + DeepSpeedEngineWrapper, + DeepSpeedOptimizerWrapper, + DeepSpeedSchedulerWrapper, + DummyOptim, + DummyScheduler, + map_pytorch_optim_to_deepspeed, + ) + +if is_megatron_lm_available(): + from .utils import ( + MegatronEngine, + MegatronLMDummyDataLoader, + MegatronLMDummyScheduler, + MegatronLMOptimizerWrapper, + MegatronLMSchedulerWrapper, + megatron_lm_initialize, + megatron_lm_prepare_data_loader, + megatron_lm_prepare_model_optimizer_scheduler, + 
) + +from torch.distributed.algorithms.join import Join + + +if is_torch_xla_available(): + import torch_xla.core.xla_model as xm + import torch_xla.distributed.xla_multiprocessing as xmp + + +if is_npu_available(check_device=False): + import torch_npu # noqa: F401 + + +try: + from torch.optim.lr_scheduler import LRScheduler +except ImportError: + from torch.optim.lr_scheduler import _LRScheduler as LRScheduler + +logger = get_logger(__name__) + +# Sentinel values for defaults +_split_batches = object() +_dispatch_batches = object() +_even_batches = object() +_use_seedable_sampler = object() + + +class Accelerator: + """ + Creates an instance of an accelerator for distributed training or mixed precision training. + + Args: + device_placement (`bool`, *optional*, defaults to `True`): + Whether or not the accelerator should put objects on device (tensors yielded by the dataloader, model, + etc...). + mixed_precision (`str`, *optional*): + Whether or not to use mixed precision training. Choose from 'no','fp16','bf16' or 'fp8'. Will default to + the value in the environment variable `ACCELERATE_MIXED_PRECISION`, which will use the default value in the + accelerate config of the current system or the flag passed with the `accelerate.launch` command. 'fp8' + requires the installation of transformers-engine. + gradient_accumulation_steps (`int`, *optional*, default to 1): + The number of steps that should pass before gradients are accumulated. A number > 1 should be combined with + `Accelerator.accumulate`. If not passed, will default to the value in the environment variable + `ACCELERATE_GRADIENT_ACCUMULATION_STEPS`. Can also be configured through a `GradientAccumulationPlugin`. + cpu (`bool`, *optional*): + Whether or not to force the script to execute on CPU. Will ignore GPU available if set to `True` and force + the execution on one process only. 
+ dataloader_config (`DataLoaderConfiguration`, *optional*): + A configuration for how the dataloaders should be handled in distributed scenarios. + deepspeed_plugin ([`~utils.DeepSpeedPlugin`] or dict of `str`: [`~utils.DeepSpeedPlugin`], *optional*): + Tweak your DeepSpeed related args using this argument. This argument is optional and can be configured + directly using *accelerate config*. If using multiple plugins, use the configured `key` property of each + plugin to access them from `accelerator.state.get_deepspeed_plugin(key)`. Alias for `deepspeed_plugins`. + fsdp_plugin ([`~utils.FullyShardedDataParallelPlugin`], *optional*): + Tweak your FSDP related args using this argument. This argument is optional and can be configured directly + using *accelerate config* + torch_tp_plugin ([`~utils.TorchTensorParallelPlugin`], *optional*): + Deprecated: use `parallelism_config` with `tp_size` instead. + megatron_lm_plugin ([`~utils.MegatronLMPlugin`], *optional*): + Tweak your MegatronLM related args using this argument. This argument is optional and can be configured + directly using *accelerate config* + rng_types (list of `str` or [`~utils.RNGType`]): + The list of random number generators to synchronize at the beginning of each iteration in your prepared + dataloaders. Should be one or several of: + + - `"torch"`: the base torch random number generator + - `"cuda"`: the CUDA random number generator (GPU only) + - `"xla"`: the XLA random number generator (TPU only) + - `"generator"`: the `torch.Generator` of the sampler (or batch sampler if there is no sampler in your + dataloader) or of the iterable dataset (if it exists) if the underlying dataset is of that type. + + Will default to `["torch"]` for PyTorch versions <=1.5.1 and `["generator"]` for PyTorch versions >= 1.6. + log_with (list of `str`, [`~utils.LoggerType`] or [`~tracking.GeneralTracker`], *optional*): + A list of loggers to be setup for experiment tracking. 
Should be one or several of: + + - `"all"` + - `"tensorboard"` + - `"wandb"` + - `"trackio"` + - `"aim"` + - `"comet_ml"` + - `"mlflow"` + - `"dvclive"` + - `"swanlab"` + If `"all"` is selected, will pick up all available trackers in the environment and initialize them. Can + also accept implementations of `GeneralTracker` for custom trackers, and can be combined with `"all"`. + project_config ([`~utils.ProjectConfiguration`], *optional*): + A configuration for how saving the state can be handled. + project_dir (`str`, `os.PathLike`, *optional*): + A path to a directory for storing data such as logs of locally-compatible loggers and potentially saved + checkpoints. + step_scheduler_with_optimizer (`bool`, *optional*, defaults to `True`): + Set `True` if the learning rate scheduler is stepped at the same time as the optimizer, `False` if only + done under certain circumstances (at the end of each epoch, for instance). + kwargs_handlers (list of [`~utils.KwargsHandler`], *optional*) + A list of [`~utils.KwargsHandler`] to customize how the objects related to distributed training, profiling + or mixed precision are created. See [kwargs](kwargs) for more information. + dynamo_backend (`str` or [`~utils.DynamoBackend`], *optional*, defaults to `"no"`): + Set to one of the possible dynamo backends to optimize your training with torch dynamo. + dynamo_plugin ([`~utils.TorchDynamoPlugin`], *optional*): + A configuration for how torch dynamo should be handled, if more tweaking than just the `backend` or `mode` + is needed. + gradient_accumulation_plugin ([`~utils.GradientAccumulationPlugin`], *optional*): + A configuration for how gradient accumulation should be handled, if more tweaking than just the + `gradient_accumulation_steps` is needed. + + **Available attributes:** + + - **device** (`torch.device`) -- The device to use. + - **distributed_type** ([`~utils.DistributedType`]) -- The distributed training configuration. 
+ - **local_process_index** (`int`) -- The process index on the current machine. + - **mixed_precision** (`str`) -- The configured mixed precision mode. + - **num_processes** (`int`) -- The total number of processes used for training. + - **optimizer_step_was_skipped** (`bool`) -- Whether or not the optimizer update was skipped (because of + gradient overflow in mixed precision), in which + case the learning rate should not be changed. + - **process_index** (`int`) -- The overall index of the current process among all processes. + - **state** ([`~state.AcceleratorState`]) -- The distributed setup state. + - **sync_gradients** (`bool`) -- Whether the gradients are currently being synced across all processes. + - **use_distributed** (`bool`) -- Whether the current configuration is for distributed training. + """ + + def __init__( + self, + device_placement: bool = True, + split_batches: bool = _split_batches, + mixed_precision: PrecisionType | str | None = None, + gradient_accumulation_steps: int = 1, + cpu: bool = False, + dataloader_config: DataLoaderConfiguration | None = None, + deepspeed_plugin: DeepSpeedPlugin | dict[str, DeepSpeedPlugin] | None = None, + fsdp_plugin: FullyShardedDataParallelPlugin | None = None, + torch_tp_plugin: TorchTensorParallelPlugin | None = None, # Deprecate later, warning in `post_init` + megatron_lm_plugin: MegatronLMPlugin | None = None, + rng_types: list[str | RNGType] | None = None, + log_with: str | LoggerType | GeneralTracker | list[str | LoggerType | GeneralTracker] | None = None, + project_dir: str | os.PathLike | None = None, + project_config: ProjectConfiguration | None = None, + gradient_accumulation_plugin: GradientAccumulationPlugin | None = None, + step_scheduler_with_optimizer: bool = True, + kwargs_handlers: list[KwargsHandler] | None = None, + dynamo_backend: DynamoBackend | str | None = None, + dynamo_plugin: TorchDynamoPlugin | None = None, + deepspeed_plugins: DeepSpeedPlugin | dict[str, DeepSpeedPlugin] | None = 
None, + parallelism_config: ParallelismConfig | None = None, + ): + self.trackers = [] + if project_config is not None: + self.project_configuration = project_config + else: + self.project_configuration = ProjectConfiguration(project_dir=project_dir) + if project_dir is not None and self.project_dir is None: + self.project_configuration.set_directories(project_dir) + + if mixed_precision is not None: + mixed_precision = str(mixed_precision) + if mixed_precision not in PrecisionType: + raise ValueError( + f"Unknown mixed_precision mode: {mixed_precision}. Choose between {PrecisionType.list()}" + ) + if torch_tp_plugin is not None: + warnings.warn( + "`TorchTensorParallelPlugin` is deprecated and will be removed in a future version of Accelerate. " + "Please use the `ParallelismConfig` with `tp_size` instead.", + FutureWarning, + ) + + if dynamo_plugin is not None and dynamo_backend is not None: + raise ValueError("You cannot pass in both `dynamo_plugin` and `dynamo_backend`, please only pass in one.") + if dynamo_backend is not None: + dynamo_plugin = TorchDynamoPlugin(backend=dynamo_backend) + elif dynamo_plugin is None: + dynamo_plugin = TorchDynamoPlugin() + + if deepspeed_plugins is not None and deepspeed_plugin is not None: + raise ValueError("You cannot pass in both `deepspeed_plugins` and `deepspeed_plugin`.") + elif deepspeed_plugin is not None: + deepspeed_plugins = deepspeed_plugin + + if deepspeed_plugins is None: + # First check if we're creating another `Accelerator` w/o setting `deepspeed_plugin` + if ( + AcceleratorState._shared_state != {} + and AcceleratorState().distributed_type == DistributedType.DEEPSPEED + ): + deepspeed_plugins = AcceleratorState().deepspeed_plugins + else: + # init from env variables + deepspeed_plugins = ( + DeepSpeedPlugin() + if os.environ.get("ACCELERATE_USE_DEEPSPEED", "false").lower() == "true" + else None + ) + else: + # If we're creating a second `Accelerator`, users shouldn't be passing in a `deepspeed_plugin` + if ( 
+ AcceleratorState._shared_state != {} + and AcceleratorState().distributed_type == DistributedType.DEEPSPEED + and AcceleratorState().deepspeed_plugins is not None + ): + raise NotImplementedError( + "You cannot pass in a `deepspeed_plugin` when creating a second `Accelerator`. " + "Please make sure the first `Accelerator` is initialized with all the plugins you want to use." + ) + if isinstance(deepspeed_plugins, dict): + for plugin in deepspeed_plugins.values(): + if not isinstance(plugin, DeepSpeedPlugin): + raise TypeError("`deepspeed_plugin` must be a DeepSpeedPlugin object.") + + if deepspeed_plugins is not None: + os.environ["ACCELERATE_USE_DEEPSPEED"] = "true" # use DeepSpeed if plugin is provided + if not is_deepspeed_available(): + raise ImportError("DeepSpeed is not installed => run `pip install deepspeed` or build it from source.") + if is_mlu_available(): + if compare_versions("deepspeed", "<", "0.15.2"): + raise ImportError("DeepSpeed MLU version must be >= 0.15.2. Please update DeepSpeed.") + elif is_musa_available(): + if compare_versions("deepspeed", "<", "0.14.3"): + raise ImportError("DeepSpeed MUSA version must be >= 0.14.3. Please update DeepSpeed.") + elif compare_versions("deepspeed", "<", "0.9.3"): + raise ImportError("DeepSpeed version must be >= 0.9.3. 
Please update DeepSpeed.") + + self.deepspeed_engine_wrapped = None + + if os.environ.get("ACCELERATE_USE_FSDP", "false").lower() == "true" or isinstance( + fsdp_plugin, FullyShardedDataParallelPlugin + ): + if not is_torch_version(">=", FSDP_PYTORCH_VERSION): + raise ValueError(f"FSDP requires PyTorch >= {FSDP_PYTORCH_VERSION}") + + if fsdp_plugin is None: # init from env variables + fsdp_plugin = ( + FullyShardedDataParallelPlugin() + if os.environ.get("ACCELERATE_USE_FSDP", "false").lower() == "true" + else None + ) + else: + if not isinstance(fsdp_plugin, FullyShardedDataParallelPlugin): + raise TypeError("`fsdp_plugin` must be a FullyShardedDataParallelPlugin object.") + os.environ["ACCELERATE_USE_FSDP"] = "true" # use FSDP if plugin is provided + + if fsdp_plugin is not None and fsdp_plugin.fsdp_version == 2: + if not is_torch_version(">=", FSDP2_PYTORCH_VERSION): + raise ImportError(f"FSDP2 requires PyTorch >= {FSDP2_PYTORCH_VERSION}") + + if megatron_lm_plugin is None: # init from env variables + megatron_lm_plugin = ( + MegatronLMPlugin() if os.environ.get("ACCELERATE_USE_MEGATRON_LM", "false").lower() == "true" else None + ) + else: + if not isinstance(megatron_lm_plugin, MegatronLMPlugin): + raise TypeError("`megatron_lm_plugin` must be a MegatronLMPlugin object.") + os.environ["ACCELERATE_USE_MEGATRON_LM"] = "true" # use MegatronLM if plugin is provided + + if megatron_lm_plugin: + if not is_megatron_lm_available(): + raise ImportError("Megatron is not installed. 
please build it from source.") + + # Kwargs handlers + self.ddp_handler = None + self.scaler_handler = None + self.init_handler = None + self.fp8_recipe_handler = None + self.ao_recipe_handler = None + self.te_recipe_handler = None + self.msamp_recipe_handler = None + self.autocast_handler = None + self.profile_handler = None + self.has_lomo_optimizer = False + + found_handlers = set() + handler_class_to_attr = { + DistributedDataParallelKwargs: "ddp_handler", + GradScalerKwargs: "scaler_handler", + InitProcessGroupKwargs: "init_handler", + FP8RecipeKwargs: "fp8_recipe_handler", + AutocastKwargs: "autocast_handler", + ProfileKwargs: "profile_handler", + AORecipeKwargs: "ao_recipe_handler", + TERecipeKwargs: "te_recipe_handler", + MSAMPRecipeKwargs: "msamp_recipe_handler", + } + self.has_fp8_handler = False + if kwargs_handlers is not None: + for handler in kwargs_handlers: + assert isinstance(handler, KwargsHandler), ( + f"Unsupported kwargs handler passed: {handler}, must be one that inherits `accelerate.utils.KwargsHandler`." 
+ ) + # Add the handler class to the set of found handlers + if handler.__class__ in found_handlers: + raise ValueError(f"You can only pass one {handler.__class__} in `kwargs_handlers`.") + found_handlers.add(handler.__class__) + handler_attr = handler_class_to_attr[handler.__class__] + setattr(self, handler_attr, handler) + if "recipe_handler" in handler_attr and not self.has_fp8_handler: + self.has_fp8_handler = True + + if parallelism_config is None: + # TODO: Remove after deprecating tp_plugin + if torch_tp_plugin is not None: + parallelism_config = ParallelismConfig(tp_size=torch_tp_plugin.tp_size) + elif os.environ.get("ACCELERATE_USE_PARALLELISM_CONFIG", "false").lower() == "true": + parallelism_config = ParallelismConfig() + + kwargs = self.init_handler.to_kwargs() if self.init_handler is not None else {} + self.state = AcceleratorState( + mixed_precision=mixed_precision, + cpu=cpu, + dynamo_plugin=dynamo_plugin, + deepspeed_plugin=deepspeed_plugins, + fsdp_plugin=fsdp_plugin, + megatron_lm_plugin=megatron_lm_plugin, + parallelism_config=parallelism_config, + _from_accelerator=True, + **kwargs, + ) + + if self.parallelism_config: + self.state.device_mesh = self.parallelism_config.get_device_mesh(self.device.type) + self.parallelism_config._validate_accelerator(self) + + self.fp8_enabled = self.state.mixed_precision == "fp8" or mixed_precision == "fp8" + # Check for automatic FP8 recipe creation + if self.fp8_enabled and not self.has_fp8_handler: + if self.fp8_backend == FP8BackendType.AO: + self.ao_recipe_handler = AORecipeKwargs() + elif self.fp8_backend == FP8BackendType.TE: + self.te_recipe_handler = TERecipeKwargs() + elif self.fp8_backend == FP8BackendType.MSAMP: + self.msamp_recipe_handler = MSAMPRecipeKwargs() + elif self.fp8_backend == FP8BackendType.NO: + # Prioritize AO -> TE -> MSAMP + if is_torchao_available(): + logger.info("Found `torchao` installed, using it for FP8 training.") + self.ao_recipe_handler = AORecipeKwargs() + elif 
is_transformer_engine_available(): + logger.info("Found `transformer-engine` installed, using it for FP8 training.") + self.te_recipe_handler = TERecipeKwargs() + elif is_msamp_available(): + logger.info("Found `msamp` installed, using it for FP8 training.") + self.msamp_recipe_handler = MSAMPRecipeKwargs() + else: + raise ImportError( + "Tried to train with `fp8` and auto-detect backend, but no FP8-compatible backend was installed. " + "Valid backends are: `torchao`, `transformer-engine`, and `msamp`." + ) + self.has_fp8_handler = True + + self.delayed_fp8_autocast = False + if self.has_fp8_handler: + # We already check if FP8 is available during `self.state` + if not self.fp8_enabled and ( + self.distributed_type not in (DistributedType.FSDP, DistributedType.DEEPSPEED) + ): + raise ValueError("Passing in an FP8 configuration requires setting `mixed_precision='fp8'`.") + self.delayed_fp8_autocast = self.fp8_backend == "TE" and self.distributed_type in ( + DistributedType.MULTI_GPU, + DistributedType.FSDP, + ) + + # TODO: S1ro - this is probably gonna be a problem with other fp8 backends too + if ( + self.fp8_backend == FP8BackendType.AO + and self.state.distributed_type == DistributedType.FSDP + and self.state.fsdp_plugin.cpu_ram_efficient_loading + ): + raise ValueError( + "torchao with FSDP2 and cpu_ram_efficient_loading is not supported, setting `cpu_ram_efficient_loading` to False will fix the issue and work as intended." 
+ ) + + trackers = filter_trackers(log_with, self.logging_dir) + if len(trackers) < 1 and log_with is not None: + warnings.warn(f"`log_with={log_with}` was passed but no supported trackers are currently installed.") + self.log_with = trackers + + if ( + (mixed_precision != "bf16") + and getattr(self.state, "downcast_bfloat", False) + and (self.state.distributedType != DistributedType.XLA) + ): + raise ValueError("Can only use `downcast_bf16` when using `mixed_precision='bf16'` and on a TPU") + + if gradient_accumulation_plugin is not None: + if gradient_accumulation_steps != 1: + raise ValueError( + "You can only pass one of `gradient_accumulation_steps` and `gradient_accumulation_plugin`. Please only pass in the created `GradientAccumulationPlugin` object." + ) + else: + gradient_accumulation_steps = int( + parse_choice_from_env("ACCELERATE_GRADIENT_ACCUMULATION_STEPS", gradient_accumulation_steps) + ) + gradient_accumulation_plugin = GradientAccumulationPlugin(num_steps=gradient_accumulation_steps) + + # If using DeepSpeed, update gradient accumulation steps from the DeepSpeed plugin + self.gradient_state = GradientState( + gradient_accumulation_plugin=gradient_accumulation_plugin, + ) + + self.device_placement = device_placement + if dataloader_config is None: + dataloader_config = DataLoaderConfiguration() + self.dataloader_config = dataloader_config + self.step_scheduler_with_optimizer = step_scheduler_with_optimizer + + # Mixed precision attributes + self.scaler = None + self.native_amp = False + if ( + self.state.mixed_precision == "fp16" + and self.device.type != "cpu" + and self.distributed_type not in (DistributedType.DEEPSPEED, DistributedType.MEGATRON_LM) + ): + self.native_amp = True + supported_device = ("xpu", "cuda", "npu", "xla", "mlu", "musa", "hpu", "sdaa", "mps") + if self.device.type not in supported_device or is_torch_xla_available(check_is_tpu=True): + raise ValueError( + f"fp16 mixed precision requires a device in {supported_device} (not 
{self.device.type!r})." + ) + if self.device.type == "mps" and not is_torch_version(">=", "2.5.0"): + raise ValueError("fp16 mixed precision with MPS device requires a Pytorch >= 2.5.0") + kwargs = self.scaler_handler.to_kwargs() if self.scaler_handler is not None else {} + + # FSDP2 doesn't use ShardedGradScaler, don't want to modify `get_grad_scaler`, rather create a simple utility + if self.is_fsdp2: + self.scaler = get_fsdp2_grad_scaler(device=self.device.type, **kwargs) + else: + self.scaler = get_grad_scaler(self.distributed_type, **kwargs) + + elif self.state.mixed_precision == "bf16" and self.distributed_type not in ( + DistributedType.DEEPSPEED, + DistributedType.MEGATRON_LM, + ): + if self.device.type in ["cpu", "xpu", "hpu"]: + self.native_amp = True + else: + self.native_amp = is_bf16_available(True) + if not self.native_amp and not is_torch_xla_available(): + raise ValueError("bf16 mixed precision requires PyTorch >= 1.10 and a supported device.") + if self.native_amp and self.device.type == "mps" and not is_torch_version(">=", "2.6.0"): + raise ValueError("bf16 mixed precision with MPS device requires a Pytorch >= 2.6.0") + + # for DeepSpeed, self.state.mixed_precision is always "bf16", + # see https://github.com/huggingface/accelerate/blob/main/src/accelerate/state.py#L968 and + # https://github.com/huggingface/accelerate/blob/main/src/accelerate/utils/dataclasses.py#L1263. + elif self.fp8_enabled: + # We always enable `native_amp` for FP8 + self.native_amp = True + if self.fp8_backend == FP8BackendType.MSAMP: + if self.distributed_type == DistributedType.FSDP: + raise NotImplementedError( + "`accelerate` + `MS-AMP` + `FSDP` is not supported at this time. " + "Please consider using deepspeed, which is supported." 
+ ) + elif self.distributed_type != DistributedType.DEEPSPEED: + # MS-AMP requires `GradScaler` even with bf16 autocast w/ single GPU or DDP: + self.scaler = get_grad_scaler(**kwargs) + + # Start of internal step tracking + self.step = 0 + + # Internal references to the training objects + self._optimizers = [] + self._models = [] + self._schedulers = [] + self._dataloaders = [] + self._custom_objects = [] + + # Hooks + self._load_model_state_pre_hook = OrderedDict() + self._save_model_state_pre_hook = OrderedDict() + + # RNG Types + self.rng_types = rng_types + if self.rng_types is None: + self.rng_types = ["generator"] + + # Set a flag tensor for early stopping and other breakpoints + self.flag_tensor = None + + check_os_kernel() + + @property + def deepspeed_plugin(self): + """ + Returns the currently active DeepSpeedPlugin. + + If using multiple plugins, the first one will be the active one by default. Manually call + `accelerator.state.select_deepspeed_plugin(key)` to activate a different plugin. + + If deepspeed is not enabled, this will return `None`. 
+ """ + return self.state.deepspeed_plugin + + @property + def use_distributed(self): + """ + Whether the Accelerator is configured for distributed training + """ + return self.state.use_distributed + + @property + def multi_device(self): + return self.use_distributed and self.distributed_type in ( + DistributedType.MULTI_GPU, + DistributedType.MULTI_MLU, + DistributedType.MULTI_SDAA, + DistributedType.MULTI_MUSA, + DistributedType.MULTI_NPU, + DistributedType.MULTI_XPU, + DistributedType.MULTI_HPU, + DistributedType.MULTI_NEURON, + ) + + @property + def distributed_type(self): + return self.state.distributed_type + + @property + def num_processes(self): + return self.state.num_processes + + @property + def process_index(self): + return self.state.process_index + + @property + def local_process_index(self): + return self.state.local_process_index + + @property + def device(self): + return self.state.device + + @property + def split_batches(self): + return self.dataloader_config.split_batches + + @property + def dispatch_batches(self): + return self.dataloader_config.dispatch_batches + + @property + def even_batches(self): + return self.dataloader_config.even_batches + + @even_batches.setter + def even_batches(self, value: bool): + self.dataloader_config.even_batches = value + + @property + def use_seedable_sampler(self): + return self.dataloader_config.use_seedable_sampler + + @property + def non_blocking(self): + return self.dataloader_config.non_blocking + + @property + def use_stateful_dataloader(self): + if hasattr(self.dataloader_config, "use_stateful_dataloader"): + return self.dataloader_config.use_stateful_dataloader + return False + + @property + def project_dir(self): + return self.project_configuration.project_dir + + @property + def logging_dir(self): + return self.project_configuration.logging_dir + + @property + def save_iteration(self): + return self.project_configuration.iteration + + @property + def is_main_process(self): + """True for one process 
only.""" + return self.state.is_main_process + + @property + def is_local_main_process(self): + """True for one process per server.""" + return self.state.is_local_main_process + + @property + def is_last_process(self): + return self.process_index == self.num_processes - 1 + + @property + def mixed_precision(self): + return self.state.mixed_precision + + @property + def is_fsdp2(self): + return self.state.is_fsdp2 + + @property + def is_composable_parallelism_enabled(self): + return self.is_fsdp2 + + @property + def parallelism_config(self) -> Union[ParallelismConfig, None]: + return self.state.parallelism_config + + @property + def torch_device_mesh(self): + return self.state.device_mesh + + @property + def should_save_model(self): + if (pc := self.parallelism_config) is None: + # shouldn't even happen + return self.state.is_local_main_process + _non_model_shard_dims = { + pc.dp_replicate_enabled: "dp_replicate", + pc.cp_enabled: "cp", + } + + # return all( + # self.torch_device_mesh[dim].get_local_rank() == 0 for key, dim in non_model_shard_dims.items() if key + # ) + # TODO: S1ro - this is a temporary solution until we figure out why `save_safe_file` is slow when not all processes + return True + + @property + def tensor_parallel_rank(self) -> int: + """ + Returns the local rank for tensor parallelism. If tensor parallelism is configured but not enabled, returns 0 + since all ranks are assumed to be the same. + """ + if self.parallelism_config: + if self.parallelism_config.tp_enabled: + return self.torch_device_mesh.get_local_rank("tp") + return 0 + raise RuntimeError("Tensor parallelism is not configured. Set `parallelism_config` first.") + + @property + def pipeline_parallel_rank(self) -> int: + """ + Pipeline parallelism is not supported yet. + """ + raise NotImplementedError("Pipeline parallelism is currently not supported in Accelerate.") + + @property + def context_parallel_rank(self) -> int: + """ + Context parallelism is not supported yet. 
+ """ + raise NotImplementedError("Context parallelism is currently not supported in Accelerate.") + + @property + def data_parallel_rank(self) -> int: + """ + Returns the local rank for replicate-based data parallelism. If replicate-based data parallelism is configured + but not enabled, returns 0 since all ranks are assumed to be the same. + """ + if self.parallelism_config: + if self.parallelism_config.dp_replicate_enabled: + return self.torch_device_mesh.get_local_rank("dp_replicate") + return 0 + raise RuntimeError("Data parallelism is not configured. Set `parallelism_config` first.") + + @property + def data_parallel_shard_rank(self) -> int: + """ + Returns the local rank for shard-based data parallelism. If shard-based data parallelism is configured but not + enabled, returns 0 since all ranks are assumed to be the same. + """ + if self.parallelism_config: + if self.parallelism_config.dp_shard_enabled: + return self.torch_device_mesh.get_local_rank("dp_shard") + return 0 + raise RuntimeError("Shard-based data parallelism is not configured. Set `parallelism_config` first.") + + @contextmanager + def split_between_processes(self, inputs: list | tuple | dict | torch.Tensor, apply_padding: bool = False): + """ + Splits `input` between `self.num_processes` quickly and can be then used on that process. Useful when doing + distributed inference, such as with different prompts. + + Note that when using a `dict`, all keys need to have the same number of elements. + + Args: + inputs (`list`, `tuple`, `torch.Tensor`, or `dict` of `list`/`tuple`/`torch.Tensor`): + The input to split between processes. + apply_padding (`bool`, `optional`, defaults to `False`): + Whether to apply padding by repeating the last element of the input so that all processes have the same + number of elements. Useful when trying to perform actions such as `Accelerator.gather()` on the outputs + or passing in less inputs than there are processes. 
If so, just remember to drop the padded elements + afterwards. + + Example: + + ```python + # Assume there are two processes + from accelerate import Accelerator + + accelerator = Accelerator() + with accelerator.split_between_processes(["A", "B", "C"]) as inputs: + print(inputs) + # Process 0 + ["A", "B"] + # Process 1 + ["C"] + + with accelerator.split_between_processes(["A", "B", "C"], apply_padding=True) as inputs: + print(inputs) + # Process 0 + ["A", "B"] + # Process 1 + ["C", "C"] + ``` + """ + with PartialState().split_between_processes(inputs, apply_padding=apply_padding) as inputs: + yield inputs + + def on_main_process(self, function: Callable[..., Any] | None = None): + """ + A decorator that will run the decorated function on the main process only. Can also be called using the + `PartialState` class. + + Args: + function (`Callable`): The function to decorate. + + Example: + + ```python + >>> from accelerate import Accelerator + + >>> accelerator = Accelerator() + + + >>> @accelerator.on_main_process + ... def print_something(): + ... print("This will be printed by process 0 only.") + + + >>> print_something() + "This will be printed by process 0 only" + ``` + """ + # For times when the `Accelerator` object itself utilizes this decorator. + if function is None: + if "Accelerator." in self.__qualname__: + function = self + else: + raise ValueError( + "The `on_main_process` decorator must be called with a function on an instantiated `Accelerator` object." + ) + + def _inner(*args, **kwargs): + return PartialState().on_main_process(function)(*args, **kwargs) + + return _inner + + def on_local_main_process(self, function: Callable[..., Any] | None = None): + """ + A decorator that will run the decorated function on the local main process only. Can also be called using the + `PartialState` class. + + Args: + function (`Callable`): The function to decorate. + + Example: + ```python + # Assume we have 2 servers with 4 processes each. 
def on_last_process(self, function: Callable[..., Any] | None = None):
    """
    A decorator that will run the decorated function on the last process only. Can also be called using the
    `PartialState` class.

    Args:
        function (`Callable`, *optional*):
            The function to decorate. When omitted, `self` is used as the function, provided its `__qualname__`
            contains `"Accelerator."`.

    Raises:
        `ValueError`: If no function is given and `self.__qualname__` does not contain `"Accelerator."`.

    Example:
    ```python
    # Assume we have 4 processes.
    from accelerate import Accelerator

    accelerator = Accelerator()


    @accelerator.on_last_process
    def print_something():
        print(f"Printed on process {accelerator.process_index}")


    print_something()
    "Printed on process 3"
    ```
    """
    # `function` defaults to `None` like the sibling `on_*_process` decorators; without the default the
    # `None` handling below could only be reached by passing `None` explicitly.
    # For times when the `Accelerator` object itself utilizes this decorator.
    if function is None:
        if "Accelerator." in self.__qualname__:
            function = self
        else:
            raise ValueError(
                "The `on_last_process` decorator must be called with a function on an instantiated `Accelerator` object."
            )

    def _inner(*args, **kwargs):
        # The process state is looked up at call time, not at decoration time.
        return PartialState().on_last_process(function)(*args, **kwargs)

    return _inner
def on_local_process(self, function: Callable[..., Any] | None = None, local_process_index: int | None = None):
    """
    A decorator that will run the decorated function on a given local process index only. Can also be called using
    the `PartialState` class.

    Args:
        function (`Callable`, *optional*):
            The function to decorate.
        local_process_index (`int`, *optional*):
            The index of the local process on which to run the function.

    Raises:
        `ValueError`: If no function is given, no `local_process_index` is given either, and `self.__qualname__`
        does not contain `"Accelerator."`.

    Example:
    ```python
    # Assume we have 2 servers with 4 processes each.
    from accelerate import Accelerator

    accelerator = Accelerator()


    @accelerator.on_local_process(local_process_index=2)
    def print_something():
        print(f"Printed on process {accelerator.local_process_index}")


    print_something()
    # On server 1:
    "Printed on process 2"
    # On server 2:
    "Printed on process 2"
    ```
    """
    # Initial construction of the decorator: called with only an index, so hand back a partial that still
    # waits for the function to decorate.
    if (self is not None) and (local_process_index is not None) and (function is None):
        return partial(self.on_local_process, local_process_index=local_process_index)
    # For times when the `Accelerator` object itself utilizes this decorator.
    if function is None:
        if "Accelerator." in self.__qualname__:
            function = self
        else:
            # The message names this decorator (it previously named `on_main_process`).
            raise ValueError(
                "The `on_local_process` decorator must be called with a function on an instantiated `Accelerator` object."
            )

    def _inner(*args, **kwargs):
        # The process state is looked up at call time, not at decoration time.
        return PartialState().on_local_process(function, local_process_index)(*args, **kwargs)

    return _inner
@contextmanager
def no_sync(self, model):
    """
    Context manager that suspends gradient synchronization for `model` while the block runs.

    - When `self.is_fsdp2` is set, `model.set_requires_gradient_sync(False)` is called on entry and
      `model.set_requires_gradient_sync(True)` on exit (also when the block raises).
    - Otherwise, in a distributed run that is not DeepSpeed with ZeRO stage >= 2, the model's own `no_sync`
      context manager is entered if the model has one.
    - In every other case this context manager does nothing.

    Args:
        model (`torch.nn.Module`):
            PyTorch Module that was prepared with `Accelerator.prepare`

    Example:

    ```python
    >>> from accelerate import Accelerator

    >>> accelerator = Accelerator()
    >>> dataloader, model, optimizer = accelerator.prepare(dataloader, model, optimizer)
    >>> input_a = next(iter(dataloader))
    >>> input_b = next(iter(dataloader))

    >>> with accelerator.no_sync(model):
    ...     outputs = model(input_a)
    ...     loss = loss_func(outputs)
    ...     accelerator.backward(loss)
    ...     # No synchronization across processes, only accumulate gradients
    >>> outputs = model(input_b)
    >>> loss = loss_func(outputs)
    >>> accelerator.backward(loss)
    >>> # Synchronization across all processes
    >>> optimizer.step()
    >>> optimizer.zero_grad()
    ```
    """
    if self.is_fsdp2:
        model.set_requires_gradient_sync(False)
        try:
            yield
        finally:
            # Always turn synchronization back on, even if the body raised.
            model.set_requires_gradient_sync(True)
        return

    # The DeepSpeed plugin is only inspected for distributed runs (short-circuit evaluation).
    may_use_model_no_sync = self.use_distributed and (
        self.distributed_type != DistributedType.DEEPSPEED or self.state.deepspeed_plugin.zero_stage < 2
    )
    if may_use_model_no_sync:
        # Models without a `no_sync` attribute fall back to a no-op context.
        context_factory = getattr(model, "no_sync", contextlib.nullcontext)
    else:
        context_factory = contextlib.nullcontext

    with context_factory():
        yield
+ + Args: + model (`torch.nn.Module`): + The model for which to trigger the gradient synchronization. + + Example: + + ```python + >>> from accelerate import Accelerator + + >>> accelerator = Accelerator() + >>> dataloader, model, optimizer = accelerator.prepare(dataloader, model, optimizer) + + >>> with accelerator.no_sync(): + ... loss_a = loss_func(model(input_a)) # first forward pass + ... loss_b = loss_func(model(input_b)) # second forward pass + >>> accelerator.backward(loss_a) # No synchronization across processes, only accumulate gradients + >>> with accelerator.trigger_sync_in_backward(model): + ... accelerator.backward(loss_b) # Synchronization across all processes + >>> optimizer.step() + >>> optimizer.zero_grad() + ``` + """ + if not isinstance(model, torch.nn.parallel.DistributedDataParallel): + yield + return + + old_require_backward_grad_sync = model.require_backward_grad_sync + old_require_forward_param_sync = model.require_forward_param_sync + + # EXPERIMENTAL: This will force grad sync during `backward()`, but it is unknown if it breaks other DDP features. 
def _do_sync(self):
    """
    Update `self.step` and tell the gradient state whether gradients should be synchronized.

    When the gradient state syncs with the dataloader and the dataloader is at its end, `self.step` is reset to 0
    and synchronization is switched on. Otherwise `self.step` is incremented and synchronization is switched on
    only when the new step count is a multiple of `gradient_state.num_steps`.
    """
    grad_state = self.gradient_state
    if grad_state.sync_with_dataloader and grad_state.end_of_dataloader:
        self.step = 0
        should_sync = True
    else:
        self.step += 1
        should_sync = (self.step % grad_state.num_steps) == 0
    grad_state._set_sync_gradients(should_sync)
@contextmanager
def join_uneven_inputs(self, joinables, even_batches=None):
    """
    A context manager that facilitates distributed training or evaluation on uneven inputs, which acts as a wrapper
    around `torch.distributed.algorithms.join`. This is useful when the total batch size does not evenly divide the
    length of the dataset.

    Args:
        joinables (`list[torch.distributed.algorithms.Joinable]`):
            A list of models or optimizers that subclass `torch.distributed.algorithms.Joinable`. Most commonly, a
            PyTorch Module that was prepared with `Accelerator.prepare` for DistributedDataParallel training.
        even_batches (`bool`, *optional*)
            If set, this will override the value of `even_batches` set in the `Accelerator`. If it is not provided,
            the default `Accelerator` value will be used.

    `join_uneven_inputs` is only supported for Distributed Data Parallel training on multiple GPUs. For any other
    configuration, this method will have no effect.

    Overriding `even_batches` will not affect iterable-style data loaders.

    Example:

    ```python
    >>> from accelerate import Accelerator

    >>> accelerator = Accelerator(even_batches=True)
    >>> ddp_model, optimizer, dataloader = accelerator.prepare(model, optimizer, dataloader)

    >>> with accelerator.join_uneven_inputs([ddp_model], even_batches=False):
    ...     for input, output in dataloader:
    ...         outputs = model(input)
    ...         loss = loss_func(outputs)
    ...         loss.backward()
    ...         optimizer.step()
    ...         optimizer.zero_grad()
    ```
    """
    if self.multi_device:
        # (dataloader index, previous `even_batches` value) for every batch sampler overridden below,
        # so the original values can be restored on exit.
        dl_even_batches_values = []

        if even_batches is not None:
            iterable_dl_seen = False
            # override value in batch sampler for map-style datasets
            for dl_idx, dl in enumerate(self._dataloaders):
                if isinstance(dl, DataLoaderDispatcher):
                    # These dataloaders are left untouched; only a warning is emitted for them.
                    iterable_dl_seen = True
                    continue
                dl_even_batches_values.append((dl_idx, dl.batch_sampler.even_batches))
                dl.batch_sampler.even_batches = even_batches

            if iterable_dl_seen:
                warnings.warn(
                    "Overriding even_batches is only supported for map-style datasets, yet some dataloaders given were iterable"
                )
        else:
            even_batches = self.even_batches

        # Joining is enabled only when batches are not being evened out.
        enable_join = not even_batches
        try:
            with Join(joinables, enable=enable_join, throw_on_early_termination=False):
                yield
        finally:
            # reset any batch samplers that have been modified
            for dl_idx, even_batches_value in dl_even_batches_values:
                self._dataloaders[dl_idx].batch_sampler.even_batches = even_batches_value
    else:
        # Even when disabled, Join expects models to subclass Joinable, so skip entirely for single process runs
        if self.distributed_type != DistributedType.NO:
            warnings.warn(
                "Joining uneven inputs is only supported for multi-GPU training, as a result `join_uneven_inputs` will have no effect."
            )

        with contextlib.nullcontext(joinables):
            yield
+ + + + You don't need to prepare a model if you only use it for inference without any kind of mixed precision + + + + Examples: + + ```python + >>> from accelerate import Accelerator + + >>> accelerator = Accelerator() + >>> # Assume a model, optimizer, data_loader and scheduler are defined + >>> model, optimizer, data_loader, scheduler = accelerator.prepare(model, optimizer, data_loader, scheduler) + ``` + + ```python + >>> from accelerate import Accelerator + + >>> accelerator = Accelerator() + >>> # Assume a model, optimizer, data_loader and scheduler are defined + >>> device_placement = [True, True, False, False] + >>> # Will place the first two items passed in automatically to the right device but not the last two. + >>> model, optimizer, data_loader, scheduler = accelerator.prepare( + ... model, optimizer, data_loader, scheduler, device_placement=device_placement + ... ) + ``` + """ + if device_placement is None: + device_placement = [None for _ in args] + elif self.distributed_type in (DistributedType.DEEPSPEED, DistributedType.MEGATRON_LM): + raise ValueError("You can't customize device placements with DeepSpeed or Megatron-LM.") + elif len(device_placement) != len(args): + raise ValueError( + f"`device_placement` should be a list with {len(args)} elements (the number of objects passed)." + ) + + for obj in args: + # TODO: Look at enabling native TP training directly with a proper config + if ( + isinstance(obj, torch.nn.Module) + and self.verify_device_map(obj) + and self.distributed_type != DistributedType.NO + and os.environ.get("ACCELERATE_BYPASS_DEVICE_MAP", "false") != "true" + ): + raise ValueError( + "You can't train a model that has been loaded with `device_map='auto'` in any distributed mode." + " Please rerun your script specifying `--num_processes=1` or by launching with `python {{myscript.py}}`." 
+ ) + + if self.distributed_type == DistributedType.DEEPSPEED: + model_count = 0 + for obj in args: + if isinstance(obj, torch.nn.Module): + model_count += 1 + if model_count > 1: + raise AssertionError( + "You can't use same `Accelerator()` instance with multiple models when using DeepSpeed" + ) + + # On TPUs, putting the model on the XLA device will create new parameters, so the corresponding optimizer will + # have parameters disconnected from the model (so no training :-( ). + # If the model and optimizer have parameters on different devices we raise an error. + if self.distributed_type == DistributedType.XLA: + model_device, optimizer_device = self._get_devices() + if model_device is not None and optimizer_device is not None and model_device != optimizer_device: + raise ValueError( + "The model and the optimizer parameters are not on the same device, which probably means you " + "created an optimizer around your model **before** putting on the device. Make sure the line " + "model.to(device) is before the optimizer creation in your script or remove it entirely and use " + "the flag default value for `device_placement` in your `Accelerator` to let it handle that " + "part for you." + ) + + if self.is_fsdp2: + model_count = 0 + optimizer_count = 0 + for i, obj in enumerate(args): + if isinstance(obj, torch.nn.Module): + model_count += 1 + elif isinstance(obj, torch.optim.Optimizer): + optimizer_count += 1 + + # This needs to be written as such, so that passing other objects other than models/optimizers doesn't raise an error + if (model_count < 1 and optimizer_count > 0) or (model_count > 0 and optimizer_count < 1): + raise ValueError( + "When using FSDP2, a model and optimizer must be passed together to `Accelerator.prepare()`" + " as the optimizer needs to have its parameters modified after the model is converted." 
+ ) + if model_count > 1: + raise ValueError("Only one model is supported when using FSDP2") + + # If we're dealing with device placement, this deals with that by... + tpu_should_fix_optimizer = self.device_placement and self.distributed_type == DistributedType.XLA + + if tpu_should_fix_optimizer: + # 1. grabbing old model parameters + old_named_params = self._get_named_parameters(*args, drop_refs=False) + + if self.parallelism_config and self.parallelism_config.tp_enabled: + args = self._prepare_tp(*args) + for item in args: + if any( + item in container + for container in (self._dataloaders, self._models, self._optimizers, self._schedulers) + ): + item._is_accelerate_prepared = True + + if self.parallelism_config and self.parallelism_config.cp_enabled: + args = self._prepare_cp(*args) + # for megatron-lm, we don't need to prepare TE AO at this moment + if self.distributed_type != DistributedType.MEGATRON_LM: + if self.fp8_backend == FP8BackendType.TE: + args = self._prepare_te(*args) + elif self.fp8_backend == FP8BackendType.AO: + args = self._prepare_ao(*args) + if self.distributed_type == DistributedType.DEEPSPEED: + result = self._prepare_deepspeed(*args) + elif self.distributed_type == DistributedType.MEGATRON_LM: + result = self._prepare_megatron_lm(*args) + elif self.is_fsdp2: + result = self._prepare_fsdp2(*args) + else: + if self.fp8_backend == FP8BackendType.MSAMP: + args, device_placement = self._prepare_msamp(*args, device_placement=device_placement) + result = tuple( + self._prepare_one(obj, first_pass=True, device_placement=d) for obj, d in zip(args, device_placement) + ) + result = tuple(self._prepare_one(obj, device_placement=d) for obj, d in zip(result, device_placement)) + if tpu_should_fix_optimizer: + # 2. grabbing new model parameters + new_named_params = self._get_named_parameters(*result) + # 3. building a map from the first to the second + mapping = {p: new_named_params[n] for n, p in old_named_params.items()} + # 4. 
using that map to update the parameters of the optimizer + for obj in result: + if isinstance(obj, torch.optim.Optimizer): + obj._switch_parameters(mapping) + + for item in result: + if any( + item in container + for container in (self._dataloaders, self._models, self._optimizers, self._schedulers) + ): + item._is_accelerate_prepared = True + + return result if len(result) > 1 else result[0] + + def _prepare_tp(self, *args): + # First pass: prepare everything except schedulers (first_pass=True) and the model, which is prepared separately + # below + result = [ + self._prepare_one(obj, first_pass=True) if not isinstance(obj, torch.nn.Module) else obj for obj in args + ] + + # Second pass: prepare schedulers + result = [self._prepare_one(obj) if not isinstance(obj, torch.nn.Module) else obj for obj in result] + + for arg in args: + if not isinstance(arg, torch.nn.Module): + continue + model = arg + + from torch.distributed.tensor import DTensor + + if not any(isinstance(p, DTensor) for p in model.parameters()): + logger.warning( + "The model parameters are not sharded by DTensor, we skip the TP preparation. If you are using " + "a PreTrained model it is expected and this warning can be ignored." 
+ ) + return result + + # Now we prepare the model + device_mesh = self.torch_device_mesh + + old_named_params = self._get_named_parameters(*tuple(result), drop_refs=True) + + from torch.distributed.tensor import DTensor + + if self.is_fsdp2: + for arg in result: + if not isinstance(arg, torch.nn.Module): + continue + + from torch.distributed.tensor import Replicate + from transformers.integrations.tensor_parallel import ReplicateParallel + + model: torch.nn.Module = arg + tp_plan = ReplicateParallel + + for name, param in model.named_parameters(): + if isinstance(param, DTensor): + continue + + dp = DTensor.from_local(param, device_mesh=device_mesh["tp"], placements=[Replicate()]) + param_name, param_type = name.rsplit(".", 1) + module_to_tp = model.get_submodule(param_name) + + tp_plan().prepare_module_tp(module_to_tp, device_mesh["tp"]) + if not isinstance(dp, torch.nn.Parameter): + dp = torch.nn.Parameter(dp, requires_grad=param.requires_grad) + setattr(module_to_tp, param_type, dp) + + new_named_params = self._get_named_parameters(*tuple(result), drop_refs=False) + # Build a map from old to new params + mapping = {p: new_named_params[n] for n, p in old_named_params.items()} + + if not mapping: + return result + + def _get_tensor_address(p): + if isinstance(p, DTensor): + return p._local_tensor.data_ptr() + return p.data_ptr() + + for obj in result: + if isinstance(obj, torch.optim.Optimizer): + for param_group in obj.param_groups: + # Each param_group originally maps to model parameters (e.g., from model.parameters()). + # After _prepare_tp(), parameter references are replaced with DTensor instances. + # Therefore, we remap the parameter references to their new DTensor addresses + # so that the optimizer can correctly update the model parameters. 
def _prepare_cp(self, *args):
    """
    Set up context parallelism for the objects passed in `args`.

    Configures the rotate method from `self.parallelism_config.cp_handler.cp_comm_strategy`, stores a
    `context_parallel` partial bound to the `"cp"` entry of `self.torch_device_mesh` in `self._cp_context`, and
    calls `_attach_context_parallel_hooks` on every `torch.nn.Module` found in `args`.

    Returns:
        `tuple`: `args`, returned as received.
    """
    from torch.distributed.tensor.experimental import context_parallel
    from torch.distributed.tensor.experimental._attention import set_rotate_method

    set_rotate_method(self.parallelism_config.cp_handler.cp_comm_strategy)

    cp_mesh = self.torch_device_mesh["cp"]
    self._cp_context = functools.partial(context_parallel, mesh=cp_mesh)

    # Only modules receive hooks; every other object is passed through untouched.
    for module in (candidate for candidate in args if isinstance(candidate, torch.nn.Module)):
        _attach_context_parallel_hooks(module)

    return args
params and canonicalize - we canonicalize to have the mapping easy + old_named_params = fsdp2_canonicalize_names(self._get_named_parameters(*tuple(result), drop_refs=True)) + + # Swap the optimizer parameters with empty, so `fully_shard` after will not allocate too much memory + from torch.distributed.tensor import DTensor + + for obj in result: + if isinstance(obj, torch.optim.Optimizer): + for param_group in obj.param_groups: + for i, p in enumerate(param_group["params"]): + # We drop a reference to the original param here, so that _move_states_to_device triggers a reallocation + # We reassign the data_ptr to the original param, so that we preserve the mapping to the new ones + param_group["params"][i] = torch.empty(1, dtype=p.dtype, device=p.device) + param_group["params"][i].data_ptr = ( + p._local_tensor.data_ptr() if isinstance(p, DTensor) else p.data_ptr() + ) + + self._models.append(model) + + # Prepare everything FSDP2 related for the model (except AC) + model = fsdp2_prepare_model(self, model) + + # Remove the old model from the list + if len(self._models) > 1 and (self._models[-2] is self._models[-1]): + del self._models[-2] + + # Replace the old model with the new one (shouldn't be needed as everything should be in place) + result[model_index] = model + + # Get new params and canonicalize + new_named_params = fsdp2_canonicalize_names(self._get_named_parameters(*result)) + # Build a map from old to new params and handle missings gracefully + mapping = {} + missing_params = [] + for n, p in old_named_params.items(): + if n in new_named_params: + mapping[p] = new_named_params[n] + else: + missing_params.append(n) + + if missing_params: + # Common tied embedding parameter names + tied_weight_names = ["lm_head.weight", "model.embed_tokens.weight", "transformer.wte.weight"] + if any(name in missing_params for name in tied_weight_names): + raise ValueError( + f"FSDP2 mapping failed (missing: {missing_params}). 
This is likely due to tied embeddings " + f"(config has tie_word_embeddings=True but checkpoint has separate weights).\n" + f"To fix, try: Set `model.config.tie_word_embeddings = False` after loading the model.\n" + ) + raise KeyError(f"Parameters missing after FSDP2 wrapping: {missing_params}") + + # Update the optimizer parameters + for obj in result: + if isinstance(obj, torch.optim.Optimizer): + fsdp2_switch_optimizer_parameters(obj, mapping) + + return result + + def prepare_model( + self, model: torch.nn.Module, device_placement: bool | None = None, evaluation_mode: bool = False + ): + """ + Prepares a PyTorch model for training in any distributed setup. It is recommended to use + [`Accelerator.prepare`] instead. + + Args: + model (`torch.nn.Module`): + A PyTorch model to prepare. You don't need to prepare a model if it is used only for inference without + any kind of mixed precision + device_placement (`bool`, *optional*): + Whether or not to place the model on the proper device. Will default to `self.device_placement`. + evaluation_mode (`bool`, *optional*, defaults to `False`): + Whether or not to set the model for evaluation only, by just applying mixed precision and + `torch.compile` (if configured in the `Accelerator` object). + + Example: + + ```python + >>> from accelerate import Accelerator + + >>> accelerator = Accelerator() + >>> # Assume a model is defined + >>> model = accelerator.prepare_model(model) + ``` + """ + if device_placement is None: + device_placement = self.device_placement and self.distributed_type != DistributedType.FSDP + + self._models.append(model) + + # TODO: Look at enabling native TP training directly with a proper config + if ( + self.verify_device_map(model) + and self.distributed_type != DistributedType.NO + and os.environ.get("ACCELERATE_BYPASS_DEVICE_MAP", "false") != "true" + ): + raise ValueError( + "You can't train a model that has been loaded with `device_map='auto'` in any distributed mode." 
+ " Please rerun your script specifying `--num_processes=1` or by launching with `python {{myscript.py}}`." + ) + + if self.native_amp: + model._original_forward = model.forward + autocast_context = get_mixed_precision_context_manager(self.native_amp, self.autocast_handler) + # NOTE: MS-AMP adds `__func__` already to `model.forward`, so we should always use `model.forward` + if self.fp8_backend == FP8BackendType.MSAMP or not hasattr(model.forward, "__func__"): + model_forward_func = model.forward + model.forward = convert_outputs_to_fp32(autocast_context(model_forward_func)) + else: + model_forward_func = model.forward.__func__ + new_forward = autocast_context(model_forward_func) + model.forward = MethodType(new_forward, model) + model.forward = MethodType(convert_outputs_to_fp32(model.forward.__func__), model) + + # We prepare TE after, allowing for bf16 autocast to happen first + if self.fp8_backend == FP8BackendType.TE and not self.delayed_fp8_autocast: + model = apply_fp8_autowrap(model, self.te_recipe_handler or self.fp8_recipe_handler) + + if (getattr(model, "is_loaded_in_8bit", False) or getattr(model, "is_loaded_in_4bit", False)) and getattr( + model, "hf_device_map", False + ): + model_devices = set(model.hf_device_map.values()) + if len(model_devices) > 1 and self.distributed_type != DistributedType.NO: + raise ValueError( + "You can't train a model that has been loaded in 8-bit or 4-bit precision on multiple devices in any distributed mode." + " In order to use 8-bit or 4-bit models that have been loaded across multiple GPUs the solution is to use Naive Pipeline Parallelism." + " Therefore you should not specify that you are under any distributed regime in your accelerate config." 
+ ) + elif len(model_devices) == 1: + current_device = list(model_devices)[0] + if isinstance(current_device, torch.device): + current_device_index = current_device.index + elif isinstance(current_device, str): + current_device_index = torch.device(current_device).index + else: + current_device_index = current_device + + current_device_index = int(current_device_index) if current_device_index is not None else None + if self.device.type == "cpu" and is_bitsandbytes_multi_backend_available(): + # bnb with multi-backend supports CPU which don't need to check index. + pass + elif torch.device(self.device.type, current_device_index) != self.device: + # if on the first device (GPU 0) we don't care + if (self.device.index is not None) or (current_device_index != 0): + raise ValueError( + "You can't train a model that has been loaded in 8-bit or 4-bit precision on a different device than the one " + "you're training on. Make sure you loaded the model on the correct device using for example `device_map={'':torch.cuda.current_device()}` or `device_map={'':torch.xpu.current_device()}`" + ) + if ( + ("cpu" in model_devices and not is_bitsandbytes_multi_backend_available()) + or ("cpu" in model_devices and is_xpu_available()) + or "disk" in model_devices + ): + raise ValueError( + "You can't train a model that has been loaded in 8-bit or 4-bit precision with CPU or disk offload. " + "If you want train the 8-bit or 4-bit model in CPU, please install bitsandbytes with multi-backend, see https://huggingface.co/docs/bitsandbytes/main/en/installation#multi-backend" + ) + elif device_placement and not self.verify_device_map(model): + model = model.to(self.device) + if not evaluation_mode: + if self.multi_device and not (self.parallelism_config and self.parallelism_config.tp_enabled): + if model_has_dtensor(model): + raise ValueError( + "Your model contains `DTensor` parameters, which is incompatible with DDP. Maybe you loaded your model with `device_map='auto'`? 
Specify `device_map='cuda'` or 'xpu' or 'cpu' instead." + ) + if any(p.requires_grad for p in model.parameters()): + kwargs = self.ddp_handler.to_kwargs() if self.ddp_handler is not None else {} + # TODO: Look at enabling native TP training directly with a proper config + if os.environ.get("ACCELERATE_BYPASS_DEVICE_MAP", "false") != "true": + if self.device.type == "hpu": + device_ids, output_device = [self.device.index], self.device.index + else: + device_ids, output_device = [self.local_process_index], self.local_process_index + else: + device_ids, output_device = None, None + model = torch.nn.parallel.DistributedDataParallel( + model, device_ids=device_ids, output_device=output_device, **kwargs + ) + if self.ddp_handler is not None: + self.ddp_handler.register_comm_hook(model) + elif self.parallelism_config and self.parallelism_config.tp_enabled: + if not hasattr(model, "tp_size"): + raise NotImplementedError( + "Model should undergo tensor parallel before passing it to accelerate." + "You can use .from_pretrained(..., tp_plan='auto') if the model supports" + ) + if model.tp_size != self.parallelism_config.tp_size: + raise ValueError( + f"tp_size in the plugin {self.parallelism_config.tp_size} should be same as model's tp size {model.tp_size}" + ) + elif self.is_fsdp2: + raise ValueError( + "FSDP2 preparation should be done via `accelerate.prepare()`, as it requires a model and an optimizer." 
+ ) + + elif self.distributed_type == DistributedType.FSDP: + # We need to fix the optimizer *before* sharding the model + from torch.distributed.fsdp.fully_sharded_data_parallel import FullyShardedDataParallel as FSDP + + # Check if the model is already a FSDP model due to `Manual Wrapping` and if so, + # don't wrap it again + # In case the model is already compiled using PyTorch 2.0 and the wrapped model in it + # is a FSDP model, don't wrap it again + is_type_fsdp = isinstance(model, FSDP) or ( + is_compiled_module(model) and isinstance(model._orig_mod, FSDP) + ) + + if not is_type_fsdp: + self.state.fsdp_plugin.set_auto_wrap_policy(model) + fsdp_plugin = self.state.fsdp_plugin + + # need to ensure that params are re-tied after running + # param_init_fn + fsdp_plugin.param_init_fn = ensure_weights_retied( + fsdp_plugin.param_init_fn, + model, + self.device, + ) + + kwargs = { + # We fallback to reshard_after_forward if sharding_strategy is not set. + # We prerfer sharding_strategy to not break the behavior of the existing code. 
+ # Deprecation warning has already been issued in `utils.dataclasses.py` + "sharding_strategy": fsdp_plugin.sharding_strategy or fsdp_plugin.reshard_after_forward, + "cpu_offload": fsdp_plugin.cpu_offload, + "auto_wrap_policy": fsdp_plugin.auto_wrap_policy, + "mixed_precision": fsdp_plugin.mixed_precision_policy, + "sync_module_states": fsdp_plugin.sync_module_states, + "backward_prefetch": fsdp_plugin.backward_prefetch, + "forward_prefetch": fsdp_plugin.forward_prefetch, + "use_orig_params": fsdp_plugin.use_orig_params, + "param_init_fn": fsdp_plugin.param_init_fn, + "ignored_modules": fsdp_plugin.ignored_modules, + "limit_all_gathers": fsdp_plugin.limit_all_gathers, + "device_id": self.device, + } + + if isinstance(kwargs["ignored_modules"], str): + reg = re.compile(kwargs["ignored_modules"]) + ignored = [] + for name, module in model.named_modules(): + if reg.fullmatch(name): + # ensure that the device for these modules is still set correctly + module.to(self.device) + ignored.append(module) + kwargs["ignored_modules"] = ignored + + model = FSDP(model, **kwargs) + if fsdp_plugin.activation_checkpointing: + from torch.distributed.algorithms._checkpoint.checkpoint_wrapper import ( + CheckpointImpl, + apply_activation_checkpointing, + checkpoint_wrapper, + ) + + apply_activation_checkpointing( + model, + checkpoint_wrapper_fn=functools.partial( + checkpoint_wrapper, + checkpoint_impl=CheckpointImpl.NO_REENTRANT, + ), + auto_wrap_policy=fsdp_plugin.auto_wrap_policy, + ) + + # In the event the model had been loaded in low precision, but + # mixed precision had also been activated, then we follow DeepSpeed's + # strategy to hold the parameters in full precision. + # - assume that trainer.args.bf16 and trainer.args.fp16 are already checked against + # fsdp_plugin.mixed_precision_policy. + # - NOTE: we do not check the mixed_precision attribute on the FSDP root wrapper. + # * this attribute will always set by init_utils.init_core_state so its always not None. 
+ # * mixed_precision.param_dtype only regards _fwd_bwd_param_dtype + # * if model is loaded in 16bit, and even if mixed_precision.param_dtype is None, + # we still want to upcast the flat_param. + if self.mixed_precision != "no": # if mixed precision is set + upcasted_log = [] + for module in FSDP.fsdp_modules(model): + # Referencing DeepSpeed Zero3 + # - in Init, params are converted to 16bit while partitioning. + # - in accelerator.prepare, deepspeed.initialize is called to: + # * creates the DeepSpeedEngine. + # * since zero_optimization() is True , calls engine._configure_zero_optimizer. + # + # Inside the DeepSpeed Zero3 optimizer configuration, which initializes + # DeepSpeedZeroOptimizer_Stage3, during which: + # * trainable_param_groups are obtained from the attached optimizer + # (already partitioned in 16bit). + # * then _setup_for_real_optimizer -> _create_fp32_partitions + # which performs the fp32 upcasting. + + # To mimic DeepSeepds's casting in FSDP, we look at the (single) FlatParameter held + # within an FSDP wrapper. This FlatParameter will be seen by the optimizer. + # - even though there is a torch.device('meta') guard below, we + # expect _init_utils._init_param_handle_from_module to already + # sync the parameter. + + if not module._has_params: + continue # skip if FSDP module not managing parameters + param = module._flat_param + if ( + param.dtype != torch.float32 + and param.device != torch.device("meta") + and param.requires_grad + ): + # keep log of names_params that was upcasted + # NOTE: resorted to this because warnings.simplefilter("once") is somehow not working + name_param_log = (module.module.__class__.__name__, ", ".join(module._flat_param._fqns)) + if name_param_log not in upcasted_log: + upcasted_log.append(name_param_log) + + # this works because of FSDP's _runtime_utils.lazy_init. + # Have to be careful not to call anything before this that + # triggers lazy_init (e.g., _is_fsdp_root). 
+ param.data = param.data.to(torch.float32) # upcasting + module._handle._orig_param_dtype = torch.float32 # update + + # report the warnings + # some messages can be quite repetitive, especially when reporting about layers that have identical architecture. + if self.is_main_process: + for name_log, param_log in upcasted_log: + warnings.warn( + f"Upcasted low precision parameters in {name_log} because mixed precision turned on in FSDP. " + f"Affects: {param_log}." + ) + + if len(upcasted_log) > 0: + warnings.warn( + "FSDP upcast of low precision parameters may affect the precision of model checkpoints." + ) + + # if the previous and current models are same, delete the previous one + if len(self._models) > 1 and (self._models[-2] is self._models[-1]): + del self._models[-2] + self._models[-1] = model + elif self.distributed_type == DistributedType.MULTI_CPU: + kwargs = self.ddp_handler.to_kwargs() if self.ddp_handler else {} + model = torch.nn.parallel.DistributedDataParallel(model, **kwargs) + if self.ddp_handler is not None: + self.ddp_handler.register_comm_hook(model) + elif self.distributed_type == DistributedType.XLA and self.state.fork_launched: + model = xmp.MpModelWrapper(model).to(self.device) + # Now we can apply the FP8 autocast + if self.fp8_backend == FP8BackendType.TE and self.delayed_fp8_autocast: + model = apply_fp8_autowrap(model, self.te_recipe_handler or self.fp8_recipe_handler) + # torch.compile should be called last and only if the model isn't already compiled + if self.state.dynamo_plugin.backend != DynamoBackend.NO and not is_compiled_module(model): + if self.state.dynamo_plugin.use_regional_compilation: + model = compile_regions(model, **self.state.dynamo_plugin.to_kwargs()) + else: + model = torch.compile(model, **self.state.dynamo_plugin.to_kwargs()) + return model + + def _prepare_ao(self, *args): + if not is_torchao_available(): + raise ImportError( + "`torchao` was not found on your system or is too old of a version. 
Please ensure that `torchao >= 0.6.1` is installed" + ) + + if self.is_fsdp2: + models = [x for x in args if isinstance(x, torch.nn.Module)] + optimizers = [x for x in args if isinstance(x, torch.optim.Optimizer)] + for arg in args: + if isinstance(arg, torch.nn.Module): + convert_model_to_fp8_ao( + arg, + config=self.ao_recipe_handler.config, + module_filter_func=self.ao_recipe_handler.module_filter_func, + ) + + # Invariant: with FSDP2, optimizer is always passed to `prepare()` together with model + # We only precompute scales if float8 all gather is enabled, possibly can add a flag for this later + if self.is_fsdp2 and len(optimizers) > 0 and self.ao_recipe_handler.config.enable_fsdp_float8_all_gather: + from torchao.float8 import precompute_float8_dynamic_scale_for_fsdp + + optimizers[0].register_step_post_hook( + lambda *args, **kwargs: precompute_float8_dynamic_scale_for_fsdp(models[0]) + ) + + return args + + def _prepare_te(self, *args): + if not is_transformer_engine_available(): + raise ImportError( + "`transformer_engine` was not found on your system. Please ensure that `transformer_engine` is installed" + ) + model, optimizer = None, None + num_models, num_optimizers = 0, 0 + result = [obj for obj in args] + for obj in result: + if isinstance(obj, torch.nn.Module): + model = obj + num_models += 1 + elif isinstance(obj, (torch.optim.Optimizer)): + optimizer = obj + num_optimizers += 1 + if optimizer is None and model is None: + return result + elif optimizer is None or model is None: + raise ValueError( + "You must pass a model and an optimizer together to `accelerate.prepare()` when using TransformerEngine." + ) + elif num_models > 1 or num_optimizers > 1: + raise ValueError( + f"You can't use multiple models ({num_models}) or optimizers {num_optimizers} with TransformerEngine." 
+ ) + old_named_params = self._get_named_parameters(model) + with torch.no_grad(): + convert_model(model) + new_named_params = self._get_named_parameters(model) + mapping = {p: new_named_params[n] for n, p in old_named_params.items()} + # We need to switch the optimizer params to the new params *after* the model is wrapped in FSDP + for param_group in optimizer.param_groups: + param_group["params"] = [mapping[p] for p in param_group["params"]] + + return result + + def _prepare_deepspeed(self, *args): + import deepspeed + + ds_initialize = deepspeed.initialize + if self.fp8_backend == FP8BackendType.MSAMP: + # MS-AMP requires DeepSpeed patches + from msamp import deepspeed as msamp_deepspeed + + ds_initialize = msamp_deepspeed.initialize + + deepspeed_plugin = self.deepspeed_plugin + + is_dataloader_present = any(isinstance(obj, torch.utils.data.DataLoader) for obj in args) + tp_size = deepspeed_plugin.deepspeed_config.get("tensor_parallel", {}).get("autotp_size", 0) + + sp_backend = self.parallelism_config.sp_backend if self.parallelism_config else None + sp_size = self.parallelism_config.sp_size if self.parallelism_config else 1 + sp_handler = self.parallelism_config.sp_handler if self.parallelism_config else None + + if tp_size > 1: + if not compare_versions("deepspeed", ">=", "0.16.4"): + raise ImportError( + "Deepspeed TP requires deepspeed >= 0.16.4, Please update DeepSpeed via `pip install deepspeed -U`." + ) + if not is_torch_version(">=", "2.2.0"): + raise ImportError( + "Tried to use TP, but `torch.distributed.device_mesh` requires PyTorch >= 2.2.0. 
Please upgrade your PyTorch version" + ) + from torch.distributed.device_mesh import init_device_mesh + + mesh_dim_name = "tp" + self.state.ds_device_mesh = init_device_mesh(self.device.type, (tp_size,), mesh_dim_names=(mesh_dim_name,)) + + result = [ + self._prepare_one(obj, first_pass=True) if isinstance(obj, torch.utils.data.DataLoader) else obj + for obj in args + ] + + if deepspeed_plugin.is_auto("train_micro_batch_size_per_gpu"): + if is_dataloader_present: + batch_sizes = [obj.batch_size for obj in args if hasattr(obj, "batch_size")] + if any(bs is None for bs in batch_sizes): + raise ValueError( + "At least one of the dataloaders passed to `accelerate.prepare()` has `None` as batch size. " + "Please set an integer value in `train_micro_batch_size_per_gpu` in the deepspeed config file " + "or assign integer value to `AcceleratorState().deepspeed_plugin.deepspeed_config['train_micro_batch_size_per_gpu']`." + ) + if self.split_batches: + batch_sizes = [batch_size // self.num_processes for batch_size in batch_sizes] + + batch_size_per_device = min(batch_sizes) if deepspeed_plugin.is_train_batch_min else max(batch_sizes) + if len(batch_sizes) > 1: + logger.info( + "Since you passed both train and evaluation dataloader, `is_train_batch_min` (here " + f"{deepspeed_plugin.is_train_batch_min} will decide the `train_batch_size` ({batch_size_per_device})." + ) + else: + raise ValueError( + "When using DeepSpeed, `accelerate.prepare()` requires you to pass at least one of training or evaluation dataloaders " + "with `batch_size` attribute returning an integer value " + "or alternatively set an integer value in `train_micro_batch_size_per_gpu` in the deepspeed config file " + "or assign integer value to `AcceleratorState().deepspeed_plugin.deepspeed_config['train_micro_batch_size_per_gpu']`." 
+ ) + else: + batch_size_per_device = deepspeed_plugin.get_value("train_micro_batch_size_per_gpu") + + # handle `gradient_accumulation_steps` when the value is `auto` + deepspeed_plugin.fill_match( + "gradient_accumulation_steps", + must_match=False, + gradient_accumulation_steps=self.gradient_accumulation_steps, + ) + + deepspeed_gradient_accumulation_steps = deepspeed_plugin.get_value("gradient_accumulation_steps") + # update gradient_accumulation_steps if there is a mismatch + if deepspeed_gradient_accumulation_steps != self.gradient_accumulation_steps: + logger.warning( + f"Gradient accumulation steps mismatch: GradientAccumulationPlugin has {self.gradient_accumulation_steps}, " + f"DeepSpeed config has {deepspeed_gradient_accumulation_steps}. Using DeepSpeed's value." + ) + self.gradient_accumulation_steps = deepspeed_gradient_accumulation_steps + + config_kwargs = { + "gradient_clipping": 1.0, + "zero_optimization.stage3_gather_16bit_weights_on_model_save": False, + } + # This block is skipped when preparing just a model and DL is absent from current call's args + if batch_size_per_device is not None: + config_kwargs["train_micro_batch_size_per_gpu"] = batch_size_per_device + config_kwargs["train_batch_size"] = ( + batch_size_per_device + * deepspeed_plugin.get_value("gradient_accumulation_steps") + * self.num_processes + // sp_size + ) + + model = None + optimizer = None + scheduler = None + for obj in result: + if isinstance(obj, torch.nn.Module): + model = obj + elif isinstance(obj, (torch.optim.Optimizer, DummyOptim)): + optimizer = obj + elif (isinstance(obj, (LRScheduler, DummyScheduler))) or ( + type(obj).__name__ in deepspeed.runtime.lr_schedules.VALID_LR_SCHEDULES + ): + scheduler = obj + + if optimizer is not None: + if "optimizer" in deepspeed_plugin.deepspeed_config and not isinstance(optimizer, (DummyOptim)): + raise ValueError( + "You cannot specify an optimizer in the config file and in the code at the same time. 
" + "Please remove the optimizer from the config file or " + "create `accelerate.utils.DummyOptim` in the code." + ) + elif "optimizer" not in deepspeed_plugin.deepspeed_config and isinstance(optimizer, (DummyOptim)): + raise ValueError( + "You cannot create a `DummyOptim` without specifying an optimizer in the config file." + ) + + if isinstance(optimizer, (torch.optim.Optimizer)): + deepspeed_plugin.deepspeed_config["zero_allow_untested_optimizer"] = True + + if scheduler is not None: + if "scheduler" in deepspeed_plugin.deepspeed_config and not isinstance(scheduler, (DummyScheduler)): + raise ValueError( + "You cannot specify a scheduler in the config file and in the code at the same time. " + "Please remove the scheduler from the config file or " + "create `accelerate.utils.DummyScheduler` in the code." + ) + elif ( + "scheduler" not in deepspeed_plugin.deepspeed_config + and isinstance(scheduler, (DummyScheduler)) + and scheduler.lr_scheduler_callable is None + ): + raise ValueError( + "Either specify a scheduler in the config file or " + "pass in the `lr_scheduler_callable` parameter when using `accelerate.utils.DummyScheduler`." + ) + + if optimizer is not None and scheduler is not None: + if isinstance(optimizer, (DummyOptim)) and not isinstance(scheduler, (DummyScheduler)): + raise ValueError( + "You can only specify `accelerate.utils.DummyScheduler` in the code when using " + "`accelerate.utils.DummyOptim`." 
+ ) + + if model is not None: + # If we are using FP8, we need to apply the autowrap now + if self.fp8_backend == FP8BackendType.TE: + model = apply_fp8_autowrap(model, self.fp8_recipe_handler) + # if the model is an MOE, set the appropriate MOE layers as leaf Z3 modules + deepspeed_plugin.set_moe_leaf_modules(model) + # deal with config keys that use `auto` value and rely on model's hidden_size + hidden_size_based_keys = [ + "zero_optimization.reduce_bucket_size", + "zero_optimization.stage3_prefetch_bucket_size", + "zero_optimization.stage3_param_persistence_threshold", + ] + hidden_size_auto_keys = [x for x in hidden_size_based_keys if deepspeed_plugin.is_auto(x)] + if len(hidden_size_auto_keys) > 0: + reasoning = ( + "therefore it's not possible to automatically fill out the following `auto` entries " + + f"in the DeepSpeed config file: {hidden_size_auto_keys}. You can fix that by replacing " + + "`auto` values for these keys with an integer value of your choice." + ) + if not hasattr(model, "config"): + raise ValueError("Can't find `model.config` entry, " + reasoning) + + if hasattr(model.config, "hidden_size"): + hidden_size = model.config.hidden_size + elif hasattr(model.config, "hidden_sizes"): + # if there are many hidden sizes pick the largest one + hidden_size = max(model.config.hidden_sizes) + else: + raise ValueError( + "Can find neither `model.config.hidden_size` nor `model.config.hidden_sizes`, " + reasoning + ) + + config_kwargs.update( + { + "zero_optimization.reduce_bucket_size": hidden_size * hidden_size, + "zero_optimization.stage3_prefetch_bucket_size": int(0.9 * hidden_size * hidden_size), + "zero_optimization.stage3_param_persistence_threshold": 10 * hidden_size, + } + ) + + if isinstance(optimizer, (DummyOptim)): + config_kwargs.update( + {"optimizer.params.lr": optimizer.lr, "optimizer.params.weight_decay": optimizer.weight_decay} + ) + if isinstance(scheduler, (DummyScheduler)) and scheduler.lr_scheduler_callable is None: + max_lr = ( + 
getattr(scheduler.optimizer, "lr", None) + if getattr(scheduler.optimizer, "defaults", None) is None + else scheduler.optimizer.defaults["lr"] + ) + config_kwargs.update( + { + "scheduler.params.warmup_min_lr": 0, + "scheduler.params.warmup_max_lr": max_lr, + "scheduler.params.warmup_num_steps": scheduler.warmup_num_steps, + } + ) + if scheduler.total_num_steps is not None: + config_kwargs["scheduler.params.total_num_steps"] = ( + math.ceil(scheduler.total_num_steps / self.num_processes) + if not self.split_batches + else scheduler.total_num_steps + ) + + deepspeed_plugin.deepspeed_config_process(must_match=False, **config_kwargs) + self.deepspeed_config = deepspeed_plugin.deepspeed_config + + # note: batch_size derivation is all over the map, especiall in HF Trainer, so try to fix it at the last moment if needed + pc = self.parallelism_config + if pc is not None and pc.sp_backend == "deepspeed" and pc.sp_size > 1: + self.deepspeed_config["train_batch_size"] = ( + self.deepspeed_config["train_micro_batch_size_per_gpu"] + * self.deepspeed_config["gradient_accumulation_steps"] + * pc.data_parallel_size + ) + + kwargs = dict(model=model, config_params=self.deepspeed_config) + if optimizer is not None: + if isinstance(optimizer, (DummyOptim)): + kwargs["model_parameters"] = optimizer.params + if isinstance(scheduler, (DummyScheduler)) and scheduler.lr_scheduler_callable is not None: + kwargs["lr_scheduler"] = scheduler.lr_scheduler_callable + else: + if self.deepspeed_config["zero_optimization"].get("offload_optimizer", {}).get( + "device", "none" + ) != "none" and self.deepspeed_config.get("zero_force_ds_cpu_optimizer", True): + if self.device.type == "hpu" and os.environ.get("PT_HPU_LAZY_MODE", "1") == "1": + raise ValueError( + "You can't use an Offload Optimizer with HPU in Lazy Mode. " + "Please set the environment variable `PT_HPU_LAZY_MODE` to `0`." 
+ ) + + optimizer = map_pytorch_optim_to_deepspeed(optimizer) + kwargs["optimizer"] = optimizer + if scheduler is not None: + if type(scheduler).__name__ in deepspeed.runtime.lr_schedules.VALID_LR_SCHEDULES: + kwargs["lr_scheduler"] = scheduler + + if self.device.type == "hpu": + # This env variable is initialized here to make sure it is set to "true" + # It should be done by the launcher but it does not work for multi-node runs + os.environ["DEEPSPEED_USE_HPU"] = "true" + + mpu = None + if sp_size > 1: + if sp_backend != "deepspeed": + raise ValueError( + f"In order to use the configured {sp_size=} with DeepSpeed, you need to configure sp_backend='deepspeed', yet you configured it to be {sp_backend=}." + ) + + ver_min_required = "0.18.2" + if not compare_versions("deepspeed", ">=", ver_min_required): + raise ImportError( + f"Deepspeed ALST/Ulysses requires deepspeed>={ver_min_required}. Please update DeepSpeed via `pip install deepspeed -U`." + ) + + from deepspeed.runtime.sequence_parallel.ulysses_sp import ( + UlyssesSPAttentionHF, + UlyssesSPDataLoaderAdapter, + ) + + if not hasattr(model, "config"): + raise ValueError( + "UlyssesSPAttentionHF currently works with HF Transformers and expects the model object to have a config attribute but this model doesn't have one." 
+ ) + + kwagrs = {} + signature = inspect.signature(UlyssesSPAttentionHF.register_with_transformers) + if "disable_in_eval" in signature.parameters.keys(): + kwagrs["disable_in_eval"] = True + + mpu = UlyssesSPAttentionHF.register_with_transformers( + model_name_or_path=model, + sequence_parallel_size=sp_size, + seq_length=sp_handler.sp_seq_length, + seq_length_is_variable=sp_handler.sp_seq_length_is_variable, + core_attn_implementation=sp_handler.sp_attn_implementation, + micro_batch_size=batch_size_per_device, + **kwagrs, + ) + kwargs["mpu"] = mpu + + for i in range(len(result)): + if isinstance(result[i], torch.utils.data.DataLoader): + if sp_size > 1: + # note that in case dataloader was prepared apart from model (for the external accelerator.prepare call) you'd need to call deepspeed_ulysses_dl_adapter after prepare(model) (see HF Trainer as the use-case) + sp_group = mpu.get_sequence_parallel_group() + sp_world_size = mpu.get_sequence_parallel_world_size() + sp_rank = mpu.get_sequence_parallel_rank() + result[i] = UlyssesSPDataLoaderAdapter( + result[i], + sp_rank=sp_rank, + sp_group=sp_group, + sp_world_size=sp_world_size, + device=self.device, # model.device, + ) + + engine, optimizer, _, lr_scheduler = ds_initialize(**kwargs) + + if compare_versions("deepspeed", ">=", "0.14.4") and self.state.dynamo_plugin.backend != DynamoBackend.NO: + compile_kwargs = self.state.dynamo_plugin.to_kwargs() + if self.state.dynamo_plugin.use_regional_compilation: + compile_regions_deepspeed(engine.module, **compile_kwargs) + else: + engine.compile(backend=compile_kwargs.pop("backend"), compile_kwargs=compile_kwargs) + if optimizer is not None: + optimizer = DeepSpeedOptimizerWrapper(optimizer) + if scheduler is not None: + if lr_scheduler is None: + scheduler = AcceleratedScheduler( + scheduler, + optimizer, + step_with_optimizer=self.step_scheduler_with_optimizer, + split_batches=self.split_batches, + ) + else: + scheduler = DeepSpeedSchedulerWrapper(lr_scheduler, 
optimizer) + + for i in range(len(result)): + if isinstance(result[i], torch.nn.Module): + result[i] = engine + elif isinstance(result[i], (torch.optim.Optimizer, DummyOptim)): + result[i] = optimizer + elif (isinstance(result[i], (LRScheduler, DummyScheduler))) or ( + type(result[i]).__name__ in deepspeed.runtime.lr_schedules.VALID_LR_SCHEDULES + ): + result[i] = scheduler + + # pointing for deepspeed_engine_wrapped.backward() + if self.deepspeed_engine_wrapped is None: + self.deepspeed_engine_wrapped = DeepSpeedEngineWrapper(engine) + else: + logger.warning( + "A wrapped DeepSpeed engine reference is currently tied for this `Accelerator()` instance. " + "If you want to call `accelerator.backward()` referencing a new model/engine, " + "please create a separate `Accelerator()` instance and call `accelerator.prepare()` on it." + ) + self._models.append(engine) + if optimizer is not None: + self._optimizers.append(optimizer) + if scheduler is not None: + self._schedulers.append(scheduler) + return tuple(result) + + def deepspeed_ulysses_dl_adapter(self, dl, model): + """this is normally called as part of `prepare` but when dataloader was prepared apart from model (for the external accelerator.prepare call) this additional call needs to be made after prepare(model) (see HF Trainer as the use-case)""" + sp_size = self.parallelism_config.sp_size if self.parallelism_config else 1 + if sp_size == 1: + return dl + from deepspeed.runtime.sequence_parallel.ulysses_sp import UlyssesSPDataLoaderAdapter + from deepspeed.utils import groups + + sp_group = groups._get_sequence_parallel_group() + sp_world_size = groups._get_sequence_parallel_world_size() + sp_rank = groups._get_sequence_parallel_rank() + dl = UlyssesSPDataLoaderAdapter( + dl, + sp_rank=sp_rank, + sp_group=sp_group, + sp_world_size=sp_world_size, + device=model.device, + ) + return dl + + def _prepare_megatron_lm(self, *args): + megatron_lm_plugin = self.state.megatron_lm_plugin + micro_batch_size = None + if not 
megatron_lm_plugin.megatron_dataset_flag: + batch_sizes = [obj.batch_size for obj in args if hasattr(obj, "batch_size")] + if len(batch_sizes) == 0: + raise ValueError( + "You must specify a training or evaluation dataloader in `accelerate.prepare()` when using Megatron-LM." + ) + + micro_batch_size = min(batch_sizes) if megatron_lm_plugin.is_train_batch_min else max(batch_sizes) + if len(batch_sizes) > 1: + logger.info( + "Since you passed both train and evaluation dataloader, `is_train_batch_min` (here " + f"{megatron_lm_plugin.is_train_batch_min} will decide the `train_batch_size` ({micro_batch_size})." + ) + else: + for obj in args: + if isinstance(obj, MegatronLMDummyDataLoader): + micro_batch_size = obj.dataset_args["micro_batch_size"] + break + if micro_batch_size is not None: + dp_degree = self.num_processes // (megatron_lm_plugin.tp_degree * megatron_lm_plugin.pp_degree) + megatron_lm_plugin.set_training_args(micro_batch_size, dp_degree) + else: + raise ValueError( + "When you do not pass the dataloader parameter, the `data_parallel_size`, " + "`micro_batch_size`, and `global_batch_size` megatron parameters will not be updated." + ) + model = None + optimizer = None + scheduler = None + batch_data = None + for obj in args: + if isinstance(obj, torch.utils.data.DataLoader) and batch_data is None: + batch_data = next(iter(obj)) + elif isinstance(obj, torch.nn.Module): + model = obj + elif isinstance(obj, (torch.optim.Optimizer)): + optimizer = obj + elif isinstance(obj, (LRScheduler, MegatronLMDummyScheduler)): + scheduler = obj + + if model is not None: + megatron_lm_plugin.set_network_size_args(model, batch_data) + if optimizer is not None: + megatron_lm_plugin.set_optimizer_type(optimizer) + if scheduler is not None: + if not isinstance(scheduler, MegatronLMDummyScheduler): + raise ValueError( + "You can't use a custom scheduler with Megatron-LM. Please use the `accelerate.utils.MegatronLMDummyScheduler` instead." 
+ ) + megatron_lm_plugin.set_scheduler_args(scheduler) + + # initialize megatron-lm + megatron_lm_initialize(self, args_defaults=megatron_lm_plugin.megatron_lm_default_args) + + (model, optimizer, scheduler) = megatron_lm_prepare_model_optimizer_scheduler(self) + self.wait_for_everyone() + + counter = 0 + result = [] + for obj in args: + if isinstance(obj, torch.utils.data.DataLoader): + result.append(megatron_lm_prepare_data_loader(self, obj)) + counter += 1 + elif isinstance(obj, MegatronLMDummyDataLoader): + if counter == 0: + obj.set_megatron_data_args() + dataloaders = megatron_lm_prepare_data_loader(self, obj) + result.append(dataloaders[counter]) + counter += 1 + else: + result.append(obj) + + if model is not None: + model = MegatronEngine(self, model, optimizer, scheduler) + if optimizer is not None: + optimizer = MegatronLMOptimizerWrapper(optimizer) + if scheduler is not None: + scheduler = MegatronLMSchedulerWrapper(scheduler, optimizer) + + for i in range(len(result)): + if isinstance(result[i], torch.nn.Module): + result[i] = model + elif isinstance(result[i], torch.optim.Optimizer): + result[i] = optimizer + elif isinstance(result[i], MegatronLMDummyScheduler): + result[i] = scheduler + + if model is not None: + self._models.append(model) + if len(self._models) > 1: + raise AssertionError( + "You can't use same `Accelerator()` instance with multiple models when using Megatron-LM" + ) + if optimizer is not None: + self._optimizers.append(optimizer) + if scheduler is not None: + self._schedulers.append(scheduler) + + return tuple(result) + + def _prepare_device_mesh(self): + """ + Prepare the device mesh for distributed training. The dataloader will determine how to load data based on the + device mesh. 
+ """ + if self.distributed_type == DistributedType.DEEPSPEED and hasattr(self.state, "ds_device_mesh"): + return self.state.ds_device_mesh + else: + return self.torch_device_mesh + + def _prepare_msamp(self, *args, device_placement): + warnings.warn( + "MS-AMP is deprecated and will be removed in a future version of Accelerate. " + "Please use `'te'` (Transformer Engine) or `'torchao'` as the backend for FP8 " + "mixed precision training instead.", + FutureWarning, + ) + if not is_msamp_available(): + raise ImportError( + "MS-AMP was not found on your system. Please ensure that MS-AMP is available " + " or choose `'te'` as the backend for FP8 mixed precision training." + ) + # We've already checked for FSDP + MS-AMP during `__init__` + import msamp + + model, optimizer = None, None + optimizer_index = None + num_models, num_optimizers = 0, 0 + result = [obj for obj in args] + for i, obj in enumerate(result): + if isinstance(obj, torch.nn.Module): + model = obj + num_models += 1 + elif isinstance(obj, (torch.optim.Optimizer)): + optimizer = obj + optimizer_index = i + num_optimizers += 1 + # DataLoader/Scheduler case + if optimizer is None and model is None: + return result, device_placement + elif optimizer is None or model is None: + raise ValueError( + "You must pass a model and an optimizer together to `accelerate.prepare()` when using MS-AMP." + ) + elif num_models > 1 or num_optimizers > 1: + raise ValueError( + f"You can't use multiple models ({num_models}) or optimizers {num_optimizers} with MS-AMP." 
def prepare_data_loader(
    self, data_loader: torch.utils.data.DataLoader, device_placement=None, slice_fn_for_dispatch=None
):
    """
    Wraps a single PyTorch DataLoader for the current distributed setup. Using [`Accelerator.prepare`] is the
    recommended entry point.

    Args:
        data_loader (`torch.utils.data.DataLoader`):
            The plain PyTorch DataLoader to wrap.
        device_placement (`bool`, *optional*):
            Whether the prepared dataloader should place batches on the proper device. When `None`, falls back to
            `self.device_placement`, except on XLA where it falls back to `False`.
        slice_fn_for_dispatch (`Callable`, *optional*):
            Forwarded unchanged to the underlying `prepare_data_loader` helper. Intended for slicing tensors across
            `num_processes`; only relevant when `dispatch_batches` is `True`.

    Returns:
        The prepared dataloader, which is also recorded in `self._dataloaders`. A dataloader already flagged with
        `_is_accelerate_prepared` is returned as-is.

    Example:

    ```python
    >>> import torch
    >>> from accelerate import Accelerator

    >>> accelerator = Accelerator()
    >>> data_loader = torch.utils.data.DataLoader(...)
    >>> data_loader = accelerator.prepare_data_loader(data_loader, device_placement=True)
    ```
    """
    already_prepared = getattr(data_loader, "_is_accelerate_prepared", False)
    if already_prepared:
        # Never wrap twice (can happen with `find_batch_size` retries); just make sure it is tracked.
        if data_loader not in self._dataloaders:
            self._dataloaders.append(data_loader)
        return data_loader

    if device_placement is None:
        if self.distributed_type != DistributedType.XLA:
            device_placement = self.device_placement
        else:
            device_placement = False

    device_mesh = self._prepare_device_mesh()
    target_device = self.device

    options = {
        "num_processes": self.num_processes,
        "process_index": self.process_index,
        "split_batches": self.split_batches,
        "put_on_device": device_placement,
        "rng_types": self.rng_types.copy(),
        "dispatch_batches": self.dispatch_batches,
        "even_batches": self.even_batches,
        "slice_fn_for_dispatch": slice_fn_for_dispatch,
        "use_seedable_sampler": self.use_seedable_sampler,
        "data_seed": self.dataloader_config.data_seed,
        "non_blocking": self.non_blocking,
        "use_stateful_dataloader": self.use_stateful_dataloader,
        "torch_device_mesh": device_mesh,
    }
    wrapped_loader = prepare_data_loader(data_loader, target_device, **options)
    self._dataloaders.append(wrapped_loader)
    return wrapped_loader
+ >>> optimizer = accelerator.prepare_optimizer(optimizer, device_placement=True) + ``` + """ + if is_lomo_available(): + # We need to import locally to avoid circular imports since lomo imports stuff from + # transformers & accelerate + from lomo_optim import AdaLomo, Lomo + + # Support multiple optimizers: https://github.com/huggingface/accelerate/pull/2695#discussion_r1589164607 + self.has_lomo_optimizer |= isinstance(optimizer, (Lomo, AdaLomo)) + + # Ensure we can't double wrap an optimizer due to `find_batch_size` + if getattr(optimizer, "_is_accelerate_prepared", False): + if optimizer not in self._optimizers: + self._optimizers.append(optimizer) + return optimizer + if device_placement is None: + device_placement = self.device_placement + # NOTE: Special case with MS-AMP we do *not* pass in the scaler explicitly to the `AcceleratedOptimizer`, + # Their optimizer handles it for us. + scaler = None if self.fp8_backend == FP8BackendType.MSAMP else self.scaler + optimizer = AcceleratedOptimizer(optimizer, device_placement=device_placement, scaler=scaler) + self._optimizers.append(optimizer) + return optimizer + + def prepare_scheduler(self, scheduler: LRScheduler): + """ + Prepares a PyTorch Scheduler for training in any distributed setup. It is recommended to use + [`Accelerator.prepare`] instead. + + Args: + scheduler (`torch.optim.lr_scheduler.LRScheduler`): + A vanilla PyTorch scheduler to prepare + + Example: + + ```python + >>> import torch + >>> from accelerate import Accelerator + + >>> accelerator = Accelerator() + >>> optimizer = torch.optim.Adam(...) + >>> scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, ...) 
def backward(self, loss, **kwargs):
    """
    Runs the backward pass appropriate for the current configuration. Should be used in lieu of
    `loss.backward()`.

    Outside of DeepSpeed the loss is first divided by `self.gradient_accumulation_steps`. The call is then
    dispatched as follows:

    - DeepSpeed: `self.deepspeed_engine_wrapped.backward(loss, sync_gradients=self.sync_gradients, **kwargs)`.
    - Megatron-LM: returns without calling any backward here.
    - A gradient scaler is set: `self.scaler.scale(loss).backward(**kwargs)`.
    - A `learning_rate` kwarg was passed and a LOMO optimizer is in use: `self.lomo_backward(loss, learning_rate)`.
    - Otherwise: `loss.backward(**kwargs)`.

    Args:
        loss:
            The loss to back-propagate (presumably a scalar `torch.Tensor`).
        **kwargs:
            Forwarded to the selected backward call. `learning_rate` is additionally read for the LOMO path.

    Example:

    ```python
    >>> from accelerate import Accelerator

    >>> accelerator = Accelerator(gradient_accumulation_steps=2)
    >>> outputs = model(inputs)
    >>> loss = loss_fn(outputs, labels)
    >>> accelerator.backward(loss)
    ```
    """
    learning_rate = kwargs.get("learning_rate")

    if self.distributed_type != DistributedType.DEEPSPEED:
        # deepspeed handles loss scaling by gradient_accumulation_steps in its `backward`
        loss = loss / self.gradient_accumulation_steps
    if self.distributed_type == DistributedType.DEEPSPEED:
        self.deepspeed_engine_wrapped.backward(loss, sync_gradients=self.sync_gradients, **kwargs)
    elif self.distributed_type == DistributedType.MEGATRON_LM:
        return
    elif self.scaler is not None:
        self.scaler.scale(loss).backward(**kwargs)
    elif learning_rate is not None and self.has_lomo_optimizer:
        self.lomo_backward(loss, learning_rate)
    else:
        loss.backward(**kwargs)


def set_trigger(self):
    """
    Sets the internal trigger tensor to 1 on the current process. A later call to
    [`Accelerator.check_trigger`] should follow; it checks the flag across all processes.

    Note:
        Does not require `wait_for_everyone()`

    Example:

    ```python
    >>> from accelerate import Accelerator

    >>> accelerator = Accelerator()
    >>> # Assume later in the training script
    >>> # `should_do_breakpoint` is a custom function to monitor when to break,
    >>> # e.g. when the loss is NaN
    >>> if should_do_breakpoint(loss):
    ...     accelerator.set_trigger()
    >>> # Assume later in the training script
    >>> if accelerator.check_trigger():
    ...     break
    ```
    """
    self.flag_tensor = torch.tensor(1, device=self.device)


def check_trigger(self):
    """
    Checks if the internal trigger tensor has been set to 1 in any of the processes. If so, returns `True` and
    resets the local trigger tensor to 0.

    Note:
        Does not require `wait_for_everyone()`

    Returns:
        `bool`: `True` when the reduced flag is at least 1, `False` otherwise.

    Example:

    ```python
    >>> from accelerate import Accelerator

    >>> accelerator = Accelerator()
    >>> # Assume later in the training script
    >>> # `should_do_breakpoint` is a custom function to monitor when to break,
    >>> # e.g. when the loss is NaN
    >>> if should_do_breakpoint(loss):
    ...     accelerator.set_trigger()
    >>> # Assume later in the training script
    >>> if accelerator.check_trigger():
    ...     break
    ```
    """
    # Now that we are outside `__init__`, we can initialize it if it is `None` on device
    if self.flag_tensor is None:
        self.flag_tensor = torch.tensor(0, device=self.device)
    # `self.reduce` combines the flag across processes, so any process that set it is seen here.
    reduced_flag = self.reduce(self.flag_tensor)
    if reduced_flag.item() >= 1:
        self.flag_tensor = torch.tensor(0, device=self.device)
        return True
    return False
def clip_grad_norm_(self, parameters, max_norm, norm_type=2):
    """
    Should be used in place of `torch.nn.utils.clip_grad_norm_`.

    Args:
        parameters:
            The parameters whose gradients should be clipped, typically `model.parameters()`.
        max_norm:
            Maximum norm, forwarded to the underlying clipping call.
        norm_type (defaults to 2):
            Type of the norm, forwarded to the underlying clipping call.

    Returns:
        `torch.Tensor`: Total norm of the parameter gradients (viewed as a single vector). Under DeepSpeed the
        value comes from `get_global_grad_norm()` of the wrapped engine, or is `None` when no engine is wrapped.

    Example:

    ```python
    >>> from accelerate import Accelerator

    >>> accelerator = Accelerator(gradient_accumulation_steps=2)
    >>> dataloader, model, optimizer, scheduler = accelerator.prepare(dataloader, model, optimizer, scheduler)

    >>> for input, target in dataloader:
    ...     optimizer.zero_grad()
    ...     output = model(input)
    ...     loss = loss_func(output, target)
    ...     accelerator.backward(loss)
    ...     if accelerator.sync_gradients:
    ...         accelerator.clip_grad_norm_(model.parameters(), max_grad_norm)
    ...     optimizer.step()
    ```
    """

    def _owns_parameters(model, params):
        # Compare by identity. `list == list` falls back to `tensor == tensor` for distinct objects, and taking
        # the truth value of that element-wise result raises for multi-element tensors (e.g. when a second
        # prepared model has the same number of parameters).
        model_params = list(model.parameters())
        return len(model_params) == len(params) and all(p is q for p, q in zip(params, model_params))

    if self.distributed_type == DistributedType.FSDP:
        self.unscale_gradients()
        # Materialize once: `parameters` may be a generator and is inspected for every model.
        parameters = list(parameters)
        for model in self._models:
            if _owns_parameters(model, parameters):
                if not self.is_fsdp2:
                    return model.clip_grad_norm_(max_norm, norm_type)
                # viz: https://github.com/pytorch/torchtitan/blob/main/docs/fsdp.md
                return torch.nn.utils.clip_grad_norm_(parameters, max_norm, norm_type=norm_type)
    elif self.distributed_type == DistributedType.DEEPSPEED:
        # DeepSpeed handles gradient clipping internally, but we can retrieve the gradient norm
        if self.deepspeed_engine_wrapped is not None:
            return self.deepspeed_engine_wrapped.get_global_grad_norm()
        return None
    elif self.distributed_type == DistributedType.XLA:
        # Reduce gradients first for XLA
        for acc_opt in self._optimizers:
            if not acc_opt.gradient_state.is_xla_gradients_synced:
                opt = acc_opt
                while isinstance(opt, AcceleratedOptimizer):
                    opt = opt.optimizer
                gradients = xm._fetch_gradients(opt)
                # Use xm.all_reduce to perform an in-place all-reduce. Recursive all-reduce each tensor
                # one by one in self.reduce is non-inplace.
                xm.all_reduce("sum", gradients, scale=1.0 / self.num_processes)
                # Set is_xla_gradients_synced to True to avoid all-reduce twice in the AcceleratedOptimizer step.
                acc_opt.gradient_state.is_xla_gradients_synced = True
        if os.environ.get("ACCELERATE_USE_FSDP", "false").lower() == "true":
            self.unscale_gradients()
            parameters = list(parameters)
            for model in self._models:
                if _owns_parameters(model, parameters):
                    return model.clip_grad_norm_(max_norm, norm_type)
    # Generic path; also reached when no prepared model matched `parameters` above.
    self.unscale_gradients()
    return torch.nn.utils.clip_grad_norm_(parameters, max_norm, norm_type=norm_type)
+ + Example: + + ```python + >>> from accelerate import Accelerator + + >>> accelerator = Accelerator(gradient_accumulation_steps=2) + >>> dataloader, model, optimizer, scheduler = accelerator.prepare(dataloader, model, optimizer, scheduler) + + >>> for input, target in dataloader: + ... optimizer.zero_grad() + ... output = model(input) + ... loss = loss_func(output, target) + ... accelerator.backward(loss) + ... if accelerator.sync_gradients: + ... accelerator.clip_grad_value_(model.parameters(), clip_value) + ... optimizer.step() + ``` + """ + if self.distributed_type in [DistributedType.DEEPSPEED, DistributedType.FSDP]: + raise Exception("DeepSpeed and FSDP do not support `clip_grad_value_`. Use `clip_grad_norm_` instead.") + self.unscale_gradients() + torch.nn.utils.clip_grad_value_(parameters, clip_value) + + def gather(self, tensor): + """ + Gather the values in *tensor* across all processes and concatenate them on the first dimension. Useful to + regroup the predictions from all processes when doing evaluation. + + Note: + This gather happens in all processes. + + Args: + tensor (`torch.Tensor`, or a nested tuple/list/dictionary of `torch.Tensor`): + The tensors to gather across all processes. + + Returns: + `torch.Tensor`, or a nested tuple/list/dictionary of `torch.Tensor`: The gathered tensor(s). Note that the + first dimension of the result is *num_processes* multiplied by the first dimension of the input tensors. 
def gather_for_metrics(self, input_data, use_gather_object=False):
    """
    Gathers `input_data` across processes and, at the end of a dataloader, truncates the result to the recorded
    remainder so duplicated samples of the last batch are dropped. Should be used for gathering the inputs and
    targets for metric calculation.

    Args:
        input_data (`torch.Tensor`, `object`, a nested tuple/list/dictionary of `torch.Tensor`, or a nested tuple/list/dictionary of `object`):
            The tensors or objects for calculating metrics across all processes
        use_gather_object(`bool`):
            Whether to forcibly use gather_object instead of gather (which is already done if all objects passed do
            not contain tensors). This flag can be useful for gathering tensors with different sizes that we don't
            want to pad and concatenate along the first dimension. Using it with GPU tensors is not well supported
            and inefficient as it incurs GPU -> CPU transfer since tensors would be pickled.

    Returns:
        The gathered data, truncated to `self.gradient_state.remainder` entries when that remainder is positive
        and the dataloader is at its end. Any exception raised while adjusting yields the untruncated data.

    Example:

    ```python
    >>> # Assuming two processes, with a batch size of 5 on a dataset with 9 samples
    >>> import torch
    >>> from accelerate import Accelerator

    >>> accelerator = Accelerator()
    >>> dataloader = torch.utils.data.DataLoader(range(9), batch_size=5)
    >>> dataloader = accelerator.prepare(dataloader)
    >>> batch = next(iter(dataloader))
    >>> gathered_items = accelerator.gather_for_metrics(batch)
    >>> len(gathered_items)
    9
    ```
    """
    # Probe the structure: `recursively_apply` raises `TypeError` when something other than a tensor is found.
    try:
        recursively_apply(lambda x: x, input_data, error_on_other_type=True)
    except TypeError:
        only_tensors = False
    else:
        only_tensors = True

    use_gather_object = use_gather_object or not only_tensors
    data = gather_object(input_data) if use_gather_object else self.gather(input_data)

    try:
        if not self.gradient_state.end_of_dataloader:
            # Not at the end of the dataloader, no need to adjust the tensors
            return data

        # at the end of a dataloader, `gather_for_metrics` regresses to
        # `gather` unless the dataset has a remainder so log.
        if self.gradient_state.remainder == -1:
            logger.info(
                "The used dataset had no length, returning gathered tensors. You should drop the remainder yourself."
            )
            return data

        if self.gradient_state.remainder > 0:
            # Last batch needs to be truncated on distributed systems as it contains additional samples
            def _adjust_samples(tensor):
                return tensor[: self.gradient_state.remainder]

            if use_gather_object:
                # gather_object put the objects in a list
                return _adjust_samples(data)
            return recursively_apply(_adjust_samples, data)

        # no remainder even though at end of dataloader, so nothing to do.
        return data
    except Exception:
        # Dataset had no length or raised an error
        return data
def pad_across_processes(self, tensor, dim=0, pad_index=0, pad_first=False):
    """
    Pads the tensors of a nested list/tuple/dictionary so they have the same size on every device and can be
    gathered safely. Delegates to the module-level `pad_across_processes` helper.

    Args:
        tensor (nested list/tuple/dictionary of `torch.Tensor`):
            The data to gather.
        dim (`int`, *optional*, defaults to 0):
            The dimension on which to pad.
        pad_index (`int`, *optional*, defaults to 0):
            The value with which to pad.
        pad_first (`bool`, *optional*, defaults to `False`):
            Whether to pad at the beginning or the end.

    Returns:
        `torch.Tensor`, or a nested tuple/list/dictionary of `torch.Tensor`:
            The padded tensor(s).

    Example:

    ```python
    >>> # Assuming two processes, with the first processes having a tensor of size 1 and the second of size 2
    >>> import torch
    >>> from accelerate import Accelerator

    >>> accelerator = Accelerator()
    >>> process_tensor = torch.arange(accelerator.process_index + 1).to(accelerator.device)
    >>> padded_tensor = accelerator.pad_across_processes(process_tensor)
    >>> padded_tensor.shape
    torch.Size([2])
    ```
    """
    padded = pad_across_processes(tensor, dim=dim, pad_index=pad_index, pad_first=pad_first)
    return padded


def unwrap_model(self, model, keep_fp32_wrapper: bool = True, keep_torch_compile: bool = True):
    """
    Strips the extra wrapper layers that [`~Accelerator.prepare`] may have added around `model`. Useful before
    saving the model. Delegates to `extract_model_from_parallel`.

    Args:
        model (`torch.nn.Module`):
            The model to unwrap.
        keep_fp32_wrapper (`bool`, *optional*, defaults to `True`):
            Whether to not remove the mixed precision hook if it was added.
        keep_torch_compile (`bool`, *optional*, defaults to `True`):
            Whether to not unwrap compiled model if compiled.

    Returns:
        `torch.nn.Module`: The unwrapped model.

    Example:

    ```python
    >>> # Assuming two GPU processes
    >>> from torch.nn.parallel import DistributedDataParallel
    >>> from accelerate import Accelerator

    >>> accelerator = Accelerator()
    >>> model = accelerator.prepare(MyModel())
    >>> print(model.__class__.__name__)
    DistributedDataParallel

    >>> model = accelerator.unwrap_model(model)
    >>> print(model.__class__.__name__)
    MyModel
    ```
    """
    unwrapped = extract_model_from_parallel(model, keep_fp32_wrapper, keep_torch_compile)
    return unwrapped
@on_main_process
def init_trackers(self, project_name: str, config: dict | None = None, init_kwargs: dict | None = None):
    """
    Initializes a run for all trackers stored in `self.log_with`, potentially with starting configurations

    Args:
        project_name (`str`):
            The name of the project. All trackers will save their data based on this
        config (`dict`, *optional*):
            Optional starting configuration to be logged.
        init_kwargs (`dict`, *optional*):
            A nested dictionary of kwargs to be passed to a specific tracker's `__init__` function. `None` (the
            default) is treated as an empty dictionary. Should be formatted like so:
            ```python
            {"wandb": {"tags": ["tag_a", "tag_b"]}}
            ```

    Example:

    ```python
    >>> from accelerate import Accelerator

    >>> accelerator = Accelerator(log_with="tensorboard")
    >>> accelerator.init_trackers(
    ...     project_name="my_project",
    ...     config={"learning_rate": 0.001, "batch_size": 32},
    ...     init_kwargs={"tensorboard": {"flush_secs": 60}},
    ... )
    ```
    """
    # The annotation allows `None`, so normalize it instead of failing on `.get`; this also avoids sharing a
    # mutable default dictionary between calls.
    init_kwargs = {} if init_kwargs is None else init_kwargs

    for tracker in self.log_with:
        if isinstance(tracker, GeneralTracker):
            # Custom trackers are already initialized
            self.trackers.append(tracker)
            continue

        tracker_key = str(tracker)
        tracker_init = LOGGER_TYPE_TO_CLASS[tracker_key]
        tracker_kwargs = init_kwargs.get(tracker_key, {})
        if tracker_init.requires_logging_directory:
            # We can skip this check since it was done in `__init__`
            self.trackers.append(tracker_init(project_name, self.logging_dir, **tracker_kwargs))
        else:
            self.trackers.append(tracker_init(project_name, **tracker_kwargs))

    for tracker in self.trackers:
        tracker.start()

    if config is not None:
        for tracker in self.trackers:
            tracker.store_init_configuration(config)


def get_tracker(self, name: str, unwrap: bool = False):
    """
    Returns a `tracker` from `self.trackers` based on `name` on the main process only.

    Args:
        name (`str`):
            The name of a tracker, corresponding to the `.name` property.
        unwrap (`bool`):
            Whether to return the internal tracking mechanism or to return the wrapped tracker instead
            (recommended).

    Returns:
        `GeneralTracker`: The tracker corresponding to `name` if it exists. When no trackers are stored at all, a
        blank `GeneralTracker` is returned instead.

    Raises:
        `ValueError`: If trackers are stored but none of them is called `name`.

    Example:

    ```python
    >>> from accelerate import Accelerator

    >>> accelerator = Accelerator(log_with="tensorboard")
    >>> accelerator.init_trackers("my_project")
    >>> tensorboard_tracker = accelerator.get_tracker("tensorboard")
    ```
    """
    if self.trackers:
        for tracker in self.trackers:
            if tracker.name == name:
                return tracker.tracker if unwrap else tracker
        raise ValueError(f"{name} is not an available tracker stored inside the `Accelerator`.")
    # Handle tracker only made on main process
    return GeneralTracker(_blank=True)


@on_main_process
def log(self, values: dict, step: int | None = None, log_kwargs: dict | None = None):
    """
    Logs `values` to all stored trackers in `self.trackers` on the main process only.

    Args:
        values (`dict`):
            Values should be a dictionary-like object containing only types `int`, `float`, or `str`.
        step (`int`, *optional*):
            The run step. If included, the log will be affiliated with this step.
        log_kwargs (`dict`, *optional*):
            A nested dictionary of kwargs to be passed to a specific tracker's `log` function. `None` (the
            default) is treated as an empty dictionary. Should be formatted like so:
            ```python
            {"wandb": {"tags": ["tag_a", "tag_b"]}}
            ```

    Example:

    ```python
    >>> from accelerate import Accelerator

    >>> accelerator = Accelerator(log_with="tensorboard")
    >>> accelerator.init_trackers("my_project")
    >>> accelerator.log({"loss": 0.5, "accuracy": 0.9})
    ```
    """
    # Same normalization as in `init_trackers`: accept `None` and never share a mutable default.
    log_kwargs = {} if log_kwargs is None else log_kwargs
    for tracker in self.trackers:
        tracker.log(values, step=step, **log_kwargs.get(tracker.name, {}))


def end_training(self):
    """
    Runs any special end training behaviors, such as stopping trackers on the main process only or destroying
    process group. Should always be called at the end of your script if using experiment tracking.

    Example:

    ```python
    >>> from accelerate import Accelerator

    >>> accelerator = Accelerator(log_with="tensorboard")
    >>> accelerator.init_trackers("my_project")
    >>> # Do training
    >>> accelerator.end_training()
    ```
    """
    for tracker in self.trackers:
        tracker.finish()

    self.state.destroy_process_group()
def save_model(
    self,
    model: torch.nn.Module,
    save_directory: Union[str, os.PathLike],
    max_shard_size: Union[int, str] = "10GB",
    safe_serialization: bool = True,
):
    """
    Save a model so that it can be re-loaded using load_checkpoint_in_model

    Arguments:
        model: (`torch.nn.Module`):
            Model to be saved. The model can be wrapped or unwrapped.
        save_directory (`str` or `os.PathLike`):
            Directory to which to save. Will be created if it doesn't exist. If the path is an existing file, an
            error is logged and nothing is saved.
        max_shard_size (`int` or `str`, *optional*, defaults to `"10GB"`):
            The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size
            lower than this size. If expressed as a string, needs to be digits followed by a unit (like `"5MB"`).

            Note: if a single weight of the model is bigger than `max_shard_size`, it will be in its own checkpoint
            shard which will be bigger than `max_shard_size`.
        safe_serialization (`bool`, *optional*, defaults to `True`):
            Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).

    Raises:
        `RuntimeError`: If the model has no offloaded parameters but some parameters are on the meta device.

    Example:

    ```python
    >>> from accelerate import Accelerator

    >>> accelerator = Accelerator()
    >>> model = ...
    >>> accelerator.save_model(model, save_directory)
    ```
    """

    if os.path.isfile(save_directory):
        logger.error(f"Provided path ({save_directory}) should be a directory, not a file")
        return

    # get the state_dict of the model
    if any(has_offloaded_params(module) for module in model.modules()):
        state_dict = get_state_dict_offloaded_model(model)
    else:
        if any(param.device == torch.device("meta") for param in model.parameters()):
            raise RuntimeError("You can't save the model since some parameters are on the meta device.")
        state_dict = self.get_state_dict(model)

    # Case: DeepSpeed zero3 gets gathered and `state_dict` is empty
    if state_dict is None:
        return
    os.makedirs(save_directory, exist_ok=True)

    if safe_serialization:
        state_dict = clean_state_dict_for_safetensors(state_dict)
    weights_name = SAFE_WEIGHTS_NAME if safe_serialization else WEIGHTS_NAME
    filename_pattern = SAFE_WEIGHTS_PATTERN_NAME if safe_serialization else WEIGHTS_PATTERN_NAME

    from huggingface_hub import split_torch_state_dict_into_shards

    state_dict_split = split_torch_state_dict_into_shards(
        state_dict, filename_pattern=filename_pattern, max_shard_size=max_shard_size
    )
    new_filenames = state_dict_split.filename_to_tensors

    # Clean the folder from a previous save
    # Loop invariants: the prefix stale shards must start with and the shard-name format,
    # e.g. pytorch_model-00001-of-00005
    weights_no_suffix = weights_name.replace(".bin", "")
    shard_name_re = re.compile(r"(.*?)-\d{5}-of-\d{5}")
    # NOTE(review): only the ".bin" suffix is stripped here, so this cleanup presumably never matches
    # safetensors shard files - confirm against the values of the SAFE_* constants.
    for filename in os.listdir(save_directory):
        full_filename = os.path.join(save_directory, filename)
        filename_no_suffix = filename.replace(".bin", "")

        # If we have a shard file that is not going to be replaced, we delete it, but only from the main process
        # in distributed settings to avoid race conditions.
        if (
            filename.startswith(weights_no_suffix)
            and os.path.isfile(full_filename)
            and filename not in new_filenames
            and shard_name_re.fullmatch(filename_no_suffix) is not None
            and PartialState().is_main_process
        ):
            os.remove(full_filename)

    # Save the model
    for filename, tensors in new_filenames.items():
        shard = {tensor: state_dict[tensor] for tensor in tensors}
        self.save(shard, os.path.join(save_directory, filename), safe_serialization=safe_serialization)

    # Save index if sharded
    if state_dict_split.is_sharded:
        index = {
            "metadata": state_dict_split.metadata,
            "weight_map": state_dict_split.tensor_to_filename,
        }
        save_index_file = SAFE_WEIGHTS_INDEX_NAME if safe_serialization else WEIGHTS_INDEX_NAME
        save_index_file = os.path.join(save_directory, save_index_file)
        with open(save_index_file, "w", encoding="utf-8") as f:
            content = json.dumps(index, indent=2, sort_keys=True) + "\n"
            f.write(content)
        logger.info(
            f"The model is bigger than the maximum size per checkpoint ({max_shard_size}) and is going to be "
            f"split in {len(state_dict_split.filename_to_tensors)} checkpoint shards. You can find where each parameters has been saved in the "
            f"index located at {save_index_file}."
        )
    else:
        # Report the file name selected for this serialization format, not unconditionally the pickle one.
        path_to_weights = os.path.join(save_directory, weights_name)
        logger.info(f"Model weights saved in {path_to_weights}")
def save_state(self, output_dir: str | None = None, safe_serialization: bool = True, **save_model_func_kwargs):
    """
    Saves the current states of the model, optimizer, scaler, RNG generators, and registered objects to a folder.

    If a `ProjectConfiguration` was passed to the `Accelerator` object with `automatic_checkpoint_naming` enabled
    then checkpoints will be saved to `self.project_dir/checkpoints`, each one in its own folder named
    `checkpoint_{save_iteration}`. If adding one more checkpoint would exceed `total_limit`, the lowest-numbered
    checkpoint folders are deleted first (on the main process only).

    Otherwise they are just saved to `output_dir`.

    Should only be used when wanting to save a checkpoint during training and restoring the state in the same
    environment.

    Args:
        output_dir (`str` or `os.PathLike`):
            The name of the folder to save all relevant weights and states. It is replaced by
            `self.project_dir/checkpoints` when `automatic_checkpoint_naming` is enabled.
        safe_serialization (`bool`, *optional*, defaults to `True`):
            Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).
        save_model_func_kwargs (`dict`, *optional*):
            Additional keyword arguments for saving model which can be passed to the underlying save function, such
            as optional arguments for DeepSpeed's `save_checkpoint` function.

    Returns:
        The value returned by `save_accelerator_state` for this checkpoint.

    Raises:
        `ValueError`: If `automatic_checkpoint_naming` is enabled and the folder for the current `save_iteration`
        already exists.

    Example:

    ```python
    >>> from accelerate import Accelerator

    >>> accelerator = Accelerator()
    >>> model, optimizer, lr_scheduler = ...
    >>> model, optimizer, lr_scheduler = accelerator.prepare(model, optimizer, lr_scheduler)
    >>> accelerator.save_state(output_dir="my_checkpoint")
    ```
    """
    if self.project_configuration.automatic_checkpoint_naming:
        output_dir = os.path.join(self.project_dir, "checkpoints")
    # NOTE(review): `output_dir` is used as-is here, so it presumably must not be `None` when automatic
    # checkpoint naming is disabled - confirm against callers.
    os.makedirs(output_dir, exist_ok=True)
    if self.project_configuration.automatic_checkpoint_naming:
        folders = [os.path.join(output_dir, folder) for folder in os.listdir(output_dir)]
        if (
            self.project_configuration.total_limit is not None
            and (len(folders) + 1 > self.project_configuration.total_limit)
            and self.is_main_process
        ):

            def _inner(folder):
                # Sort key: the first run of digits found in the last path component, as an int.
                return list(map(int, re.findall(r"[\/]?([0-9]+)(?=[^\/]*$)", folder)))[0]

            folders.sort(key=_inner)
            logger.warning(
                f"Deleting {len(folders) + 1 - self.project_configuration.total_limit} checkpoints to make room for new checkpoint."
            )
            # Remove just enough of the lowest-numbered folders so that the new checkpoint fits in the limit.
            for folder in folders[: len(folders) + 1 - self.project_configuration.total_limit]:
                shutil.rmtree(folder)
        output_dir = os.path.join(output_dir, f"checkpoint_{self.save_iteration}")
        if os.path.exists(output_dir):
            raise ValueError(
                f"Checkpoint directory {output_dir} ({self.save_iteration}) already exists. Please manually override `self.save_iteration` with what iteration to start with."
            )
        self.wait_for_everyone()
    os.makedirs(output_dir, exist_ok=True)
    logger.info(f"Saving current state to {output_dir}")

    if self.distributed_type == DistributedType.XLA:
        # Finish running the previous step before checkpointing
        xm.mark_step()

    # Save the models taking care of FSDP and DeepSpeed nuances.
    # FSDP, DeepSpeed and Megatron-LM models are written here directly; only the remaining models contribute
    # a state dict to `weights`.
    weights = []
    for i, model in enumerate(self._models):
        if self.distributed_type == DistributedType.FSDP:
            logger.info("Saving FSDP model")
            save_fsdp_model(self.state.fsdp_plugin, self, model, output_dir, i)
            logger.info(f"FSDP Model saved to output dir {output_dir}")
        elif self.distributed_type == DistributedType.DEEPSPEED:
            logger.info("Saving DeepSpeed Model and Optimizer")
            ckpt_id = f"{MODEL_NAME}" if i == 0 else f"{MODEL_NAME}_{i}"
            model.save_checkpoint(output_dir, ckpt_id, **save_model_func_kwargs)
            logger.info(f"DeepSpeed Model and Optimizer saved to output dir {os.path.join(output_dir, ckpt_id)}")
        elif self.distributed_type == DistributedType.MEGATRON_LM:
            logger.info("Saving Megatron-LM Model, Optimizer and Scheduler")
            model.save_checkpoint(output_dir)
            logger.info(f"Megatron-LM Model , Optimizer and Scheduler saved to output dir {output_dir}")
        else:
            weights.append(self.get_state_dict(model, unwrap=False))

    # Save the optimizers taking care of FSDP and DeepSpeed nuances
    optimizers = []
    if self.distributed_type == DistributedType.FSDP:
        for i, opt in enumerate(self._optimizers):
            logger.info("Saving FSDP Optimizer")
            save_fsdp_optimizer(self.state.fsdp_plugin, self, opt, self._models[i], output_dir, i)
            logger.info(f"FSDP Optimizer saved to output dir {output_dir}")
    elif self.distributed_type not in [DistributedType.DEEPSPEED, DistributedType.MEGATRON_LM]:
        optimizers = self._optimizers

    # Save the lr schedulers taking care of DeepSpeed nuances
    # (`DeepSpeedSchedulerWrapper` instances are left out of the list handed to `save_accelerator_state`).
    schedulers = []
    if self.distributed_type == DistributedType.DEEPSPEED:
        for i, scheduler in enumerate(self._schedulers):
            if isinstance(scheduler, DeepSpeedSchedulerWrapper):
                continue
            schedulers.append(scheduler)
    elif self.distributed_type not in [DistributedType.MEGATRON_LM]:
        schedulers = self._schedulers

    # Save the samplers of the dataloaders
    dataloaders = self._dataloaders

    # Call the save-state pre-hooks stored in `self._save_model_state_pre_hook`. Each hook receives the models,
    # the collected `weights` list and the output directory before anything is written by
    # `save_accelerator_state`.
    for hook in self._save_model_state_pre_hook.values():
        hook(self._models, weights, output_dir)

    save_location = save_accelerator_state(
        output_dir,
        weights,
        optimizers,
        schedulers,
        dataloaders,
        self.state.process_index,
        self.step,
        self.scaler,
        save_on_each_node=self.project_configuration.save_on_each_node,
        safe_serialization=safe_serialization,
    )
    # Objects registered for checkpointing are saved one by one, keyed by their position in the list.
    for i, obj in enumerate(self._custom_objects):
        save_custom_state(obj, output_dir, i, save_on_each_node=self.project_configuration.save_on_each_node)
    self.project_configuration.iteration += 1
    return save_location
def load_state(self, input_dir: str | None = None, load_kwargs: dict | None = None, **load_model_func_kwargs):
    """
    Loads the current states of the model, optimizer, scaler, RNG generators, and registered objects.

    Should only be used in conjunction with [`Accelerator.save_state`]. If a file is not registered for
    checkpointing, it will not be loaded if stored in the directory.

    Args:
        input_dir (`str` or `os.PathLike`):
            The name of the folder all relevant weights and states were saved in. Can be `None` if
            `automatic_checkpoint_naming` is used, and will pick up from the latest checkpoint.
        load_kwargs (`dict`, *optional*):
            Additional keyword arguments for the underlying `load` function. They are forwarded unchanged to
            `load_accelerator_state`.
        load_model_func_kwargs (`dict`, *optional*):
            Additional keyword arguments for loading model which can be passed to the underlying load function,
            such as optional arguments for DeepSpeed's `load_checkpoint` function or a `map_location` to load the
            model and optimizer on. When `map_location` is not given it defaults to `"on_device"` for
            multi-process, multi-device runs (other than `MULTI_XPU`) and to `"cpu"` otherwise.

    Raises:
        `ValueError`: If `input_dir` is given but is not an existing folder, or if it is `None` while automatic
        checkpoint naming is disabled.
        `RuntimeError`: If the number of `custom_checkpoint_<n>.pkl` files in the folder differs from the number
        of objects registered for checkpointing.

    Example:

    ```python
    >>> from accelerate import Accelerator

    >>> accelerator = Accelerator()
    >>> model, optimizer, lr_scheduler = ...
    >>> model, optimizer, lr_scheduler = accelerator.prepare(model, optimizer, lr_scheduler)
    >>> accelerator.load_state("my_checkpoint")
    ```
    """
    if input_dir is not None:
        # Check if folder exists
        input_dir = os.path.expanduser(input_dir)
        if not os.path.isdir(input_dir):
            raise ValueError(f"Tried to find {input_dir} but folder does not exist")
    elif self.project_configuration.automatic_checkpoint_naming:
        # Pick up from automatic checkpoint naming
        input_dir = os.path.join(self.project_dir, "checkpoints")
        folders = [os.path.join(input_dir, folder) for folder in os.listdir(input_dir)]

        def _inner(folder):
            # Sort key: the first run of digits found in the last path component, as an int.
            return list(map(int, re.findall(r"[\/]?([0-9]+)(?=[^\/]*$)", folder)))[0]

        folders.sort(key=_inner)
        # NOTE(review): an empty `checkpoints` folder makes this raise `IndexError` - confirm whether a
        # clearer error is wanted here.
        input_dir = folders[-1]
    else:
        raise ValueError("No input_dir provided and automatic checkpoint naming is disabled.")
    logger.info(f"Loading states from {input_dir}")

    # Load the models taking care of FSDP and DeepSpeed nuances.
    # FSDP, DeepSpeed and Megatron-LM models are loaded here directly; only the remaining models are passed on
    # to `load_accelerator_state` through `models`.
    models = []
    for i, model in enumerate(self._models):
        if self.distributed_type == DistributedType.FSDP:
            logger.info("Loading FSDP model")
            load_fsdp_model(self.state.fsdp_plugin, self, model, input_dir, i)
            logger.info(f"FSDP Model loaded from input dir {input_dir}")
        elif self.distributed_type == DistributedType.DEEPSPEED:
            logger.info("Loading DeepSpeed Model and Optimizer")
            ckpt_id = f"{MODEL_NAME}" if i == 0 else f"{MODEL_NAME}_{i}"
            model.load_checkpoint(input_dir, ckpt_id, **load_model_func_kwargs)
            logger.info(f"DeepSpeed Model and Optimizer loaded from input dir {os.path.join(input_dir, ckpt_id)}")
        elif self.distributed_type == DistributedType.MEGATRON_LM:
            logger.info("Loading Megatron-LM Model, Optimizer and Scheduler")
            model.load_checkpoint(input_dir)
            logger.info(f"Megatron-LM Model , Optimizer and Scheduler loaded from input dir {input_dir}")
        else:
            models.append(model)

    # We need to load the scaler state before the optimizer for FSDP2
    # (`torch.distributed.checkpoint.set_optimizer_state_dict`) which we use to set the state of the optimizer calls `optimizer.step` on
    # a dummy tensor, but since the scaler is not initialized, it will raise an error (the scaler exists but its `_scale` is None)
    # `scaler` stays `None` in that case, so `load_accelerator_state` is not handed the scaler a second time.
    scaler = None
    if self.scaler is not None and self.is_fsdp2:
        input_scaler_file = os.path.join(input_dir, SCALER_NAME)
        scaler_state = torch.load(input_scaler_file)
        self.scaler.load_state_dict(scaler_state)
        # We also need to call the `_lazy_init_scale_growth_tracker` to initialize the scaler, as it would else be called
        # on the first call to scale
        self.scaler._lazy_init_scale_growth_tracker(self.scaler._device)
        logger.info("GradScaler state loaded successfully")
    else:
        scaler = self.scaler

    # Load the optimizers taking care of FSDP and DeepSpeed nuances
    optimizers = []
    if self.distributed_type == DistributedType.FSDP:
        for i, opt in enumerate(self._optimizers):
            logger.info("Loading FSDP Optimizer")
            load_fsdp_optimizer(self.state.fsdp_plugin, self, opt, self._models[i], input_dir, i)
            logger.info(f"FSDP Optimizer loaded from input dir {input_dir}")
    elif self.distributed_type not in [DistributedType.DEEPSPEED, DistributedType.MEGATRON_LM]:
        optimizers = self._optimizers

    # Load the lr schedulers taking care of DeepSpeed nuances
    # (`DeepSpeedSchedulerWrapper` instances are left out of the list handed to `load_accelerator_state`).
    schedulers = []
    if self.distributed_type == DistributedType.DEEPSPEED:
        for i, scheduler in enumerate(self._schedulers):
            if isinstance(scheduler, DeepSpeedSchedulerWrapper):
                continue
            schedulers.append(scheduler)
    elif self.distributed_type not in [DistributedType.MEGATRON_LM]:
        schedulers = self._schedulers

    dataloaders = self._dataloaders

    # Call the load-state pre-hooks stored in `self._load_model_state_pre_hook`. Each hook receives the list of
    # models that are still to be loaded and the input directory.
    for hook in self._load_model_state_pre_hook.values():
        hook(models, input_dir)

    # `map_location` is removed from the kwargs so it is passed positionally below and not forwarded twice.
    map_location = load_model_func_kwargs.pop("map_location", None)
    if map_location is None:
        if self.num_processes > 1 and self.multi_device and self.distributed_type != DistributedType.MULTI_XPU:
            map_location = "on_device"
        else:
            map_location = "cpu"

    override_attributes = load_accelerator_state(
        input_dir,
        models,
        optimizers,
        schedulers,
        dataloaders,
        self.state.process_index,
        scaler,
        map_location,
        load_kwargs,
        **load_model_func_kwargs,
    )
    if "step" in override_attributes:
        self.step = override_attributes["step"]
    # Custom objects are matched purely by count: every `custom_checkpoint_<n>.pkl` file in the folder is
    # counted and compared with the number of registered objects.
    custom_checkpoints = [
        f for f in os.listdir(input_dir) if re.search(r"^custom_checkpoint_\d+\.pkl$", f) is not None
    ]
    if len(custom_checkpoints) != len(self._custom_objects):
        err = (
            f"Number of custom checkpoints in folder {input_dir} does not match the number of registered objects:"
        )
        err += f"\n\tFound checkpoints: {len(custom_checkpoints)}"
        err += f"\n\tRegistered objects: {len(self._custom_objects)}\n"
        err += "Please make sure to only load checkpoints from folders that were created with the same set of registered objects,"
        err += "or avoid using `custom_checkpoint` in the filename for files in that same directory and load them in manually."
        raise RuntimeError(err)
    else:
        logger.info(f"Loading in {len(custom_checkpoints)} custom states")
        for index, obj in enumerate(self._custom_objects):
            load_custom_state(obj, input_dir, index)
def clear(self, *objects):
    """
    Alias for [`Accelerator.free_memory`]: forwards `objects` to it unchanged and returns whatever it returns.

    Call this between two trainings that use different models/optimizers.

    Example:

    ```python
    >>> from accelerate import Accelerator

    >>> accelerator = Accelerator()
    >>> model, optimizer, scheduler = ...
    >>> model, optimizer, scheduler = accelerator.prepare(model, optimizer, scheduler)
    >>> model, optimizer, scheduler = accelerator.clear(model, optimizer, scheduler)
    ```
    """
    released = self.free_memory(*objects)
    return released
+ ">=", DTENSOR_PYTORCH_VERSION + ) + # we know we're in FSDP2 so DTensor is available + if _is_dtensor_available: + from torch.distributed.tensor import DTensor + + accessor_mapping[DTensor] = "_local_tensor" + + named_parameters.update( + { + n: getattr(p, accessor_mapping[type(p)]).data_ptr() + if type(p) in accessor_mapping + else p.data_ptr() + for n, p in obj.named_parameters() + } + ) + return named_parameters + + def _get_devices(self, *args): + model_device = None + optimizer_device = None + for obj in args: + # Loop through model parameters and stop at the first once we have its device. + if isinstance(obj, torch.nn.Module): + for param in obj.parameters(): + model_device = param.device + break + # Loop through optimizer parameters groups and stop at the first once we have its device. + if isinstance(obj, torch.optim.Optimizer): + for param_group in obj.param_groups: + if len(param_group["params"]) > 0: + optimizer_device = param_group["params"][0].device + break + return (model_device, optimizer_device) + + def get_state_dict(self, model, unwrap=True): + """ + Returns the state dictionary of a model sent through [`Accelerator.prepare`] potentially without full + precision. + + Args: + model (`torch.nn.Module`): + A PyTorch model sent through [`Accelerator.prepare`] + unwrap (`bool`, *optional*, defaults to `True`): + Whether to return the original underlying state_dict of `model` or to return the wrapped state_dict + + Returns: + `dict`: The state dictionary of the model potentially without full precision. 
+ + Example: + + ```python + >>> import torch + >>> from accelerate import Accelerator + + >>> accelerator = Accelerator() + >>> net = torch.nn.Linear(2, 2) + >>> net = accelerator.prepare(net) + >>> state_dict = accelerator.get_state_dict(net) + ``` + """ + + if self.distributed_type == DistributedType.DEEPSPEED: + zero3_sharding = self.deepspeed_config["zero_optimization"]["stage"] == 3 + tp_sharding = self.deepspeed_config.get("tensor_parallel", {}).get("autotp_size", 0) > 1 + if zero3_sharding or tp_sharding: + if model.zero_gather_16bit_weights_on_model_save(): + ver_min_required = "0.16.4" + if tp_sharding and not compare_versions("deepspeed", ">=", ver_min_required): + raise ImportError( + f"Deepspeed TP requires deepspeed>={ver_min_required}. Please update DeepSpeed via `pip install deepspeed -U`." + ) + state_dict = ( + model._consolidated_16bit_state_dict() + if tp_sharding + else model._zero3_consolidated_16bit_state_dict() + ) + else: + raise ValueError( + "Cannot get 16bit model weights because `stage3_gather_16bit_weights_on_model_save` in DeepSpeed config is False. " + "To save the model weights in 16bit, set `stage3_gather_16bit_weights_on_model_save` to True in DeepSpeed config file or " + "set `zero3_save_16bit_model` to True when using `accelerate config`. " + "To save the full checkpoint, run `model.save_checkpoint(save_dir)` and use `zero_to_fp32.py` to recover weights." 
def register_for_checkpointing(self, *objects):
    """
    Makes note of `objects` and will save or load them in during `save_state` or `load_state`.

    These should be utilized when the state is being loaded or saved in the same script. It is not designed to be
    used in different scripts.

    Every `object` must have a `load_state_dict` and `state_dict` function to be stored.

    Args:
        *objects:
            The objects to register. They are appended to `self._custom_objects` in the order given, but only
            if every one of them passes validation.

    Raises:
        `ValueError`: If any object lacks `state_dict` or `load_state_dict`. The message lists each offending
        object together with its position among the arguments passed to this call.

    Example:

    ```python
    >>> from accelerate import Accelerator

    >>> accelerator = Accelerator()
    >>> # Assume `CustomObject` has a `state_dict` and `load_state_dict` function.
    >>> obj = CustomObject()
    >>> accelerator.register_for_checkpointing(obj)
    >>> accelerator.save_state("checkpoint.pt")
    ```
    """
    # Keep the original argument position next to each invalid object so the error points at the right
    # input (enumerating the filtered list would always report 0, 1, 2, ...).
    invalid_objects = [
        (index, obj)
        for index, obj in enumerate(objects)
        if not hasattr(obj, "state_dict") or not hasattr(obj, "load_state_dict")
    ]
    if invalid_objects:
        err = "All `objects` must include a `state_dict` and `load_state_dict` function to be stored. The following inputs are invalid:"
        for index, obj in invalid_objects:
            err += f"\n\t- Item at index {index}, `{get_pretty_name(obj)}`"
        raise ValueError(err)
    self._custom_objects.extend(objects)
@contextmanager
def autocast(self, autocast_handler: AutocastKwargs = None):
    """
    Context manager that runs the enclosed block under the mixed-precision context built by
    `get_mixed_precision_context_manager` from `self.native_amp` and the active handler.

    Pass `autocast_handler` to override the handler stored on the `Accelerator` (`self.autocast_handler`) for
    this block only, for instance to revert to fp32 inside a region that is already under `autocast`.

    Example:

    ```python
    >>> from accelerate import Accelerator

    >>> accelerator = Accelerator(mixed_precision="fp16")
    >>> with accelerator.autocast():
    ...     train()
    ```
    """
    # An explicitly passed handler wins; otherwise fall back to the one configured on the accelerator.
    active_handler = self.autocast_handler if autocast_handler is None else autocast_handler
    with get_mixed_precision_context_manager(self.native_amp, active_handler):
        yield
@property
def optimizer_step_was_skipped(self):
    """
    `True` if at least one optimizer in `self._optimizers` reports `step_was_skipped`, `False` otherwise
    (including when there are no optimizers).

    A skipped step happens because of gradient overflow in mixed precision, in which case the learning rate
    should not be changed.
    """
    return any(wrapped.step_was_skipped for wrapped in self._optimizers)
def lomo_backward(self, loss: torch.Tensor, learning_rate: float) -> None:
    """
    Runs backward pass on LOMO optimizers.

    Every optimizer in `self._optimizers` whose wrapped `optimizer` is a `Lomo` or `AdaLomo` instance gets
    `fused_backward(loss, learning_rate)` called on it.

    Args:
        loss (`torch.Tensor`):
            The loss to pass to `fused_backward`.
        learning_rate (`float`):
            The learning rate to pass to `fused_backward`. Must not be `None`.

    Raises:
        `ValueError`: If `learning_rate` is `None`, or if no LOMO optimizer was found among the prepared
        optimizers (which is also the case when `lomo_optim` is not installed).
    """
    # Stays empty when `lomo_optim` is not installed, so the `isinstance` check below never matches and the
    # explicit `ValueError` at the end is raised instead of a `NameError` on the unbound class names.
    lomo_optimizer_types = ()
    if is_lomo_available():
        # We need to import locally to avoid circular imports since lomo imports stuff from
        # transformers & accelerate
        from lomo_optim import AdaLomo, Lomo

        lomo_optimizer_types = (Lomo, AdaLomo)

    if learning_rate is None:
        raise ValueError("A learning rate must be passed in order to call backward pass with LOMO optimizers.")

    _backward_called = False

    for optimizer in self._optimizers:
        if isinstance(optimizer.optimizer, lomo_optimizer_types):
            optimizer.optimizer.fused_backward(loss, learning_rate)
            _backward_called = True

    if not _backward_called:
        raise ValueError(
            "Backward pass not properly called on LOMO optimizers. Are you sure you passed a LOMO optimizer in accelerator.prepare()?"
        )
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import os +import re +from contextlib import contextmanager +from functools import wraps +from typing import Optional, Union + +import torch +import torch.nn as nn + +from .hooks import ( + AlignDevicesHook, + CpuOffload, + LayerwiseCastingHook, + UserCpuOffloadHook, + add_hook_to_module, + attach_align_device_hook, + attach_align_device_hook_on_blocks, +) +from .utils import ( + OffloadedWeightsLoader, + check_cuda_p2p_ib_support, + check_device_map, + extract_submodules_state_dict, + find_tied_parameters, + get_balanced_memory, + infer_auto_device_map, + is_bnb_available, + is_mlu_available, + is_musa_available, + is_neuron_available, + is_npu_available, + is_sdaa_available, + is_xpu_available, + load_checkpoint_in_model, + offload_state_dict, + parse_flag_from_env, + retie_parameters, +) +from .utils.constants import SUPPORTED_PYTORCH_LAYERS_FOR_UPCASTING +from .utils.other import recursive_getattr + + +logger = logging.getLogger(__name__) + + +@contextmanager +def init_empty_weights(include_buffers: Optional[bool] = None): + """ + A context manager under which models are initialized with all parameters on the meta device, therefore creating an + empty model. Useful when just initializing the model would blow the available RAM. + + Args: + include_buffers (`bool`, *optional*): + Whether or not to also put all buffers on the meta device while initializing. 
@contextmanager
def init_on_device(device: torch.device, include_buffers: Optional[bool] = None):
    """
    A context manager under which models are initialized with all parameters on the specified device.

    Args:
        device (`torch.device`):
            Device to initialize all parameters on.
        include_buffers (`bool`, *optional*):
            Whether or not to also put all buffers on `device` while initializing. When `None`, the value is
            read from the `ACCELERATE_INIT_INCLUDE_BUFFERS` environment flag (default `False`).

    Example:

    ```python
    import torch.nn as nn
    from accelerate import init_on_device

    # init model on specified device(e.g., "cuda", "xpu" and so on)
    with init_on_device(device=torch.device("cuda")):
        tst = nn.Linear(100, 100)  # on specified device
    ```
    """
    if include_buffers is None:
        include_buffers = parse_flag_from_env("ACCELERATE_INIT_INCLUDE_BUFFERS", False)

    if include_buffers:
        # Entering the device as a context manager covers parameters and buffers alike, so no patching of
        # `nn.Module` is needed on this path.
        with device:
            yield
        return

    # From here on `include_buffers` is always falsy: only parameter registration is redirected and buffers
    # are left wherever the module creates them.
    old_register_parameter = nn.Module.register_parameter

    def register_empty_parameter(module, name, param):
        old_register_parameter(module, name, param)
        if param is not None:
            param_cls = type(module._parameters[name])
            kwargs = module._parameters[name].__dict__
            kwargs["requires_grad"] = param.requires_grad
            # Pop non-constructor attributes before creating the parameter, then restore them after
            _is_hf_initialized = kwargs.pop("_is_hf_initialized", None)
            module._parameters[name] = param_cls(module._parameters[name].to(device), **kwargs)
            if _is_hf_initialized is not None:
                module._parameters[name]._is_hf_initialized = _is_hf_initialized

    try:
        nn.Module.register_parameter = register_empty_parameter
        yield
    finally:
        # Always undo the patch, even if model construction raised inside the `with` block.
        nn.Module.register_parameter = old_register_parameter
+ """ + if execution_device is None: + execution_device = next(iter(model.parameters())).device + if state_dict is None: + state_dict = {n: p.to("cpu") for n, p in model.state_dict().items()} + + add_hook_to_module(model, AlignDevicesHook(io_same_device=True), append=True) + attach_align_device_hook( + model, + execution_device=execution_device, + offload=True, + offload_buffers=offload_buffers, + weights_map=state_dict, + preload_module_classes=preload_module_classes, + ) + + return model + + +def cpu_offload_with_hook( + model: torch.nn.Module, + execution_device: Optional[Union[int, str, torch.device]] = None, + prev_module_hook: Optional[UserCpuOffloadHook] = None, +): + """ + Offloads a model on the CPU and puts it back to an execution device when executed. The difference with + [`cpu_offload`] is that the model stays on the execution device after the forward and is only offloaded again when + the `offload` method of the returned `hook` is called. Useful for pipelines running a model in a loop. + + Args: + model (`torch.nn.Module`): + The model to offload. + execution_device(`str`, `int` or `torch.device`, *optional*): + The device on which the model should be executed. Will default to the MPS device if it's available, then + device 0 if there is an accelerator device, and finally to the CPU. + prev_module_hook (`UserCpuOffloadHook`, *optional*): + The hook sent back by this function for a previous model in the pipeline you are running. If passed, its + offload method will be called just before the forward of the model to which this hook is attached. + + Example: + + ```py + model_1, hook_1 = cpu_offload_with_hook(model_1, device) + model_2, hook_2 = cpu_offload_with_hook(model_2, device, prev_module_hook=hook_1) + model_3, hook_3 = cpu_offload_with_hook(model_3, device, prev_module_hook=hook_2) + + hid_1 = model_1(input) + for i in range(50): + # model1 is offloaded on the CPU at the first iteration, model 2 stays on the GPU for this whole loop. 
+ hid_2 = model_2(hid_1) + # model2 is offloaded to the CPU just before this forward. + hid_3 = model_3(hid_3) + + # For model3, you need to manually call the hook offload method. + hook_3.offload() + ``` + """ + hook = CpuOffload(execution_device=execution_device, prev_module_hook=prev_module_hook) + add_hook_to_module(model, hook, append=True) + user_hook = UserCpuOffloadHook(model, hook) + return model, user_hook + + +def disk_offload( + model: nn.Module, + offload_dir: Union[str, os.PathLike], + execution_device: Optional[torch.device] = None, + offload_buffers: bool = False, + preload_module_classes: Optional[list[str]] = None, +): + """ + Activates full disk offload for a model. As a result, all parameters of the model will be offloaded as + memory-mapped array in a given folder. During the forward pass, parameters will be accessed from that folder and + put on the execution device passed as they are needed, then offloaded again. + + Args: + model (`torch.nn.Module`): The model to offload. + offload_dir (`str` or `os.PathLike`): + The folder in which to offload the model weights (or where the model weights are already offloaded). + execution_device (`torch.device`, *optional*): + The device on which the forward pass of the model will be executed (should be a GPU). Will default to the + model's first parameter device. + offload_buffers (`bool`, *optional*, defaults to `False`): + Whether or not to offload the buffers with the model parameters. + preload_module_classes (`List[str]`, *optional*): + A list of classes whose instances should load all their weights (even in the submodules) at the beginning + of the forward. This should only be used for classes that have submodules which are registered but not + called directly during the forward, for instance if a `dense` linear layer is registered, but at forward, + `dense.weight` and `dense.bias` are used in some operations instead of calling `dense` directly. 
+ """ + if not os.path.isdir(offload_dir) or not os.path.isfile(os.path.join(offload_dir, "index.json")): + offload_state_dict(offload_dir, model.state_dict()) + if execution_device is None: + execution_device = next(iter(model.parameters())).device + weights_map = OffloadedWeightsLoader(save_folder=offload_dir) + + add_hook_to_module(model, AlignDevicesHook(io_same_device=True), append=True) + attach_align_device_hook( + model, + execution_device=execution_device, + offload=True, + offload_buffers=offload_buffers, + weights_map=weights_map, + preload_module_classes=preload_module_classes, + ) + + return model + + +def dispatch_model( + model: nn.Module, + device_map: dict[str, Union[str, int, torch.device]], + main_device: Optional[torch.device] = None, + state_dict: Optional[dict[str, torch.Tensor]] = None, + offload_dir: Optional[Union[str, os.PathLike]] = None, + offload_index: Optional[dict[str, str]] = None, + offload_buffers: bool = False, + skip_keys: Optional[Union[str, list[str]]] = None, + preload_module_classes: Optional[list[str]] = None, + force_hooks: bool = False, +): + """ + Dispatches a model according to a given device map. Layers of the model might be spread across GPUs, offloaded on + the CPU or even the disk. + + Args: + model (`torch.nn.Module`): + The model to dispatch. + device_map (`Dict[str, Union[str, int, torch.device]]`): + A dictionary mapping module names in the models `state_dict` to the device they should go to. Note that + `"disk"` is accepted even if it's not a proper value for `torch.device`. + main_device (`str`, `int` or `torch.device`, *optional*): + The main execution device. Will default to the first device in the `device_map` different from `"cpu"` or + `"disk"`. + state_dict (`Dict[str, torch.Tensor]`, *optional*): + The state dict of the part of the model that will be kept on CPU. + offload_dir (`str` or `os.PathLike`): + The folder in which to offload the model weights (or where the model weights are already offloaded). 
+ offload_index (`Dict`, *optional*): + A dictionary from weight name to their information (`dtype`/ `shape` or safetensors filename). Will default + to the index saved in `save_folder`. + offload_buffers (`bool`, *optional*, defaults to `False`): + Whether or not to offload the buffers with the model parameters. + skip_keys (`str` or `List[str]`, *optional*): + A list of keys to ignore when moving inputs or outputs between devices. + preload_module_classes (`List[str]`, *optional*): + A list of classes whose instances should load all their weights (even in the submodules) at the beginning + of the forward. This should only be used for classes that have submodules which are registered but not + called directly during the forward, for instance if a `dense` linear layer is registered, but at forward, + `dense.weight` and `dense.bias` are used in some operations instead of calling `dense` directly. + force_hooks (`bool`, *optional*, defaults to `False`): + Whether or not to force device hooks to be attached to the model even if all layers are dispatched to a + single device. + """ + # Error early if the device map is incomplete. + check_device_map(model, device_map) + + # We need to force hook for quantized model that can't be moved with to() + if getattr(model, "quantization_method", "bitsandbytes") == "bitsandbytes": + # since bnb 0.43.2, we can move 4-bit model + if (getattr(model, "is_loaded_in_8bit", False) and not is_bnb_available(min_version="0.48.0")) or ( + getattr(model, "is_loaded_in_4bit", False) and not is_bnb_available(min_version="0.43.2") + ): + force_hooks = True + + # We attach hooks if the device_map has at least 2 different devices or if + # force_hooks is set to `True`. Otherwise, the model in already loaded + # in the unique device and the user can decide where to dispatch the model. 
+ # If the model is quantized, we always force-dispatch the model + if (len(set(device_map.values())) > 1) or force_hooks: + if main_device is None: + if set(device_map.values()) == {"cpu"} or set(device_map.values()) == {"cpu", "disk"}: + main_device = "cpu" + else: + main_device = [d for d in device_map.values() if d not in ["cpu", "disk"]][0] + + if main_device != "cpu": + cpu_modules = [name for name, device in device_map.items() if device == "cpu"] + if state_dict is None and len(cpu_modules) > 0: + state_dict = extract_submodules_state_dict(model.state_dict(), cpu_modules) + + disk_modules = [name for name, device in device_map.items() if device == "disk"] + if offload_dir is None and offload_index is None and len(disk_modules) > 0: + raise ValueError( + "We need an `offload_dir` to dispatch this model according to this `device_map`, the following submodules " + f"need to be offloaded: {', '.join(disk_modules)}." + ) + if ( + len(disk_modules) > 0 + and offload_index is None + and (not os.path.isdir(offload_dir) or not os.path.isfile(os.path.join(offload_dir, "index.json"))) + ): + disk_state_dict = extract_submodules_state_dict(model.state_dict(), disk_modules) + offload_state_dict(offload_dir, disk_state_dict) + + execution_device = { + name: main_device if device in ["cpu", "disk"] else device for name, device in device_map.items() + } + execution_device[""] = main_device + offloaded_devices = ["disk"] if main_device == "cpu" or main_device == "mps" else ["cpu", "disk"] + offload = {name: device in offloaded_devices for name, device in device_map.items()} + save_folder = offload_dir if len(disk_modules) > 0 else None + if state_dict is not None or save_folder is not None or offload_index is not None: + device = main_device if offload_index is not None else None + weights_map = OffloadedWeightsLoader( + state_dict=state_dict, save_folder=save_folder, index=offload_index, device=device + ) + else: + weights_map = None + + # When dispatching the model's 
parameters to the devices specified in device_map, we want to avoid allocating memory several times for the + # tied parameters. The dictionary tied_params_map keeps track of the already allocated data for a given tied parameter (represented by its + # original pointer) on each devices. + tied_params = find_tied_parameters(model) + + tied_params_map = {} + for group in tied_params: + for param_name in group: + # data_ptr() is enough here, as `find_tied_parameters` finds tied params simply by comparing `param1 is param2`, so we don't need + # to care about views of tensors through storage_offset. + data_ptr = recursive_getattr(model, param_name).data_ptr() + tied_params_map[data_ptr] = {} + + # Note: To handle the disk offloading case, we can not simply use weights_map[param_name].data_ptr() as the reference pointer, + # as we have no guarantee that safetensors' `file.get_tensor()` will always give the same pointer. + + attach_align_device_hook_on_blocks( + model, + execution_device=execution_device, + offload=offload, + offload_buffers=offload_buffers, + weights_map=weights_map, + skip_keys=skip_keys, + preload_module_classes=preload_module_classes, + tied_params_map=tied_params_map, + ) + + # warn if there is any params on the meta device + offloaded_devices_str = " and ".join( + [device for device in set(device_map.values()) if device in ("cpu", "disk")] + ) + if len(offloaded_devices_str) > 0: + logger.warning( + f"Some parameters are on the meta device because they were offloaded to the {offloaded_devices_str}." + ) + + # Attaching the hook may break tied weights, so we retie them + retie_parameters(model, tied_params) + + # add warning on `to` method + def add_warning(fn, model): + @wraps(fn) + def wrapper(*args, **kwargs): + warning_msg = "You shouldn't move a model that is dispatched using accelerate hooks." 
+ if str(fn.__name__) == "to": + to_device = torch._C._nn._parse_to(*args, **kwargs)[0] + if to_device is not None: + logger.warning(warning_msg) + else: + logger.warning(warning_msg) + for param in model.parameters(): + if param.device == torch.device("meta"): + raise RuntimeError("You can't move a model that has some modules offloaded to cpu or disk.") + return fn(*args, **kwargs) + + return wrapper + + # Make sure to update _accelerate_added_attributes in hooks.py if you add any hook + model.to = add_warning(model.to, model) + if is_npu_available(): + model.npu = add_warning(model.npu, model) + elif is_mlu_available(): + model.mlu = add_warning(model.mlu, model) + elif is_sdaa_available(): + model.sdaa = add_warning(model.sdaa, model) + elif is_musa_available(): + model.musa = add_warning(model.musa, model) + elif is_xpu_available(): + model.xpu = add_warning(model.xpu, model) + elif is_neuron_available(): + model.neuron = add_warning(model.neuron, model) + else: + model.cuda = add_warning(model.cuda, model) + + # Check if we are using multi-gpus with RTX 4000 series + use_multi_gpu = len([device for device in set(device_map.values()) if device not in ("cpu", "disk")]) > 1 + if use_multi_gpu and not check_cuda_p2p_ib_support(): + logger.warning( + "We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. " + "This can affect the multi-gpu inference when using accelerate device_map." + "Please make sure to update your driver to the latest version which resolves this." + ) + else: + device = list(device_map.values())[0] + # `torch.Tensor.to()` is not supported by `torch_npu` (see this [issue](https://github.com/Ascend/pytorch/issues/16)). 
+ if is_npu_available() and isinstance(device, int): + device = f"npu:{device}" + elif is_mlu_available() and isinstance(device, int): + device = f"mlu:{device}" + elif is_sdaa_available() and isinstance(device, int): + device = f"sdaa:{device}" + elif is_musa_available() and isinstance(device, int): + device = f"musa:{device}" + elif is_neuron_available() and isinstance(device, int): + device = f"neuron:{device}" + if device != "disk": + model.to(device) + else: + raise ValueError( + "You are trying to offload the whole model to the disk. Please use the `disk_offload` function instead." + ) + # Convert OrderedDict back to dict for easier usage + model.hf_device_map = dict(device_map) + return model + + +def load_checkpoint_and_dispatch( + model: nn.Module, + checkpoint: Union[str, os.PathLike], + device_map: Optional[Union[str, dict[str, Union[int, str, torch.device]]]] = None, + max_memory: Optional[dict[Union[int, str], Union[int, str]]] = None, + no_split_module_classes: Optional[list[str]] = None, + offload_folder: Optional[Union[str, os.PathLike]] = None, + offload_buffers: bool = False, + dtype: Optional[Union[str, torch.dtype]] = None, + offload_state_dict: Optional[bool] = None, + skip_keys: Optional[Union[str, list[str]]] = None, + preload_module_classes: Optional[list[str]] = None, + force_hooks: bool = False, + strict: bool = False, + full_state_dict: bool = True, + broadcast_from_rank0: bool = False, +): + """ + Loads a (potentially sharded) checkpoint inside a model, potentially sending weights to a given device as they are + loaded and adds the various hooks that will make this model run properly (even if split across devices). + + Args: + model (`torch.nn.Module`): The model in which we want to load a checkpoint. + checkpoint (`str` or `os.PathLike`): + The folder checkpoint to load. 
It can be: + - a path to a file containing a whole model state dict + - a path to a `.json` file containing the index to a sharded checkpoint + - a path to a folder containing a unique `.index.json` file and the shards of a checkpoint. + device_map (`Dict[str, Union[int, str, torch.device]]`, *optional*): + A map that specifies where each submodule should go. It doesn't need to be refined to each parameter/buffer + name, once a given module name is inside, every submodule of it will be sent to the same device. + + To have Accelerate compute the most optimized `device_map` automatically, set `device_map="auto"`. For more + information about each option see [here](../concept_guides/big_model_inference#designing-a-device-map). + Defaults to None, which means [`dispatch_model`] will not be called. + max_memory (`Dict`, *optional*): + A dictionary device identifier to maximum memory. Will default to the maximum memory available for each GPU + and the available CPU RAM if unset. + no_split_module_classes (`List[str]`, *optional*): + A list of layer class names that should never be split across device (for instance any layer that has a + residual connection). + offload_folder (`str` or `os.PathLike`, *optional*): + If the `device_map` contains any value `"disk"`, the folder where we will offload weights. + offload_buffers (`bool`, *optional*, defaults to `False`): + In the layers that are offloaded on the CPU or the hard drive, whether or not to offload the buffers as + well as the parameters. + dtype (`str` or `torch.dtype`, *optional*): + If provided, the weights will be converted to that type when loaded. + offload_state_dict (`bool`, *optional*): + If `True`, will temporarily offload the CPU state dict on the hard drive to avoid getting out of CPU RAM if + the weight of the CPU state dict + the biggest shard does not fit. Will default to `True` if the device map + picked contains `"disk"` values. 
+ skip_keys (`str` or `List[str]`, *optional*): + A list of keys to ignore when moving inputs or outputs between devices. + preload_module_classes (`List[str]`, *optional*): + A list of classes whose instances should load all their weights (even in the submodules) at the beginning + of the forward. This should only be used for classes that have submodules which are registered but not + called directly during the forward, for instance if a `dense` linear layer is registered, but at forward, + `dense.weight` and `dense.bias` are used in some operations instead of calling `dense` directly. + force_hooks (`bool`, *optional*, defaults to `False`): + Whether or not to force device hooks to be attached to the model even if all layers are dispatched to a + single device. + strict (`bool`, *optional*, defaults to `False`): + Whether to strictly enforce that the keys in the checkpoint state_dict match the keys of the model's + state_dict. + full_state_dict (`bool`, *optional*, defaults to `True`): if this is set to `True`, all the tensors in the + loaded state_dict will be gathered. No ShardedTensor and DTensor will be in the loaded state_dict. + broadcast_from_rank0 (`False`, *optional*, defaults to `False`): when the option is `True`, a distributed + `ProcessGroup` must be initialized. rank0 should receive a full state_dict and will broadcast the tensors + in the state_dict one by one to other ranks. Other ranks will receive the tensors and shard (if applicable) + according to the local shards in the model. 
+ + Example: + + ```python + >>> from accelerate import init_empty_weights, load_checkpoint_and_dispatch + >>> from huggingface_hub import hf_hub_download + >>> from transformers import AutoConfig, AutoModelForCausalLM + + >>> # Download the Weights + >>> checkpoint = "EleutherAI/gpt-j-6B" + >>> weights_location = hf_hub_download(checkpoint, "pytorch_model.bin") + + >>> # Create a model and initialize it with empty weights + >>> config = AutoConfig.from_pretrained(checkpoint) + >>> with init_empty_weights(): + ... model = AutoModelForCausalLM.from_config(config) + + >>> # Load the checkpoint and dispatch it to the right devices + >>> model = load_checkpoint_and_dispatch( + ... model, weights_location, device_map="auto", no_split_module_classes=["GPTJBlock"] + ... ) + ``` + """ + if isinstance(device_map, str) and device_map not in ["auto", "balanced", "balanced_low_0", "sequential"]: + raise ValueError( + "If passing a string for `device_map`, please choose 'auto', 'balanced', 'balanced_low_0' or 'sequential'." 
+ ) + if isinstance(device_map, str): + if device_map != "sequential": + max_memory = get_balanced_memory( + model, + max_memory=max_memory, + no_split_module_classes=no_split_module_classes, + dtype=dtype, + low_zero=(device_map == "balanced_low_0"), + ) + device_map = infer_auto_device_map( + model, + max_memory=max_memory, + no_split_module_classes=no_split_module_classes, + dtype=dtype, + offload_buffers=offload_buffers, + ) + if offload_state_dict is None and device_map is not None and "disk" in device_map.values(): + offload_state_dict = True + load_checkpoint_in_model( + model, + checkpoint, + device_map=device_map, + offload_folder=offload_folder, + dtype=dtype, + offload_state_dict=offload_state_dict, + offload_buffers=offload_buffers, + strict=strict, + full_state_dict=full_state_dict, + broadcast_from_rank0=broadcast_from_rank0, + ) + if device_map is None: + return model + return dispatch_model( + model, + device_map=device_map, + offload_dir=offload_folder, + offload_buffers=offload_buffers, + skip_keys=skip_keys, + preload_module_classes=preload_module_classes, + force_hooks=force_hooks, + ) + + +def attach_layerwise_casting_hooks( + module: torch.nn.Module, + storage_dtype: torch.dtype, + compute_dtype: torch.dtype, + skip_modules_pattern: Optional[Union[str, tuple[str, ...]]] = None, + skip_modules_classes: Optional[tuple[type[torch.nn.Module], ...]] = None, + non_blocking: bool = False, +) -> None: + r""" + Applies layerwise casting to a given module. The module expected here is a PyTorch `nn.Module`. This is helpful for + reducing memory requirements when one doesn't want to fully quantize a model. Model params can be kept in say, + `torch.float8_e4m3fn` and upcasted to a higher precision like `torch.bfloat16` during forward pass and downcasted + back to `torch.float8_e4m3fn` to realize memory savings. 
+ + Args: + module (`torch.nn.Module`): + The module whose leaf modules will be cast to a high precision dtype for computation, and to a low + precision dtype for storage. + storage_dtype (`torch.dtype`): + The dtype to cast the module to before/after the forward pass for storage. + compute_dtype (`torch.dtype`): + The dtype to cast the module to during the forward pass for computation. + skip_modules_pattern (`tuple[str, ...]`, defaults to `None`): + A list of patterns to match the names of the modules to skip during the layerwise casting process. If set + to `None` alongside `skip_modules_classes` being `None`, the layerwise casting is applied directly to the + module instead of its internal submodules. + skip_modules_classes (`tuple[type[torch.nn.Module], ...]`, defaults to `None`): + A list of module classes to skip during the layerwise casting process. + non_blocking (`bool`, defaults to `False`): + If `True`, the weight casting operations are non-blocking. + + Example: + + ```python + >>> from accelerate.hooks import attach_layerwise_casting_hooks + >>> from transformers import AutoModelForCausalLM + >>> import torch + + >>> # Model + >>> checkpoint = "EleutherAI/gpt-j-6B" + >>> model = AutoModelForCausalLM.from_pretrained(checkpoint) + + >>> # Attach hooks and perform inference + >>> attach_layerwise_casting_hooks(model, storage_dtype=torch.float8_e4m3fn, compute_dtype=torch.bfloat16) + >>> with torch.no_grad(): + ... model(...) + ``` + + Users can also pass modules they want to avoid from getting downcasted. + + ```py + >>> attach_layerwise_casting_hooks( + ... model, storage_dtype=torch.float8_e4m3fn, compute_dtype=torch.bfloat16, skip_modules_pattern=["norm"] + ... 
) + ``` + """ + _attach_layerwise_casting_hooks( + module, storage_dtype, compute_dtype, skip_modules_pattern, skip_modules_classes, non_blocking + ) + + +def _attach_layerwise_casting_hooks( + module: torch.nn.Module, + storage_dtype: torch.dtype, + compute_dtype: torch.dtype, + skip_modules_pattern: Optional[Union[str, tuple[str, ...]]] = None, + skip_modules_classes: Optional[tuple[type[torch.nn.Module], ...]] = None, + non_blocking: bool = False, + _prefix: str = "", +): + should_skip = (skip_modules_classes is not None and isinstance(module, skip_modules_classes)) or ( + skip_modules_pattern is not None and any(re.search(pattern, _prefix) for pattern in skip_modules_pattern) + ) + if should_skip: + logger.debug(f'Skipping layerwise casting for layer "{_prefix}"') + return + + if isinstance(module, SUPPORTED_PYTORCH_LAYERS_FOR_UPCASTING): + logger.debug(f'Applying layerwise casting to layer "{_prefix}"') + add_hook_to_module( + module, + LayerwiseCastingHook(storage_dtype=storage_dtype, compute_dtype=compute_dtype, non_blocking=non_blocking), + append=True, + ) + return + + for name, submodule in module.named_children(): + layer_name = f"{_prefix}.{name}" if _prefix else name + _attach_layerwise_casting_hooks( + submodule, + storage_dtype, + compute_dtype, + skip_modules_pattern, + skip_modules_classes, + non_blocking, + _prefix=layer_name, + ) + + +def _attach_context_parallel_hooks( + model: nn.Module, +): + """ + Monkeypatch huggingface's `transformers` model to fix attention mask issues when using context parallelism. + + This function attaches forward_pre_hooks to each self_attn module of the model, where each hook checks the + args/kwargs, if they contain an attention mask, if it does, it will remove this mask, check if it is a causal mask, + if yes, will add a kwarg `is_causal=True`, otherwise will raise an error. This is because context parallelism does + not support attention masks. This function modifies the model in place. 
+ + Args: + model (`nn.Module`): + The model to attach the hooks to. + + """ + + def _self_attn_pre_forward_hook(_module, module_args, module_kwargs): + if "attention_mask" in module_kwargs: + module_kwargs["attention_mask"] = None + module_kwargs["is_causal"] = True + + return module_args, module_kwargs + + for name, module in model.named_modules(): + # We hope (assume) that if user uses their own model (without this structure which transformers uses), they read the docs saying they can't pass in attention masks + # Then these cases can happen: + # 1) some modules end with a `self-attn` module, in which case we attach the hook, but the + # there's no attention mask kwarg -> hook is a no-op + # 2) some modules end with a `self-attn` module, in which case we attach the hook, and the + # attention mask kwarg is passed -> hook will remove the attention mask and add + # `is_causal=True` kwarg, which either crashes the training or fixes it + # (training would crash anyway as attention mask isn't supported) + # 3) no modules end with a `self-attn` module, in which case we don't attach the hook, this is + # a no-op as well + if name.endswith("self_attn"): + # we want the hook to be executed first, to avoid any other hooks doing work on the attention mask + module.register_forward_pre_hook(_self_attn_pre_forward_hook, with_kwargs=True, prepend=True) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/checkpointing.py b/URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/checkpointing.py new file mode 100644 index 0000000000000000000000000000000000000000..2b753e6e206b3d1fbca55adb0cd68fc1e526c6ab --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/checkpointing.py @@ -0,0 +1,338 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
def save_accelerator_state(
    output_dir: str,
    model_states: list[dict],
    optimizers: list,
    schedulers: list,
    dataloaders: list,
    process_index: int,
    step: int,
    scaler: Optional[GradScaler] = None,
    save_on_each_node: bool = False,
    safe_serialization: bool = True,
):
    """
    Saves the current states of the models, optimizers, scaler, and RNG generators to a given directory.

    If `safe_serialization` is `True`, models will be saved with `safetensors` while the rest are saved using native
    `pickle`.

    Args:
        output_dir (`str` or `os.PathLike`):
            The name of the folder to save all relevant weights and states.
        model_states (`List[dict]`):
            A list of model state dicts (one per model), already extracted by the caller
        optimizers (`List[torch.optim.Optimizer]`):
            A list of optimizer instances
        schedulers (`List[torch.optim.lr_scheduler._LRScheduler]`):
            A list of learning rate schedulers
        dataloaders (`List[torch.utils.data.DataLoader]`):
            A list of dataloader instances to save their sampler states
        process_index (`int`):
            The current process index in the Accelerator state
        step (`int`):
            The current step in the internal step tracker
        scaler (`torch.amp.GradScaler`, *optional*):
            An optional gradient scaler instance to save;
        save_on_each_node (`bool`, *optional*):
            Whether to save on every node, or only the main node.
        safe_serialization (`bool`, *optional*, defaults to `True`):
            Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).

    Returns:
        `pathlib.Path`: `output_dir` converted to a `Path`.
    """
    # Imported inside the function (as the original did), but only once instead of on every loop iteration.
    from .data_loader import IterableDatasetShard, SeedableRandomSampler

    output_dir = Path(output_dir)
    # Model states
    for i, state in enumerate(model_states):
        weights_name = WEIGHTS_NAME if not safe_serialization else SAFE_WEIGHTS_NAME
        if i > 0:
            # Every model after the first gets an `_{i}` suffix in front of the extension
            weights_name = weights_name.replace(".", f"_{i}.")
        output_model_file = output_dir.joinpath(weights_name)
        save(state, output_model_file, save_on_each_node=save_on_each_node, safe_serialization=safe_serialization)
        logger.info(f"Model weights saved in {output_model_file}")
    # Optimizer states
    for i, opt in enumerate(optimizers):
        state = opt.state_dict()
        optimizer_name = f"{OPTIMIZER_NAME}.bin" if i == 0 else f"{OPTIMIZER_NAME}_{i}.bin"
        output_optimizer_file = output_dir.joinpath(optimizer_name)
        save(state, output_optimizer_file, save_on_each_node=save_on_each_node, safe_serialization=False)
        logger.info(f"Optimizer state saved in {output_optimizer_file}")
    # Scheduler states
    for i, scheduler in enumerate(schedulers):
        state = scheduler.state_dict()
        scheduler_name = f"{SCHEDULER_NAME}.bin" if i == 0 else f"{SCHEDULER_NAME}_{i}.bin"
        output_scheduler_file = output_dir.joinpath(scheduler_name)
        save(state, output_scheduler_file, save_on_each_node=save_on_each_node, safe_serialization=False)
        logger.info(f"Scheduler state saved in {output_scheduler_file}")
    # DataLoader states
    for i, dataloader in enumerate(dataloaders):
        sampler_name = f"{SAMPLER_NAME}.bin" if i == 0 else f"{SAMPLER_NAME}_{i}.bin"
        output_sampler_file = output_dir.joinpath(sampler_name)
        # Only save if we have our custom sampler
        if isinstance(dataloader.dataset, IterableDatasetShard):
            sampler = dataloader.get_sampler()
            if isinstance(sampler, SeedableRandomSampler):
                save(sampler, output_sampler_file, save_on_each_node=save_on_each_node, safe_serialization=False)
                # Log only when a sampler file was really written; the previous unconditional message
                # claimed a save for every dataloader.
                logger.info(f"Sampler state for dataloader {i} saved in {output_sampler_file}")
        if getattr(dataloader, "use_stateful_dataloader", False):
            dataloader_state_dict_name = "dl_state_dict.bin" if i == 0 else f"dl_state_dict_{i}.bin"
            output_dataloader_state_dict_file = output_dir.joinpath(dataloader_state_dict_name)
            state_dict = dataloader.state_dict()
            torch.save(state_dict, output_dataloader_state_dict_file)
            logger.info(f"State dict for dataloader {i} saved in {output_dataloader_state_dict_file}")

    # GradScaler state
    if scaler is not None:
        state = scaler.state_dict()
        output_scaler_file = output_dir.joinpath(SCALER_NAME)
        torch.save(state, output_scaler_file)
        logger.info(f"Gradient scaler state saved in {output_scaler_file}")
    # Random number generator states (one file per process, keyed by `process_index`)
    states = {}
    states_name = f"{RNG_STATE_NAME}_{process_index}.pkl"
    states["step"] = step
    states["random_state"] = random.getstate()
    states["numpy_random_seed"] = np.random.get_state()
    states["torch_manual_seed"] = torch.get_rng_state()
    if is_xpu_available():
        states["torch_xpu_manual_seed"] = torch.xpu.get_rng_state_all()
    if is_mlu_available():
        states["torch_mlu_manual_seed"] = torch.mlu.get_rng_state_all()
    elif is_sdaa_available():
        states["torch_sdaa_manual_seed"] = torch.sdaa.get_rng_state_all()
    elif is_musa_available():
        states["torch_musa_manual_seed"] = torch.musa.get_rng_state_all()
    if is_hpu_available():
        states["torch_hpu_manual_seed"] = torch.hpu.get_rng_state_all()
    if is_neuron_available():
        states["torch_neuron_manual_seed"] = torch.neuron.get_rng_state_all()
    if is_cuda_available():
        states["torch_cuda_manual_seed"] = torch.cuda.get_rng_state_all()
    if is_torch_xla_available():
        states["xm_seed"] = xm.get_rng_state()
    output_states_file = output_dir.joinpath(states_name)
    torch.save(states, output_states_file)
    logger.info(f"Random states saved in {output_states_file}")
    return output_dir
def load_accelerator_state(
    input_dir,
    models,
    optimizers,
    schedulers,
    dataloaders,
    process_index,
    scaler=None,
    map_location=None,
    load_kwargs=None,
    **load_model_func_kwargs,
):
    """
    Loads states of the models, optimizers, scaler, and RNG generators from a given directory.

    Args:
        input_dir (`str` or `os.PathLike`):
            The name of the folder to load all relevant weights and states.
        models (`List[torch.nn.Module]`):
            A list of model instances
        optimizers (`List[torch.optim.Optimizer]`):
            A list of optimizer instances
        schedulers (`List[torch.optim.lr_scheduler._LRScheduler]`):
            A list of learning rate schedulers
        dataloaders (`List[torch.utils.data.DataLoader]`):
            A list of dataloader instances whose sampler / stateful-dataloader states should be restored
        process_index (`int`):
            The current process index in the Accelerator state
        scaler (`torch.amp.GradScaler`, *optional*):
            An optional *GradScaler* instance to load
        map_location (`str`, *optional*):
            What device to load the optimizer state onto. Should be one of either "cpu" or "on_device".
        load_kwargs (`dict`, *optional*):
            Additional arguments that can be passed to the `load` function.
        load_model_func_kwargs (`dict`, *optional*):
            Additional arguments that can be passed to the model's `load_state_dict` method.

    Returns:
        `dict`: Contains the `Accelerator` attributes to override while loading the state.

    Raises:
        TypeError: If `map_location` is not one of `None`, `"cpu"` or `"on_device"`.
    """
    # stores the `Accelerator` attributes to override
    override_attributes = {}
    if map_location not in [None, "cpu", "on_device"]:
        raise TypeError(
            "Unsupported optimizer map location passed, please choose one of `None`, `'cpu'`, or `'on_device'`"
        )
    if map_location is None:
        map_location = "cpu"
    elif map_location == "on_device":
        map_location = PartialState().device

    if load_kwargs is None:
        load_kwargs = {}

    # Imported inside the function (as the original did), but only once instead of on every loop iteration.
    from .data_loader import IterableDatasetShard, SeedableRandomSampler

    input_dir = Path(input_dir)
    # Model states
    for i, model in enumerate(models):
        ending = f"_{i}" if i > 0 else ""
        input_model_file = input_dir.joinpath(f"{SAFE_MODEL_NAME}{ending}.safetensors")
        if input_model_file.exists():
            load_model(model, input_model_file, device=str(map_location), **load_model_func_kwargs)
        else:
            # Load with torch
            input_model_file = input_dir.joinpath(f"{MODEL_NAME}{ending}.bin")
            state_dict = load(input_model_file, map_location=map_location)
            model.load_state_dict(state_dict, **load_model_func_kwargs)
    logger.info("All model weights loaded successfully")

    # Optimizer states
    for i, opt in enumerate(optimizers):
        optimizer_name = f"{OPTIMIZER_NAME}.bin" if i == 0 else f"{OPTIMIZER_NAME}_{i}.bin"
        input_optimizer_file = input_dir.joinpath(optimizer_name)
        optimizer_state = load(input_optimizer_file, map_location=map_location, **load_kwargs)
        opt.load_state_dict(optimizer_state)
    logger.info("All optimizer states loaded successfully")

    # Scheduler states
    for i, scheduler in enumerate(schedulers):
        scheduler_name = f"{SCHEDULER_NAME}.bin" if i == 0 else f"{SCHEDULER_NAME}_{i}.bin"
        input_scheduler_file = input_dir.joinpath(scheduler_name)
        scheduler_state = load(input_scheduler_file, **load_kwargs)
        scheduler.load_state_dict(scheduler_state)
    logger.info("All scheduler states loaded successfully")

    # DataLoader / sampler states
    for i, dataloader in enumerate(dataloaders):
        sampler_name = f"{SAMPLER_NAME}.bin" if i == 0 else f"{SAMPLER_NAME}_{i}.bin"
        input_sampler_file = input_dir.joinpath(sampler_name)
        # Only load if we have our custom sampler
        if isinstance(dataloader.dataset, IterableDatasetShard):
            sampler = dataloader.get_sampler()
            if isinstance(sampler, SeedableRandomSampler):
                dataloader.set_sampler(load(input_sampler_file))
        if getattr(dataloader, "use_stateful_dataloader", False):
            dataloader_state_dict_name = "dl_state_dict.bin" if i == 0 else f"dl_state_dict_{i}.bin"
            input_dataloader_state_dict_file = input_dir.joinpath(dataloader_state_dict_name)
            # Missing file is tolerated: the state dict is only restored when it was saved
            if input_dataloader_state_dict_file.exists():
                state_dict = load(input_dataloader_state_dict_file, **load_kwargs)
                dataloader.load_state_dict(state_dict)
    logger.info("All dataloader sampler states loaded successfully")

    # GradScaler state
    if scaler is not None:
        input_scaler_file = input_dir.joinpath(SCALER_NAME)
        scaler_state = load(input_scaler_file)
        scaler.load_state_dict(scaler_state)
        logger.info("GradScaler state loaded successfully")

    # Random states (best effort: a failure here must not abort the whole load)
    try:
        states = load(input_dir.joinpath(f"{RNG_STATE_NAME}_{process_index}.pkl"))
        if "step" in states:
            override_attributes["step"] = states["step"]
        random.setstate(states["random_state"])
        np.random.set_state(states["numpy_random_seed"])
        torch.set_rng_state(states["torch_manual_seed"])
        # The availability checks below mirror the ones used when saving, so that only the keys that
        # were written for this kind of host are read back. The previous `elif ... else: cuda` chain
        # fell through to the CUDA key on hosts without CUDA (CPU-only, XPU-only), raised `KeyError`,
        # and thereby reported failure and skipped the XLA restore.
        if is_xpu_available():
            torch.xpu.set_rng_state_all(states["torch_xpu_manual_seed"])
        if is_mlu_available():
            torch.mlu.set_rng_state_all(states["torch_mlu_manual_seed"])
        elif is_sdaa_available():
            torch.sdaa.set_rng_state_all(states["torch_sdaa_manual_seed"])
        elif is_musa_available():
            torch.musa.set_rng_state_all(states["torch_musa_manual_seed"])
        if is_hpu_available():
            torch.hpu.set_rng_state_all(states["torch_hpu_manual_seed"])
        if is_neuron_available():
            torch.neuron.set_rng_state_all(states["torch_neuron_manual_seed"])
        if is_cuda_available():
            torch.cuda.set_rng_state_all(states["torch_cuda_manual_seed"])
        if is_torch_xla_available():
            xm.set_rng_state(states["xm_seed"])
        logger.info("All random states loaded successfully")
    except Exception as err:
        # Still non-fatal, but surface the reason instead of swallowing it silently
        logger.info(f"Could not load random states: {err!r}")

    return override_attributes
def save_custom_state(obj, path, index: int = 0, save_on_each_node: bool = False):
    """
    Saves the state of `obj` to `{path}/custom_checkpoint_{index}.pkl`

    Args:
        obj: Any object exposing a `state_dict()` method.
        path (`str` or `os.PathLike`): Directory in which the checkpoint file is written.
        index (`int`, *optional*, defaults to 0): Number used in the checkpoint file name.
        save_on_each_node (`bool`, *optional*, defaults to `False`): Forwarded to `save`.
    """
    # Should this be the right way to get a qual_name type value from `obj`?
    save_location = Path(path) / f"custom_checkpoint_{index}.pkl"
    logger.info(f"Saving the state of {get_pretty_name(obj)} to {save_location}")
    save(obj.state_dict(), save_location, save_on_each_node=save_on_each_node)


def load_custom_state(obj, path, index: int = 0):
    """
    Loads the state of `obj` at `{path}/custom_checkpoint_{index}.pkl`. Will always set `weights_only=False` when
    loading the state.

    Args:
        obj: Any object exposing a `load_state_dict()` method.
        path (`str` or `os.PathLike`): Directory that contains the checkpoint file.
        index (`int`, *optional*, defaults to 0): Number used in the checkpoint file name.
    """
    # Build the location exactly like `save_custom_state` does. The previous f-string concatenation
    # disagreed with it for an empty `path` ("" -> "/custom_checkpoint_0.pkl", i.e. the filesystem root)
    # and for paths with a trailing separator.
    load_location = Path(path) / f"custom_checkpoint_{index}.pkl"
    logger.info(f"Loading the state of {get_pretty_name(obj)} from {load_location}")
    obj.load_state_dict(load(load_location, map_location="cpu", weights_only=False))
class SeedableRandomSampler(RandomSampler):
    """
    A `RandomSampler` whose shuffling order is driven by an explicit seed.

    Useful in distributed runs, where every process has to start from the same seed and the order must be
    reproducible across iterations.

    The seed used for a given pass is `initial_seed + epoch`. `initial_seed` comes from the `data_seed` keyword
    argument when given, otherwise from `torch.random.initial_seed()`; `epoch` is advanced by one after each
    complete pass. If a custom `generator` is passed, it is re-seeded the same way.
    """

    def __init__(self, *args, **kwargs):
        # `data_seed` is our own keyword; strip it before delegating to `RandomSampler`
        seed_override = kwargs.pop("data_seed", None)
        super().__init__(*args, **kwargs)

        if seed_override is None:
            seed_override = torch.random.initial_seed()
        self.initial_seed = seed_override
        self.epoch = 0

    def __iter__(self):
        if self.generator is None:
            # Older torch versions have no notion of a default device
            if hasattr(torch, "get_default_device"):
                target_device = torch.get_default_device()
            else:
                target_device = "cpu"
            self.generator = torch.Generator(device=target_device)
            self.generator.manual_seed(self.initial_seed)

        # Let the current epoch shift the seed so that each pass gets its own permutation
        self.generator.manual_seed(self.epoch + self.initial_seed)
        yield from super().__iter__()
        # Only reached once the pass is fully consumed
        self.set_epoch(self.epoch + 1)

    def set_epoch(self, epoch: int):
        """Sets the current iteration of the sampler."""
        self.epoch = epoch
class BatchSamplerShard(BatchSampler):
    """
    Wraps a PyTorch `BatchSampler` to generate batches for one of the processes only. Instances of this class will
    always yield a number of batches that is a round multiple of `num_processes` and that all have the same size.
    Depending on the value of the `drop_last` attribute of the batch sampler passed, it will either stop the iteration
    at the first batch that would be too small / not present on all processes or loop with indices from the beginning.

    Batches produced by the wrapped sampler are never modified in place, so samplers that yield stored lists stay
    intact across epochs.

    Args:
        batch_sampler (`torch.utils.data.sampler.BatchSampler`):
            The batch sampler to split in several shards.
        num_processes (`int`, *optional*, defaults to 1):
            The number of processes running concurrently.
        process_index (`int`, *optional*, defaults to 0):
            The index of the current process.
        split_batches (`bool`, *optional*, defaults to `False`):
            Whether the shards should be created by splitting a batch to give a piece of it on each process, or by
            yielding different full batches on each process.

            On two processes with a sampler of `[[0, 1, 2, 3], [4, 5, 6, 7]]`, this will result in:

            - the sampler on process 0 to yield `[0, 1, 2, 3]` and the sampler on process 1 to yield `[4, 5, 6, 7]` if
              this argument is set to `False`.
            - the sampler on process 0 to yield `[0, 1]` then `[4, 5]` and the sampler on process 1 to yield `[2, 3]`
              then `[6, 7]` if this argument is set to `True`.
        even_batches (`bool`, *optional*, defaults to `True`):
            Whether or not to loop back at the beginning of the sampler when the number of samples is not a round
            multiple of (original batch size / number of processes).

    Raises:
        ValueError: In `split_batches` mode when the batch sampler has no batch size or when its batch size is not a
            round multiple of `num_processes`; also when the batch sampler has no batch size and `even_batches` is
            `True`.

    Note:
        `BatchSampler`s with varying batch sizes are not enabled by default. To enable this behaviour, set
        `even_batches` equal to `False`.
    """

    def __init__(
        self,
        batch_sampler: BatchSampler,
        num_processes: int = 1,
        process_index: int = 0,
        split_batches: bool = False,
        even_batches: bool = True,
    ):
        # Read the batch size defensively once; samplers without one are allowed with `even_batches=False`.
        batch_size = getattr(batch_sampler, "batch_size", None)
        if split_batches:
            if batch_size is None:
                # Previously surfaced as an AttributeError/TypeError from the modulo below
                raise ValueError(
                    "To use `BatchSamplerShard` in `split_batches` mode, the batch sampler needs to have a "
                    "`batch_size`."
                )
            if batch_size % num_processes != 0:
                raise ValueError(
                    f"To use `BatchSamplerShard` in `split_batches` mode, the batch size ({batch_sampler.batch_size}) "
                    f"needs to be a round multiple of the number of processes ({num_processes})."
                )
        self.batch_sampler = batch_sampler
        self.num_processes = num_processes
        self.process_index = process_index
        self.split_batches = split_batches
        self.even_batches = even_batches
        self.batch_size = batch_size
        self.drop_last = getattr(batch_sampler, "drop_last", False)
        if self.batch_size is None and self.even_batches:
            raise ValueError(
                "You need to use `even_batches=False` when the batch sampler has no batch size. If you "
                "are not calling this method directly, set `accelerator.even_batches=False` instead."
            )

    @property
    def total_length(self):
        """Number of batches of the wrapped batch sampler (across all processes)."""
        return len(self.batch_sampler)

    def __len__(self):
        if self.split_batches:
            # Split batches does not change the length of the batch sampler
            return len(self.batch_sampler)
        if len(self.batch_sampler) % self.num_processes == 0:
            # If the length is a round multiple of the number of processes, it's easy.
            return len(self.batch_sampler) // self.num_processes
        length = len(self.batch_sampler) // self.num_processes
        if self.drop_last:
            # Same if we drop the remainder.
            return length
        elif self.even_batches:
            # When we even batches we always get +1
            return length + 1
        else:
            # Otherwise it depends on the process index.
            return length + 1 if self.process_index < len(self.batch_sampler) % self.num_processes else length

    def __iter__(self):
        return self._iter_with_split() if self.split_batches else self._iter_with_no_split()

    def _iter_with_split(self):
        """Yield this process' slice of every batch (`split_batches=True`)."""
        initial_data = []
        batch_length = self.batch_sampler.batch_size // self.num_processes
        start = batch_length * self.process_index
        stop = batch_length * (self.process_index + 1)
        for idx, batch in enumerate(self.batch_sampler):
            if idx == 0:
                # Keep a copy: `initial_data` may be grown below and must not alias the sampler's own list
                initial_data = batch[:]
            if len(batch) == self.batch_size:
                # If the batch is full, we yield the part of it this process is responsible of.
                yield batch[start:stop]

        # If drop_last is True or the last batch was full, iteration is over, otherwise...
        if not self.drop_last and len(initial_data) > 0 and len(batch) < self.batch_size:
            if not self.even_batches:
                if len(batch) > start:
                    yield batch[start:stop]
            else:
                # For degenerate cases where the dataset has less than num_process * batch_size samples
                while len(initial_data) < self.batch_size:
                    initial_data += initial_data
                # Complete the short batch with samples from the beginning (new list, sampler data untouched)
                batch = batch + initial_data
                yield batch[start:stop]

    def _iter_with_no_split(self):
        """Yield every `num_processes`-th full batch, starting at `process_index` (`split_batches=False`)."""
        initial_data = []
        batch_to_yield = []
        for idx, batch in enumerate(self.batch_sampler):
            # We gather the initial indices in case we need to circle back at the end.
            if not self.drop_last and idx < self.num_processes:
                initial_data += batch
            # We identify the batch to yield but wait until we are sure every process gets a full batch before
            # actually yielding it.
            if idx % self.num_processes == self.process_index:
                batch_to_yield = batch
            if idx % self.num_processes == self.num_processes - 1 and (
                self.batch_size is None or len(batch) == self.batch_size
            ):
                yield batch_to_yield
                batch_to_yield = []

        # If drop_last is True, iteration is over, otherwise...
        if not self.drop_last and len(initial_data) > 0:
            if not self.even_batches:
                if len(batch_to_yield) > 0:
                    yield batch_to_yield
            else:
                # ... we yield the complete batch we had saved before if it has the proper length
                if len(batch_to_yield) == self.batch_size:
                    yield batch_to_yield

                # For degenerate cases where the dataset has less than num_process * batch_size samples
                while len(initial_data) < self.num_processes * self.batch_size:
                    initial_data += initial_data

                # If the last batch seen was of the proper size, it has been yielded by its process so we move to
                # the next
                if len(batch) == self.batch_size:
                    batch = []
                    idx += 1

                # Make sure we yield a multiple of self.num_processes batches
                cycle_index = 0
                while idx % self.num_processes != 0 or len(batch) > 0:
                    end_index = cycle_index + self.batch_size - len(batch)
                    # Build a new list instead of `batch += ...`: `batch` may still be the list object owned by
                    # the wrapped sampler, which must not be extended in place.
                    batch = batch + initial_data[cycle_index:end_index]
                    if idx % self.num_processes == self.process_index:
                        yield batch
                    cycle_index = end_index
                    batch = []
                    idx += 1
class IterableDatasetShard(IterableDataset):
    """
    Wraps a PyTorch `IterableDataset` to generate samples for one of the processes only. Instances of this class will
    always yield a number of samples that is a round multiple of the actual batch size (depending of the value of
    `split_batches`, this is either `batch_size` or `batch_size x num_processes`). Depending on the value of the
    `drop_last` argument, it will either stop the iteration at the first batch that would be too small or loop with
    samples from the beginning.

    Args:
        dataset (`torch.utils.data.dataset.IterableDataset`):
            The iterable dataset to split in several shards.
        batch_size (`int`, *optional*, defaults to 1):
            The size of the batches per shard (if `split_batches=False`) or the size of the batches (if
            `split_batches=True`).
        drop_last (`bool`, *optional*, defaults to `False`):
            Whether or not to drop the last incomplete batch or complete the last batches by using the samples from the
            beginning.
        num_processes (`int`, *optional*, defaults to 1):
            The number of processes running concurrently.
        process_index (`int`, *optional*, defaults to 0):
            The index of the current process.
        split_batches (`bool`, *optional*, defaults to `False`):
            Whether the shards should be created by splitting a batch to give a piece of it on each process, or by
            yielding different full batches on each process.

            On two processes with an iterable dataset yielding of `[0, 1, 2, 3, 4, 5, 6, 7]`, this will result in:

            - the shard on process 0 to yield `[0, 1, 2, 3]` and the shard on process 1 to yield `[4, 5, 6, 7]` if this
              argument is set to `False`.
            - the shard on process 0 to yield `[0, 1, 4, 5]` and the sampler on process 1 to yield `[2, 3, 6, 7]` if
              this argument is set to `True`.

    Raises:
        ValueError: In `split_batches` mode when `batch_size` is greater than 1 and not a round multiple of
            `num_processes`.
    """

    def __init__(
        self,
        dataset: IterableDataset,
        batch_size: int = 1,
        drop_last: bool = False,
        num_processes: int = 1,
        process_index: int = 0,
        split_batches: bool = False,
    ):
        if split_batches and batch_size > 1 and batch_size % num_processes != 0:
            raise ValueError(
                f"To use `IterableDatasetShard` in `split_batches` mode, the batch size ({batch_size}) "
                f"needs to be a round multiple of the number of processes ({num_processes})."
            )
        self.dataset: IterableDataset = dataset
        self.batch_size = batch_size
        self.drop_last = drop_last
        self.num_processes = num_processes
        self.process_index = process_index
        self.split_batches = split_batches
        # `__iter__` reads `self.epoch` to seed the dataset generator; give it a value so iterating
        # before any `set_epoch()` call does not fail with an AttributeError.
        self.epoch = 0

    def set_epoch(self, epoch):
        """Record the epoch and forward it to the wrapped dataset when it supports `set_epoch`."""
        self.epoch = epoch
        if hasattr(self.dataset, "set_epoch"):
            self.dataset.set_epoch(epoch)

    def __len__(self):
        # We will just raise the downstream error if the underlying dataset is not sized
        if self.drop_last:
            return (len(self.dataset) // (self.batch_size * self.num_processes)) * self.batch_size
        else:
            return math.ceil(len(self.dataset) / (self.batch_size * self.num_processes)) * self.batch_size

    def __iter__(self):
        # Datasets that cannot be told the epoch but expose a torch generator get re-seeded with the epoch instead
        if (
            not hasattr(self.dataset, "set_epoch")
            and hasattr(self.dataset, "generator")
            and isinstance(self.dataset.generator, torch.Generator)
        ):
            self.dataset.generator.manual_seed(self.epoch)
        # Number of samples consumed from the dataset per step, across all processes
        real_batch_size = self.batch_size if self.split_batches else (self.batch_size * self.num_processes)
        # Number of those samples that belong to this process
        process_batch_size = (self.batch_size // self.num_processes) if self.split_batches else self.batch_size
        process_slice = range(self.process_index * process_batch_size, (self.process_index + 1) * process_batch_size)

        first_batch = None
        current_batch = []
        for element in self.dataset:
            current_batch.append(element)
            # Wait to have a full batch before yielding elements.
            if len(current_batch) == real_batch_size:
                for i in process_slice:
                    yield current_batch[i]
                if first_batch is None:
                    first_batch = current_batch.copy()
                current_batch = []

        # Finished if drop_last is True, otherwise complete the last batch with elements from the beginning.
        if not self.drop_last and len(current_batch) > 0:
            if first_batch is None:
                first_batch = current_batch.copy()
            while len(current_batch) < real_batch_size:
                current_batch += first_batch
            for i in process_slice:
                yield current_batch[i]
class DataLoaderStateMixin:
    """
    Mixin class that adds a state to a `DataLoader` to keep track of the status inside the dataloader such as at the
    end of the iteration, the number of items in the dataset in the last batch relative to the batch size, and other
    useful information that might be needed.

    **Available attributes:**

        - **end_of_dataloader** (`bool`) -- Whether at the last iteration or batch
        - **remainder** (`int`) -- The number of items that are remaining in the last batch, relative to the total
          batch size (`-1` when it is unknown)

    Note:
        Inheriters of this class should ensure that the class creates a `GradientState()` instance, stored in
        `self.gradient_state`. `begin()` additionally reads `self._drop_last`, `self.dataset` and
        `self.total_batch_size`.
    """

    def __init_subclass__(cls, **kwargs):
        # Chain up so that other classes in the MRO relying on `__init_subclass__` keep working
        super().__init_subclass__(**kwargs)
        cls.end_of_dataloader = False
        cls.remainder = -1

    def reset(self):
        """Reset the per-iteration state to its initial values."""
        self.end_of_dataloader = False
        self.remainder = -1

    def begin(self):
        "Prepares the gradient state for the current dataloader"
        self.reset()
        # Computing the remainder is best effort: it is left at -1 when the length cannot be determined
        with suppress(Exception):
            if not self._drop_last:
                # Only fall back to `len()` when the attribute is really missing. Passing `len(self.dataset)`
                # as the `getattr` default evaluated it eagerly, so an unsized dataset that does expose
                # `total_dataset_length` raised and never got its remainder.
                try:
                    length = self.dataset.total_dataset_length
                except AttributeError:
                    length = len(self.dataset)
                self.remainder = length % self.total_batch_size
        self.gradient_state._add_dataloader(self)

    def end(self):
        "Cleans up the gradient state after exiting the dataloader"
        self.gradient_state._remove_dataloader(self)
class DataLoaderAdapter:
    """
    A class which wraps around a PyTorch `DataLoader` (or variants of it) to be used with the `Accelerator`. For
    compatibility reasons, this class reports the class of the dataloader it wraps as its own `__class__`, so it can
    be used as a drop-in.

    Args:
        dataset: The dataset handed to the underlying dataloader.
        use_stateful_dataloader (`bool`, *optional*, defaults to `False`):
            Whether to build a `torchdata` `StatefulDataLoader` instead of a regular `DataLoader`.
        batch_sampler (*optional*): Forwarded to the underlying dataloader.
        **kwargs: All other keyword arguments are forwarded to the underlying dataloader.

    Raises:
        ImportError: If `use_stateful_dataloader=True` but `StatefulDataLoader` is not available.
    """

    def __init__(self, dataset, use_stateful_dataloader=False, batch_sampler=None, **kwargs):
        self.use_stateful_dataloader = use_stateful_dataloader
        if is_torchdata_stateful_dataloader_available():
            from torchdata.stateful_dataloader import StatefulDataLoader

        if use_stateful_dataloader and not is_torchdata_stateful_dataloader_available():
            raise ImportError(
                "StatefulDataLoader is not available. Please install torchdata version 0.8.0 or higher to use it."
            )
        if use_stateful_dataloader:
            # `import importlib` alone does not guarantee that the `metadata` submodule is loaded,
            # so import it explicitly instead of relying on another module having done so.
            from importlib import metadata as importlib_metadata

            torchdata_version = version.parse(importlib_metadata.version("torchdata"))
            # Older torchdata releases do not know the `in_order` argument introduced with torch 2.6
            if (
                "in_order" in kwargs
                and compare_versions(torchdata_version, "<", "0.11")
                and is_torch_version(">=", "2.6.0")
            ):
                kwargs.pop("in_order")
            self.base_dataloader = StatefulDataLoader(dataset, batch_sampler=batch_sampler, **kwargs)
        else:
            self.base_dataloader = DataLoader(dataset, batch_sampler=batch_sampler, **kwargs)

        if hasattr(self.base_dataloader, "state_dict"):
            self.dl_state_dict = self.base_dataloader.state_dict()

    def __getattr__(self, name):
        # Avoid infinite recursion if we try to access a nonexistent base_dataloader attribute.
        if name == "base_dataloader":
            raise AttributeError(f"{type(self).__name__!r} object has no attribute {name!r}")
        # Delegate attribute access to the internal dataloader
        return getattr(self.base_dataloader, name)

    def state_dict(self):
        """Return the last snapshot of the underlying dataloader's state dict."""
        return self.dl_state_dict

    def load_state_dict(self, state_dict):
        """Load `state_dict` into the underlying dataloader."""
        self.base_dataloader.load_state_dict(state_dict)

    @property
    def __class__(self):
        """
        In order to maintain backwards compatibility with other code, we need to ensure `isinstance(obj, DataLoader)`
        returns true. This is because some downstream code assumes that the `DataLoader` is the base class of the
        object.
        """
        return self.base_dataloader.__class__

    def __len__(self):
        return len(self.base_dataloader)

    def adjust_state_dict_for_prefetch(self):
        """
        Adjusts the state dict for prefetching. Natively, this will adjust all of the iters yielded keys in
        `self.dl_state_dict` by a factor of `num_processes - 1`, however if a custom correction is needed, this can be
        overridden.

        This should modify `self.dl_state_dict` directly
        """
        # The state dict will be off by a factor of `n-1` batch too many during DDP,
        # so we need to adjust it here
        if PartialState().distributed_type != DistributedType.NO:
            factor = PartialState().num_processes - 1
            # When num_workers > 0, StatefulDataLoader uses _MultiProcessingDataLoaderIter
            # which may not have _sampler_iter_yielded or _num_yielded in its state_dict
            if "_sampler_iter_yielded" in self.dl_state_dict and self.dl_state_dict["_sampler_iter_yielded"] > 0:
                self.dl_state_dict["_sampler_iter_yielded"] -= factor
            if "_num_yielded" in self.dl_state_dict and self.dl_state_dict["_num_yielded"] > 0:
                self.dl_state_dict["_num_yielded"] -= factor
            if self.dl_state_dict.get("_index_sampler_state") is not None:
                if (
                    "samples_yielded" in self.dl_state_dict["_index_sampler_state"]
                    and self.dl_state_dict["_index_sampler_state"]["samples_yielded"] > 0
                ):
                    self.dl_state_dict["_index_sampler_state"]["samples_yielded"] -= self.batch_size * factor

    def _update_state_dict(self):
        # The state_dict of the underlying base_dataloader may be ahead of what is currently being yielded.
        # E.g. the implementation of DataLoaderShard involves having an underlying iterator 1 element ahead of
        # what it wants to yield.
        #
        # _update_state_dict is called to snapshot the state_dict that would properly recover the DataLoaderAdapter.
        if hasattr(self.base_dataloader, "state_dict"):
            self.dl_state_dict = self.base_dataloader.state_dict()
            # Potentially modify the state_dict to adjust for prefetching
            self.adjust_state_dict_for_prefetch()
            # Then tag if we are at the end of the dataloader
            self.dl_state_dict["_iterator_finished"] = self.end_of_dataloader
class DataLoaderShard(DataLoaderAdapter, DataLoaderStateMixin):
    """
    A `DataLoaderAdapter` that takes care of device placement and of the current distributed setup.

    Args:
        dataset (`torch.utils.data.dataset.Dataset`):
            Dataset wrapped by this dataloader.
        device (`torch.device`, *optional*):
            When given, each batch is sent to this device during iteration.
        rng_types (list of `str` or [`~utils.RNGType`]):
            Random number generators to synchronize at the start of every iteration. One or several of:

            - `"torch"`: the base torch random number generator
            - `"cuda"`: the CUDA random number generator (GPU only)
            - `"xla"`: the XLA random number generator (TPU only)
            - `"generator"`: an optional `torch.Generator`
        synchronized_generator (`torch.Generator`, *optional*):
            Generator that has to stay synchronized across processes.
        skip_batches (`int`, *optional*, defaults to 0):
            Number of leading batches that are fetched but not yielded.
        use_stateful_dataloader (`bool`, *optional*, defaults to `False`):
            Whether to adapt `StatefulDataLoader` from `torchdata` rather than the regular `DataLoader`.
        **kwargs (additional keyword arguments, *optional*):
            Everything else is forwarded to the regular `DataLoader` initialization.

    **Available attributes:**

        - **total_batch_size** (`int`) -- Total batch size of the dataloader across all processes.
            Equal to the original batch size when `split_batches=True`; otherwise the original batch size * the total
            number of processes

        - **total_dataset_length** (`int`) -- Total length of the inner dataset across all processes.
    """

    def __init__(
        self,
        dataset,
        device=None,
        rng_types=None,
        synchronized_generator=None,
        skip_batches=0,
        use_stateful_dataloader=False,
        _drop_last: bool = False,
        _non_blocking: bool = False,
        torch_device_mesh=None,
        **kwargs,
    ):
        # NOTE: `torch_device_mesh` is accepted for signature compatibility but is not stored here.
        super().__init__(dataset, use_stateful_dataloader=use_stateful_dataloader, **kwargs)
        # Iteration bookkeeping.
        self.iteration = 0
        self.skip_batches = skip_batches
        self.gradient_state = GradientState()
        # Device placement.
        self.device = device
        self._non_blocking = _non_blocking
        self._drop_last = _drop_last
        # RNG synchronization.
        self.rng_types = rng_types
        self.synchronized_generator = synchronized_generator

    def adjust_state_dict_for_prefetch(self):
        """
        Intentionally a no-op for `DataLoaderShard`.

        The DDP prefetch adjustment needed by `DataLoaderDispatcher` is not needed here: each process has its own
        sharded base dataloader, and the 1-batch look-ahead is already accounted for by the timing of the
        `_update_state_dict()` calls (made before the inner `next()`, so the captured state already equals the number
        of batches yielded to the user).
        """
        return None

    def __iter__(self):
        if self.rng_types is not None:
            synchronize_rng_states(self.rng_types, self.synchronized_generator)
        self.begin()

        self.set_epoch(self.iteration)
        inner_iter = self.base_dataloader.__iter__()
        # Stay one batch ahead of what is yielded so the final batch can be recognised.
        try:
            pending = next(inner_iter)
        except StopIteration:
            # Nothing to iterate over at all.
            self.end()
            return

        seen = 0
        while True:
            try:
                # Device transfer happens here so it is done before `StopIteration` is reached
                if self.device is not None:
                    pending = send_to_device(pending, self.device, non_blocking=self._non_blocking)
                # Snapshot the state before advancing the inner iterator.
                self._update_state_dict()
                upcoming = next(inner_iter)
                if seen >= self.skip_batches:
                    yield pending
                seen += 1
                pending = upcoming
            except StopIteration:
                # `pending` is the final batch: flag the end before handing it out.
                self.end_of_dataloader = True
                self._update_state_dict()
                if seen >= self.skip_batches:
                    yield pending
                break

        self.iteration += 1
        self.end()

    def __reduce__(self):
        """
        Make `DataLoaderShard` picklable. Default pickling is broken because `DataLoaderAdapter` tampers with its
        `__class__` member, so the reconstruction callable is replaced explicitly with `DataLoaderShard`.
        """
        reduced = super().__reduce__()
        return (DataLoaderShard, *reduced[1:])

    def set_epoch(self, epoch: int):
        """Record `epoch` and forward it to whichever sampler / dataset exposes `set_epoch`."""
        # The user may pass any epoch manually; only overwrite when it differs.
        if self.iteration != epoch:
            self.iteration = epoch

        outer = self.batch_sampler
        if hasattr(outer, "set_epoch"):
            outer.set_epoch(epoch)
        if hasattr(outer, "sampler") and hasattr(outer.sampler, "set_epoch"):
            outer.sampler.set_epoch(epoch)

        nested_has_set_epoch = (
            hasattr(outer, "batch_sampler")
            and hasattr(outer.batch_sampler, "sampler")
            and hasattr(outer.batch_sampler.sampler, "set_epoch")
        )
        if nested_has_set_epoch:
            outer.batch_sampler.sampler.set_epoch(epoch)
        # Custom `Dataset` implementations (or HF `datasets`) may expose `set_epoch` themselves
        elif hasattr(self.dataset, "set_epoch"):
            self.dataset.set_epoch(epoch)

    @property
    def total_batch_size(self):
        if isinstance(self.sampler, BatchSampler):
            source = self.sampler
        else:
            source = self.batch_sampler
        if getattr(source, "split_batches", False):
            return source.batch_size
        return source.batch_size * getattr(source, "num_processes", 1)

    @property
    def total_dataset_length(self):
        return self.dataset.total_length if hasattr(self.dataset, "total_length") else len(self.dataset)

    def get_sampler(self):
        return get_sampler(self)

    def set_sampler(self, sampler):
        """Install `sampler` on the (batch) sampler used by this dataloader."""
        if isinstance(self.sampler, BatchSampler):
            self.sampler.sampler = sampler
            return
        self.batch_sampler.sampler = sampler
        if hasattr(self.batch_sampler, "batch_sampler"):
            self.batch_sampler.batch_sampler.sampler = sampler
class DataLoaderDispatcher(DataLoaderAdapter, DataLoaderStateMixin):
    """
    Subclass of `DataLoaderAdapter` that will iterate and preprocess on process 0 only, then dispatch on each process
    their part of the batch.

    Args:
        split_batches (`bool`, *optional*, defaults to `False`):
            Whether the resulting `DataLoader` should split the batches of the original data loader across devices or
            yield full batches (in which case it will yield batches starting at the `process_index`-th and advancing of
            `num_processes` batches at each iteration). Another way to see this is that the observed batch size will be
            the same as the initial `dataloader` if this option is set to `True`, the batch size of the initial
            `dataloader` multiplied by `num_processes` otherwise. Setting this option to `True` requires that the batch
            size of the `dataloader` is a round multiple of the number of processes.
        skip_batches (`int`, *optional*, defaults to 0):
            The number of batches to skip at the beginning of an iteration.
        use_stateful_dataloader (`bool`, *optional*, defaults to `False`):
            Whether to have this class adapt `StatefulDataLoader` from `torchdata` instead of the regular `DataLoader`.
        _drop_last (`bool`, *optional*, defaults to `False`):
            When `False`, a trailing group of batches that does not divide evenly across processes is still dispatched,
            completed with samples taken from the first batch.
        _non_blocking (`bool`, *optional*, defaults to `False`):
            Forwarded as `non_blocking` when batches are sent to the device.
        slice_fn (`Callable`, *optional*):
            Function used to slice the broadcast batch for each process. Defaults to `slice_tensors`.
        torch_device_mesh (*optional*):
            Device mesh. Only its `"tp"` dimension is used by this class; combining `"tp"` with `"dp"` or `"fsdp"`
            raises a `ValueError`.
        **kwargs (additional keyword arguments, *optional*):
            All other keyword arguments to pass to the underlying dataloader initialization.

    **Available attributes:**

        - **total_batch_size** (`int`) -- Total batch size of the dataloader across all processes.
            Equal to the original batch size when `split_batches=True`; otherwise the original batch size * the total
            number of processes

        - **total_dataset_length** (`int`) -- Total length of the inner dataset across all processes.
    """

    def __init__(
        self,
        dataset,
        split_batches: bool = False,
        skip_batches=0,
        use_stateful_dataloader=False,
        _drop_last: bool = False,
        _non_blocking: bool = False,
        slice_fn=None,
        torch_device_mesh=None,
        **kwargs,
    ):
        shuffle = False
        from torch.utils.data.datapipes.iter.combinatorics import ShufflerIterDataPipe

        # We need to save the shuffling state of the DataPipe
        if isinstance(dataset, ShufflerIterDataPipe):
            shuffle = dataset._shuffle_enabled
        super().__init__(dataset, use_stateful_dataloader=use_stateful_dataloader, **kwargs)
        self.split_batches = split_batches
        if shuffle:
            # Re-apply the shuffle setting that was read before the base dataloader was built.
            torch.utils.data.graph_settings.apply_shuffle_settings(dataset, shuffle=shuffle)

        self.gradient_state = GradientState()
        self.state = PartialState()
        self._drop_last = _drop_last
        self._non_blocking = _non_blocking
        self.skip_batches = skip_batches
        self.torch_device_mesh = torch_device_mesh

        self.slice_fn = slice_tensors if slice_fn is None else slice_fn
        self.iteration = 0

        # if a device mesh is provided extract each dimension (dp, fsdp, tp)
        # device mesh may hold any number of dimensions, however,
        # below code is for targeted support for dp, fsdp and tp

        # device mesh will be used only if there is tp involved
        # or any multi-dimensional parallelism involving tp
        # (dp, tp) (fsdp, tp) (dp, fsdp, tp)
        # otherwise the default behaviour not using device mesh should be sufficient
        # since multi dimensional parallelism devoid of tp would anyway need
        # different batches for each process irrespective of dp or fsdp
        self.submesh_tp = None
        self.submesh_dp = None
        self.submesh_fsdp = None
        if self.torch_device_mesh and "tp" in self.torch_device_mesh.mesh_dim_names:
            self.submesh_tp = self.torch_device_mesh["tp"]
            if "dp" in self.torch_device_mesh.mesh_dim_names:
                self.submesh_dp = self.torch_device_mesh["dp"]
            if "fsdp" in self.torch_device_mesh.mesh_dim_names:
                self.submesh_fsdp = self.torch_device_mesh["fsdp"]
        if self.submesh_tp and (self.submesh_dp or self.submesh_fsdp):
            raise ValueError("TP + (DP/FSDP) is not yet supported in dispatch mode")

    def _fetch_batches(self, iterator):
        """
        Fetch the next batch to dispatch on process 0 and broadcast its structure to every process.

        Args:
            iterator: Iterator over the base dataloader; only consumed on process 0.

        Returns:
            `(batch, batch_info)` where `batch` is the fetched data (`None` on processes other than 0, or when nothing
            could be fetched) and `batch_info` is `[data_structure, stop_iteration]` as broadcast from process 0.

        Raises:
            RuntimeError: If the fetched batches cannot be concatenated.
        """
        batches, batch = None, None
        # On process 0, we gather the batch to dispatch.
        if self.state.process_index == 0:
            # Procedure to support TP only is simpler
            # since we want to dispatch the same batch of samples across all ranks
            # this removes complexity of handling multiple tp rank groups when TP + DP
            # combination is involved.

            try:
                # for TP case avoid using split_batches
                # since it would mean that the dataloader should be spilling out
                # duplicates of batches.
                if self.split_batches:
                    # One batch of the main iterator is dispatched and split.
                    if self.submesh_tp:
                        logger.warning(
                            "Use of split_batches for TP would need the dataloader to produce duplicate batches, "
                            "otherwise, use dispatch_batches=True instead."
                        )
                    self._update_state_dict()
                    batch = next(iterator)
                else:
                    # num_processes batches of the main iterator are concatenated then dispatched and split.
                    # We add the batches one by one so we have the remainder available when drop_last=False.
                    batches = []
                    if self.submesh_tp:
                        # when tp, extract single batch and then replicate
                        self._update_state_dict()
                        batch = next(iterator)
                        batches = [batch] * self.state.num_processes
                    else:
                        for _ in range(self.state.num_processes):
                            self._update_state_dict()
                            batches.append(next(iterator))
                    try:
                        batch = concatenate(batches, dim=0)
                    except RuntimeError as e:
                        raise RuntimeError(
                            "You can't use batches of different size with `dispatch_batches=True` or when using an "
                            "`IterableDataset`. Either pass `dispatch_batches=False` and have each process fetch its "
                            "own batch or pass `split_batches=True`. By doing so, the main process will fetch a full "
                            "batch and slice it into `num_processes` batches for each process."
                        ) from e
                # In both cases, we need to get the structure of the batch that we will broadcast on other
                # processes to initialize the tensors with the right shape.
                # data_structure, stop_iteration
                batch_info = [get_data_structure(batch), False]
            except StopIteration:
                batch_info = [None, True]
        else:
            batch_info = [None, self._stop_iteration]
        # This is inplace, so after this instruction, every process has the same `batch_info` as process 0.
        broadcast_object_list(batch_info)
        self._stop_iteration = batch_info[1]
        if self._stop_iteration:
            # If drop_last is False and split_batches is False, we may have a remainder to take care of.
            if not self.split_batches and not self._drop_last:
                if self.state.process_index == 0 and len(batches) > 0:
                    # Some batches were collected before the iterator ran out: dispatch them as a remainder.
                    batch = concatenate(batches, dim=0)
                    batch_info = [get_data_structure(batch), False]
                else:
                    batch_info = [None, True]
                broadcast_object_list(batch_info)
        return batch, batch_info

    def __iter__(self):
        self.begin()
        self.set_epoch(self.iteration)
        main_iterator = None
        if is_torch_version(">=", "2.0.1"):
            # NOTE PyTorch DataLoader adds forward compatibilities for DataPipes, which broadcasts
            # shared seed to all dist processes. Thus, we need to create iterator for all dist processes.
            # But, we only iterate through the DataLoader on process 0.
            main_iterator = self.base_dataloader.__iter__()
        elif self.state.process_index == 0:
            main_iterator = self.base_dataloader.__iter__()
        stop_iteration = False
        self._stop_iteration = False
        first_batch = None
        # Stay one fetch ahead so the last dispatched batch can be recognised.
        next_batch, next_batch_info = self._fetch_batches(main_iterator)
        batch_index = 0
        while not stop_iteration:
            batch, batch_info = next_batch, next_batch_info

            if self.state.process_index != 0:
                # Initialize tensors on other processes than process 0.
                batch = initialize_tensors(batch_info[0])
            batch = send_to_device(batch, self.state.device, non_blocking=self._non_blocking)
            # Broadcast the batch before splitting it.
            batch = broadcast(batch, from_process=0)

            if not self._drop_last and first_batch is None:
                # We keep at least num processes elements of the first batch to be able to complete the last batch
                first_batch = self.slice_fn(
                    batch,
                    slice(0, self.state.num_processes),
                    process_index=self.state.process_index,
                    num_processes=self.state.num_processes,
                )

            if batch is None:
                raise ValueError(
                    f"Batch does not contain any data (`{batch}`). At the end of all iterable data available before expected stop iteration."
                )

            observed_batch_size = find_batch_size(batch)
            batch_size = observed_batch_size // self.state.num_processes

            stop_iteration = self._stop_iteration
            if not stop_iteration:
                # We may still be at the end of the dataloader without knowing it yet: if there is nothing left in
                # the dataloader since the number of batches is a round multiple of the number of processes.
                next_batch, next_batch_info = self._fetch_batches(main_iterator)
                # next_batch_info[0] is None when there are no more batches, otherwise we still need to process them.
                if self._stop_iteration and next_batch_info[0] is None:
                    stop_iteration = True

            if not self._drop_last and stop_iteration and observed_batch_size % self.state.num_processes != 0:
                # If the last batch is not complete, let's add the first batch to it.
                batch = concatenate([batch, first_batch], dim=0)
                # Batch size computation above is wrong, it's off by 1 so we fix it.
                batch_size += 1

            data_slice = slice(self.state.process_index * batch_size, (self.state.process_index + 1) * batch_size)
            batch = self.slice_fn(
                batch,
                data_slice,
                process_index=self.state.process_index,
                num_processes=self.state.num_processes,
            )

            if stop_iteration:
                self.end_of_dataloader = True
                self._update_state_dict()
                self.remainder = observed_batch_size
            if batch_index >= self.skip_batches:
                yield batch
            batch_index += 1
        self.iteration += 1
        self.end()

    def set_epoch(self, epoch: int):
        """Record `epoch` and forward it to the sampler, or else the dataset, if it exposes `set_epoch`."""
        # In case it is manually passed in, the user can set it to what they like
        if self.iteration != epoch:
            self.iteration = epoch
        if hasattr(self.batch_sampler, "sampler") and hasattr(self.batch_sampler.sampler, "set_epoch"):
            self.batch_sampler.sampler.set_epoch(epoch)
        elif hasattr(self.dataset, "set_epoch"):
            self.dataset.set_epoch(epoch)

    def __len__(self):
        """
        Number of dispatch rounds: the base dataloader length when `split_batches` is set, otherwise that length
        divided by `num_processes` (rounded down if `_drop_last`, rounded up if not).
        """
        whole_length = len(self.base_dataloader)
        if self.split_batches:
            return whole_length
        elif self._drop_last:
            return whole_length // self.state.num_processes
        else:
            return math.ceil(whole_length / self.state.num_processes)

    def __reduce__(self):
        """
        Define the `__reduce__` method to ensure a `DataLoaderDispatcher` can be pickled and unpickled. This needs to
        be explicitly defined since default pickling behavior is broken by `DataLoaderAdapter` messing with its
        `__class__` member.
        """
        args = super().__reduce__()
        return (DataLoaderDispatcher, *args[1:])

    @property
    def total_batch_size(self):
        # NOTE(review): this reads `batch_size` / `num_processes` from `self.dataset`, not from the dataloader or
        # `self.state`. That only works if the wrapped dataset exposes those attributes -- confirm the intent.
        return (
            self.dataset.batch_size if self.split_batches else (self.dataset.batch_size * self.dataset.num_processes)
        )

    @property
    def total_dataset_length(self):
        return len(self.dataset)

    def get_sampler(self):
        return get_sampler(self)

    def set_sampler(self, sampler):
        """Install `sampler` on the (batch) sampler used by this dataloader."""
        sampler_is_batch_sampler = isinstance(self.sampler, BatchSampler)
        if sampler_is_batch_sampler:
            self.sampler.sampler = sampler
        else:
            self.batch_sampler.sampler = sampler
            if hasattr(self.batch_sampler, "batch_sampler"):
                self.batch_sampler.batch_sampler.sampler = sampler
def prepare_data_loader(
    dataloader: DataLoader,
    device: Optional[torch.device] = None,
    num_processes: Optional[int] = None,
    process_index: Optional[int] = None,
    split_batches: bool = False,
    put_on_device: bool = False,
    rng_types: Optional[list[Union[str, RNGType]]] = None,
    dispatch_batches: Optional[bool] = None,
    even_batches: bool = True,
    slice_fn_for_dispatch: Optional[Callable] = None,
    use_seedable_sampler: bool = False,
    data_seed: Optional[int] = None,
    non_blocking: bool = False,
    use_stateful_dataloader: bool = False,
    torch_device_mesh=None,
) -> DataLoader:
    """
    Wraps a PyTorch `DataLoader` to generate batches for one of the processes only.

    Depending on the value of the `drop_last` attribute of the `dataloader` passed, it will either stop the iteration
    at the first batch that would be too small / not present on all processes or loop with indices from the beginning.

    Args:
        dataloader (`torch.utils.data.dataloader.DataLoader`):
            The data loader to split across several devices.
        device (`torch.device`):
            The target device for the returned `DataLoader`.
        num_processes (`int`, *optional*):
            The number of processes running concurrently. Will default to the value given by [`~state.PartialState`].
        process_index (`int`, *optional*):
            The index of the current process. Will default to the value given by [`~state.PartialState`].
        split_batches (`bool`, *optional*, defaults to `False`):
            Whether the resulting `DataLoader` should split the batches of the original data loader across devices or
            yield full batches (in which case it will yield batches starting at the `process_index`-th and advancing of
            `num_processes` batches at each iteration).

            Another way to see this is that the observed batch size will be the same as the initial `dataloader` if
            this option is set to `True`, the batch size of the initial `dataloader` multiplied by `num_processes`
            otherwise.

            Setting this option to `True` requires that the batch size of the `dataloader` is a round multiple of
            the number of processes.
        put_on_device (`bool`, *optional*, defaults to `False`):
            Whether or not to put the batches on `device` (only works if the batches are nested list, tuples or
            dictionaries of tensors).
        rng_types (list of `str` or [`~utils.RNGType`]):
            The list of random number generators to synchronize at the beginning of each iteration. Should be one or
            several of:

            - `"torch"`: the base torch random number generator
            - `"cuda"`: the CUDA random number generator (GPU only)
            - `"xla"`: the XLA random number generator (TPU only)
            - `"generator"`: the `torch.Generator` of the sampler (or batch sampler if there is no sampler in your
              dataloader) or of the iterable dataset (if it exists) if the underlying dataset is of that type.

            The list passed in is never modified.
        dispatch_batches (`bool`, *optional*):
            If set to `True`, the dataloader prepared is only iterated through on the main process and then the batches
            are split and broadcast to each process. Will default to `True` when the underlying dataset is an
            `IterableDataset` and `put_on_device` is `True`, `False` otherwise.
        even_batches (`bool`, *optional*, defaults to `True`):
            If set to `True`, in cases where the total batch size across all processes does not exactly divide the
            dataset, samples at the start of the dataset will be duplicated so the batch can be divided equally among
            all workers.
        slice_fn_for_dispatch (`Callable`, *optional*):
            If passed, this function will be used to slice tensors across `num_processes`. Will default to
            [`~utils.slice_tensors`]. This argument is used only when `dispatch_batches` is set to `True` and will be
            ignored otherwise.
        use_seedable_sampler (`bool`, *optional*, defaults to `False`):
            Whether to use the [`~data_loader.SeedableRandomSampler`] instead of a `RandomSampler` for better
            reproducibility. Comes at a cost of potentially different performances due to different shuffling
            algorithms but ensures results will be the *exact* same. Should be paired with `set_seed()` at every
            `self.set_epoch`
        data_seed (`int`, *optional*, defaults to `None`):
            The seed to use for the underlying generator when using `use_seedable_sampler`. If `None`, the generator
            will use the current default seed from torch.
        non_blocking (`bool`, *optional*, defaults to `False`):
            If set to `True`, dataloader will utilize non-blocking host-to-device transfers. If the dataloader has
            `pin_memory` set to `True`, this will help to increase overlap between data transfer and computations.
        use_stateful_dataloader (`bool`, *optional*, defaults to `False`):
            If set to `True`, the dataloader prepared by the Accelerator will be backed by
            [torchdata.StatefulDataLoader](https://github.com/pytorch/data/tree/main/torchdata/stateful_dataloader).
            This requires `torchdata` version 0.8.0 or higher that supports StatefulDataLoader to be installed.
        torch_device_mesh (`torch.distributed.DeviceMesh`, *optional*, defaults to `None`):
            PyTorch device mesh.

    Returns:
        `torch.utils.data.dataloader.DataLoader`: A new data loader that will yield the portion of the batches

    Raises:
        ValueError: If `dispatch_batches=True` is combined with `put_on_device=False`, or if `split_batches=True` and
            the batch size is unavailable or (when greater than 1) not a round multiple of the number of processes.

    `BatchSampler`s with varying batch sizes are not enabled by default. To enable this behaviour, set `even_batches`
    equal to `False`
    """
    if dispatch_batches is None:
        if not put_on_device:
            dispatch_batches = False
        else:
            dispatch_batches = isinstance(dataloader.dataset, IterableDataset)

    if dispatch_batches and not put_on_device:
        raise ValueError("Using `dispatch_batches=True` requires `put_on_device=True`.")
    # Grab defaults from PartialState
    state = PartialState()
    if num_processes is None:
        num_processes = state.num_processes

    if process_index is None:
        process_index = state.process_index

    if torch_device_mesh:
        if state.distributed_type == DistributedType.DEEPSPEED:
            # In DeepSpeed, the optimizer sharing level in DP is determined by the config file.
            # Only considers "dp" and "tp".
            # Given a device mesh (dp, tp) = (2, 3):
            # - From the data parallel perspective, ranks should be structured as: 0 0 0 1 1 1
            # - Processes with the same DP rank will receive the same batch.
            submesh_tp_size = 1
            if "tp" in torch_device_mesh.mesh_dim_names:
                submesh_tp_size = torch_device_mesh["tp"].size()
            process_index = process_index // submesh_tp_size
            num_processes = num_processes // submesh_tp_size
        else:
            # when device mesh is used, specifically with TP
            # then there is need to update process_index and num_processes
            # to bring in the effect of generating same batch across TP ranks
            # and different batch across FSDP and DP ranks.
            # Example:
            # if device mesh is (dp,fsdp,tp) = (2, 2, 3)
            # ranks would range from 0...11
            # from data angle ranks should look like 0 0 0 1 1 1 2 2 2 3 3 3
            # processes with same ranks/ids would receive the same batch
            # for CP the same as TP applies
            submesh_fsdp_size = 1
            submesh_dp_size = 1
            submesh_tp_size = 1
            submesh_cp_size = 1
            if "tp" in torch_device_mesh.mesh_dim_names:
                submesh_tp_size = torch_device_mesh["tp"].size()
            if "cp" in torch_device_mesh.mesh_dim_names:
                submesh_cp_size = torch_device_mesh["cp"].size()
            if "dp_replicate" in torch_device_mesh.mesh_dim_names:
                submesh_dp_size = torch_device_mesh["dp_replicate"].size()
            if "dp_shard" in torch_device_mesh.mesh_dim_names:
                submesh_fsdp_size = torch_device_mesh["dp_shard"].size()
            process_index = process_index // (submesh_tp_size * submesh_cp_size)
            num_processes = submesh_fsdp_size * submesh_dp_size

    # Sanity check
    if split_batches:
        if dataloader.batch_size is not None:
            batch_size_for_check = dataloader.batch_size
        else:
            # For custom batch_sampler
            if hasattr(dataloader.batch_sampler, "batch_size"):
                batch_size_for_check = dataloader.batch_sampler.batch_size
            else:
                raise ValueError(
                    "In order to use `split_batches==True` you must have a `batch_size` attribute either in the passed "
                    "`dataloader` or `dataloader.batch_sampler` objects, and it has to return a natural number. "
                    "Your `dataloader.batch_size` is None and `dataloader.batch_sampler` "
                    f"(`{type(dataloader.batch_sampler)}`) does not have the `batch_size` attribute set."
                )

        if batch_size_for_check > 1 and batch_size_for_check % num_processes != 0:
            # Report the value that was actually checked: `dataloader.batch_size` is None when the size came
            # from a custom batch sampler.
            raise ValueError(
                f"To use a `DataLoader` in `split_batches` mode, the batch size ({batch_size_for_check}) "
                f"needs to be a round multiple of the number of processes ({num_processes})."
            )

    new_dataset = dataloader.dataset
    # Iterable dataset doesn't like batch_sampler, but data_loader creates a default one for it
    new_batch_sampler = dataloader.batch_sampler if not isinstance(new_dataset, IterableDataset) else None
    sampler_is_batch_sampler = isinstance(dataloader.sampler, BatchSampler)
    synchronized_generator = None

    sampler = get_sampler(dataloader)
    if isinstance(sampler, RandomSampler) and use_seedable_sampler:
        # When iterating through the dataloader during distributed processes
        # we want to ensure that on each process we are iterating through the same
        # samples in the same order if a seed is set. This requires a tweak
        # to the `torch.utils.data.RandomSampler` class (if used).
        sampler = SeedableRandomSampler(
            data_source=sampler.data_source,
            replacement=sampler.replacement,
            num_samples=sampler._num_samples,
            generator=getattr(
                sampler,
                "generator",
                torch.Generator(device=torch.get_default_device() if hasattr(torch, "get_default_device") else "cpu"),
            ),
            data_seed=data_seed,
        )

    if isinstance(dataloader.sampler, RandomSampler) and state.distributed_type == DistributedType.XLA:
        # isinstance(dataloader.sampler, RandomSampler) indicates the original dataloader has `shuffle` enabled.
        generator = torch.Generator(
            device=torch.get_default_device() if hasattr(torch, "get_default_device") else "cpu"
        )
        seed = int(torch.empty((), dtype=torch.int64).random_().item())
        generator.manual_seed(seed)
        dataloader.generator = generator
        dataloader.sampler.generator = generator
    # No change if no multiprocess
    if (num_processes != 1 or state.distributed_type == DistributedType.MEGATRON_LM) and not dispatch_batches:
        if is_datasets_available():
            from datasets import IterableDataset as DatasetsIterableDataset
        if (
            is_datasets_available()
            and isinstance(new_dataset, DatasetsIterableDataset)
            and not split_batches
            and new_dataset.n_shards > num_processes
        ):
            new_dataset = new_dataset.shard(num_shards=num_processes, index=process_index)
        elif isinstance(new_dataset, IterableDataset):
            if getattr(dataloader.dataset, "generator", None) is not None:
                synchronized_generator = dataloader.dataset.generator
            new_dataset = IterableDatasetShard(
                new_dataset,
                batch_size=dataloader.batch_size,
                drop_last=dataloader.drop_last,
                num_processes=num_processes,
                process_index=process_index,
                split_batches=split_batches,
            )
        else:
            if not use_seedable_sampler and hasattr(sampler, "generator"):
                if sampler.generator is None:
                    sampler.generator = torch.Generator(
                        device=torch.get_default_device() if hasattr(torch, "get_default_device") else "cpu"
                    )
                    seed = int(torch.empty((), dtype=torch.int64).random_().item())
                    sampler.generator.manual_seed(seed)
                synchronized_generator = sampler.generator
            batch_sampler = dataloader.sampler if sampler_is_batch_sampler else dataloader.batch_sampler
            new_batch_sampler = BatchSamplerShard(
                batch_sampler,
                num_processes=num_processes,
                process_index=process_index,
                split_batches=split_batches,
                even_batches=even_batches,
            )

    # We ignore all of those since they are all dealt with by our new_batch_sampler
    ignore_kwargs = [
        "batch_size",
        "shuffle",
        "sampler",
        "batch_sampler",
        "drop_last",
    ]

    if rng_types is not None and synchronized_generator is None and "generator" in rng_types:
        # There is no generator to synchronize. Drop the entry from a private copy so the list owned by the
        # caller is left untouched.
        rng_types = list(rng_types)
        rng_types.remove("generator")

    kwargs = {
        k: getattr(dataloader, k, _PYTORCH_DATALOADER_KWARGS[k])
        for k in _PYTORCH_DATALOADER_KWARGS
        if k not in ignore_kwargs
    }

    # Need to provide batch_size as batch_sampler is None for Iterable dataset
    if new_batch_sampler is None:
        kwargs["drop_last"] = dataloader.drop_last
        kwargs["batch_size"] = (
            dataloader.batch_size // num_processes if split_batches and not dispatch_batches else dataloader.batch_size
        )
    if dispatch_batches:
        kwargs.pop("generator")
        dataloader = DataLoaderDispatcher(
            new_dataset,
            split_batches=split_batches,
            batch_sampler=new_batch_sampler,
            _drop_last=dataloader.drop_last,
            _non_blocking=non_blocking,
            slice_fn=slice_fn_for_dispatch,
            use_stateful_dataloader=use_stateful_dataloader,
            torch_device_mesh=torch_device_mesh,
            **kwargs,
        )
    elif sampler_is_batch_sampler:
        dataloader = DataLoaderShard(
            new_dataset,
            device=device if put_on_device and state.distributed_type != DistributedType.XLA else None,
            sampler=new_batch_sampler,
            batch_size=dataloader.batch_size,
            rng_types=rng_types,
            _drop_last=dataloader.drop_last,
            _non_blocking=non_blocking,
            synchronized_generator=synchronized_generator,
            use_stateful_dataloader=use_stateful_dataloader,
            **kwargs,
        )
    else:
        dataloader = DataLoaderShard(
            new_dataset,
            device=device if put_on_device and state.distributed_type != DistributedType.XLA else None,
            batch_sampler=new_batch_sampler,
            rng_types=rng_types,
            synchronized_generator=synchronized_generator,
            _drop_last=dataloader.drop_last,
            _non_blocking=non_blocking,
            use_stateful_dataloader=use_stateful_dataloader,
            **kwargs,
        )

    if isinstance(sampler, SeedableRandomSampler) and use_seedable_sampler:
        dataloader.set_sampler(sampler)
    if state.distributed_type == DistributedType.XLA:
        return MpDeviceLoaderWrapper(dataloader, device)
    return dataloader
def skip_first_batches(dataloader, num_batches=0):
    """
    Creates a `torch.utils.data.DataLoader` that will efficiently skip the first `num_batches`. Should not be used if
    the original dataloader is a `StatefulDataLoader`.

    Args:
        dataloader:
            The dataloader to rebuild. When the distributed type is XLA, this is expected to be a wrapper exposing
            `.device` and `.dataloader`; the inner dataloader is used and the result is wrapped again.
        num_batches (`int`, *optional*, defaults to 0):
            The number of batches to skip at the beginning.

    Returns:
        A new dataloader over the same dataset: a `DataLoaderDispatcher` or `DataLoaderShard` when the input was one
        of those, otherwise a `SkipDataLoader` (iterable dataset) or a plain `DataLoader` (map-style dataset). On
        XLA the result is wrapped in `MpDeviceLoaderWrapper`.
    """
    state = PartialState()
    if state.distributed_type == DistributedType.XLA:
        # Unwrap the XLA wrapper; `device` is reused when re-wrapping at the end of the function.
        device = dataloader.device
        dataloader = dataloader.dataloader

    dataset = dataloader.dataset
    sampler_is_batch_sampler = False
    if isinstance(dataset, IterableDataset):
        # No batch sampler to wrap: skipping is delegated to the dataloader itself further below.
        new_batch_sampler = None
    else:
        # The batch sampler may have been passed as `sampler`; wrap whichever one actually produces the batches.
        sampler_is_batch_sampler = isinstance(dataloader.sampler, BatchSampler)
        batch_sampler = dataloader.sampler if sampler_is_batch_sampler else dataloader.batch_sampler
        new_batch_sampler = SkipBatchSampler(batch_sampler, skip_batches=num_batches)

    # We ignore all of those since they are all dealt with by our new_batch_sampler
    ignore_kwargs = [
        "batch_size",
        "shuffle",
        "sampler",
        "batch_sampler",
        "drop_last",
    ]

    # Copy the remaining DataLoader kwargs from the original, falling back to the recorded default for each one.
    kwargs = {
        k: getattr(dataloader, k, _PYTORCH_DATALOADER_KWARGS[k])
        for k in _PYTORCH_DATALOADER_KWARGS
        if k not in ignore_kwargs
    }

    # Need to provide batch_size as batch_sampler is None for Iterable dataset
    if new_batch_sampler is None:
        kwargs["drop_last"] = dataloader.drop_last
        kwargs["batch_size"] = dataloader.batch_size

    if isinstance(dataloader, DataLoaderDispatcher):
        if new_batch_sampler is None:
            # Need to manually skip batches in the dataloader
            kwargs["skip_batches"] = num_batches
        dataloader = DataLoaderDispatcher(
            dataset,
            split_batches=dataloader.split_batches,
            batch_sampler=new_batch_sampler,
            _drop_last=dataloader._drop_last,
            **kwargs,
        )
    elif isinstance(dataloader, DataLoaderShard):
        if new_batch_sampler is None:
            # Need to manually skip batches in the dataloader
            kwargs["skip_batches"] = num_batches
        elif sampler_is_batch_sampler:
            # Keep passing the (now skipping) batch sampler through the same `sampler` slot it came from.
            kwargs["sampler"] = new_batch_sampler
            kwargs["batch_size"] = dataloader.batch_size
        else:
            kwargs["batch_sampler"] = new_batch_sampler
        dataloader = DataLoaderShard(
            dataset,
            device=dataloader.device,
            rng_types=dataloader.rng_types,
            synchronized_generator=dataloader.synchronized_generator,
            **kwargs,
        )
    else:
        if new_batch_sampler is None:
            # Need to manually skip batches in the dataloader
            dataloader = SkipDataLoader(dataset, skip_batches=num_batches, **kwargs)
        else:
            dataloader = DataLoader(dataset, batch_sampler=new_batch_sampler, **kwargs)

    if state.distributed_type == DistributedType.XLA:
        # Restore the XLA wrapper that was stripped at the top.
        dataloader = MpDeviceLoaderWrapper(dataloader, device)

    return dataloader
+ +import functools +from collections.abc import Mapping +from typing import Optional, Union + +import torch +import torch.nn as nn + +from .state import PartialState +from .utils import ( + PrefixedDataset, + find_device, + named_module_tensors, + send_to_device, + set_module_tensor_to_device, +) +from .utils.imports import ( + is_mlu_available, + is_musa_available, + is_npu_available, +) +from .utils.memory import clear_device_cache +from .utils.modeling import get_non_persistent_buffers +from .utils.other import recursive_getattr + + +def _compiler_disable(fn): + """ + Lazy version of `torch.compiler.disable` that avoids importing `torch._dynamo` at decoration time. + `torch.compiler.disable` eagerly imports `torch._dynamo` which adds ~4s to import time. + """ + + @functools.wraps(fn) + def wrapper(*args, **kwargs): + if not hasattr(wrapper, "_compiled_fn"): + wrapper._compiled_fn = torch.compiler.disable(fn) + return wrapper._compiled_fn(*args, **kwargs) + + return wrapper + + +_accelerate_added_attributes = ["to", "cuda", "npu", "xpu", "mlu", "sdaa", "musa"] + + +class ModelHook: + """ + A hook that contains callbacks to be executed just before and after the forward method of a model. The difference + with PyTorch existing hooks is that they get passed along the kwargs. + + Class attribute: + - **no_grad** (`bool`, *optional*, defaults to `False`) -- Whether or not to execute the actual forward pass under + the `torch.no_grad()` context manager. + """ + + no_grad = False + + def init_hook(self, module): + """ + To be executed when the hook is attached to the module. + + Args: + module (`torch.nn.Module`): The module attached to this hook. + """ + return module + + def pre_forward(self, module, *args, **kwargs): + """ + To be executed just before the forward method of the model. + + Args: + module (`torch.nn.Module`): The module whose forward pass will be executed just after this event. + args (`Tuple[Any]`): The positional arguments passed to the module. 
class SequentialHook(ModelHook):
    """
    A composite hook: it holds several hooks and, for each event, runs them in the order they were given, feeding
    the result of one into the next.
    """

    def __init__(self, *hooks):
        self.hooks = hooks

    def init_hook(self, module):
        # Each hook receives the module returned by the previous one.
        return functools.reduce(lambda current, step: step.init_hook(current), self.hooks, module)

    @_compiler_disable
    def pre_forward(self, module, *args, **kwargs):
        for step in self.hooks:
            args, kwargs = step.pre_forward(module, *args, **kwargs)
        return args, kwargs

    @_compiler_disable
    def post_forward(self, module, output):
        # Each hook receives the output produced by the previous one.
        return functools.reduce(lambda current, step: step.post_forward(module, current), self.hooks, output)

    def detach_hook(self, module):
        return functools.reduce(lambda current, step: step.detach_hook(current), self.hooks, module)
def remove_hook_from_module(module: nn.Module, recurse=False):
    """
    Removes any hook attached to a module via `add_hook_to_module`.

    Args:
        module (`torch.nn.Module`): The module to detach the hook from.
        recurse (`bool`, **optional**): Whether to remove the hooks recursively

    Returns:
        `torch.nn.Module`: The same module, with the hook detached (the module is modified in place, so the result can
        be discarded).
    """
    if hasattr(module, "_hf_hook"):
        module._hf_hook.detach_hook(module)
        del module._hf_hook

    if hasattr(module, "_old_forward"):
        original_forward = module._old_forward
        # Overriding a GraphModuleImpl forward freezes the forward call and later modifications on the graph will fail.
        # Reference: https://pytorch.slack.com/archives/C3PDTEV8E/p1705929610405409
        forward_owner = module.__class__ if "GraphModuleImpl" in str(type(module)) else module
        forward_owner.forward = original_forward
        del module._old_forward

    # Remove accelerate added warning hooks from dispatch_model
    instance_attrs = module.__dict__
    for added_name in _accelerate_added_attributes:
        instance_attrs.pop(added_name, None)

    if recurse:
        for submodule in module.children():
            remove_hook_from_module(submodule, recurse)

    return module
+ offload_buffers (`bool`, *optional*, defaults to `False`): + Whether or not to include the associated module's buffers when offloading. + place_submodules (`bool`, *optional*, defaults to `False`): + Whether to place the submodules on `execution_device` during the `init_hook` event. + """ + + def __init__( + self, + execution_device: Optional[Union[int, str, torch.device]] = None, + offload: bool = False, + io_same_device: bool = False, + weights_map: Optional[Mapping] = None, + offload_buffers: bool = False, + place_submodules: bool = False, + skip_keys: Optional[Union[str, list[str]]] = None, + tied_params_map: Optional[dict[int, dict[torch.device, torch.Tensor]]] = None, + ): + self.execution_device = execution_device + self.offload = offload + self.io_same_device = io_same_device + self.weights_map = weights_map + self.offload_buffers = offload_buffers + self.place_submodules = place_submodules + self.skip_keys = skip_keys + + # Will contain the input device when `io_same_device=True`. + self.input_device = None + self.param_original_devices = {} + self.buffer_original_devices = {} + self.tied_params_names = set() + + # The hook pre_forward/post_forward need to have knowledge of this dictionary, as with offloading we want to avoid duplicating memory + # for tied weights already loaded on the target execution device. + self.tied_params_map = tied_params_map + + def __repr__(self): + return ( + f"AlignDevicesHook(execution_device={self.execution_device}, offload={self.offload}, " + f"io_same_device={self.io_same_device}, offload_buffers={self.offload_buffers}, " + f"place_submodules={self.place_submodules}, skip_keys={repr(self.skip_keys)})" + ) + + def init_hook(self, module): + # In case the AlignDevicesHook is on meta device, ignore tied weights as data_ptr() is then always zero. 
+ if self.execution_device == "meta" or self.execution_device == torch.device("meta"): + self.tied_params_map = None + + if not self.offload and self.execution_device is not None: + for name, _ in named_module_tensors(module, recurse=self.place_submodules): + set_module_tensor_to_device(module, name, self.execution_device, tied_params_map=self.tied_params_map) + elif self.offload: + self.original_devices = { + name: param.device for name, param in named_module_tensors(module, recurse=self.place_submodules) + } + if self.weights_map is None: + self.weights_map = { + name: param.to("cpu") + for name, param in named_module_tensors( + module, include_buffers=self.offload_buffers, recurse=self.place_submodules + ) + } + for name, _ in named_module_tensors( + module, include_buffers=self.offload_buffers, recurse=self.place_submodules, remove_non_persistent=True + ): + # When using disk offloading, we can not rely on `weights_map[name].data_ptr()` as the reference pointer, + # as we have no guarantee that safetensors' `file.get_tensor()` will always give the same pointer. + # As we have no reliable way to track the shared data pointer of tied weights in this case, we use tied_params_names: List[str] + # to add on the fly pointers to `tied_params_map` in the pre_forward call. 
+ if ( + self.tied_params_map is not None + and recursive_getattr(module, name).data_ptr() in self.tied_params_map + ): + self.tied_params_names.add(name) + + set_module_tensor_to_device(module, name, "meta") + + if not self.offload_buffers and self.execution_device is not None: + for name, _ in module.named_buffers(recurse=self.place_submodules): + set_module_tensor_to_device( + module, name, self.execution_device, tied_params_map=self.tied_params_map + ) + elif self.offload_buffers and self.execution_device is not None: + for name in get_non_persistent_buffers(module, recurse=self.place_submodules): + set_module_tensor_to_device( + module, name, self.execution_device, tied_params_map=self.tied_params_map + ) + + return module + + @_compiler_disable + def pre_forward(self, module, *args, **kwargs): + if self.io_same_device: + self.input_device = find_device([args, kwargs]) + if self.offload: + self.tied_pointers_to_remove = set() + + for name, _ in named_module_tensors( + module, + include_buffers=self.offload_buffers, + recurse=self.place_submodules, + remove_non_persistent=True, + ): + fp16_statistics = None + value = self.weights_map[name] + if "weight" in name and name.replace("weight", "SCB") in self.weights_map.keys(): + if value.dtype == torch.int8: + fp16_statistics = self.weights_map[name.replace("weight", "SCB")] + + # In case we are using offloading with tied weights, we need to keep track of the offloaded weights + # that are loaded on device at this point, as we will need to remove them as well from the dictionary + # self.tied_params_map in order to allow to free memory. 
+ if name in self.tied_params_names and value.data_ptr() not in self.tied_params_map: + self.tied_params_map[value.data_ptr()] = {} + + if ( + value is not None + and self.tied_params_map is not None + and value.data_ptr() in self.tied_params_map + and self.execution_device not in self.tied_params_map[value.data_ptr()] + ): + self.tied_pointers_to_remove.add((value.data_ptr(), self.execution_device)) + + set_module_tensor_to_device( + module, + name, + self.execution_device, + value=value, + fp16_statistics=fp16_statistics, + tied_params_map=self.tied_params_map, + ) + + return send_to_device(args, self.execution_device), send_to_device( + kwargs, self.execution_device, skip_keys=self.skip_keys + ) + + @_compiler_disable + def post_forward(self, module, output): + if self.offload: + for name, _ in named_module_tensors( + module, + include_buffers=self.offload_buffers, + recurse=self.place_submodules, + remove_non_persistent=True, + ): + set_module_tensor_to_device(module, name, "meta") + if type(module).__name__ == "Linear8bitLt": + module.state.SCB = None + module.state.CxB = None + + # We may have loaded tied weights into self.tied_params_map (avoiding to load them several times in e.g. submodules): remove them from + # this dictionary to allow the garbage collector to do its job. 
def attach_execution_device_hook(
    module: torch.nn.Module,
    execution_device: Union[int, str, torch.device],
    skip_keys: Optional[Union[str, list[str]]] = None,
    preload_module_classes: Optional[list[str]] = None,
    tied_params_map: Optional[dict[int, dict[torch.device, torch.Tensor]]] = None,
):
    """
    Walks `module` and its submodules, attaching an `AlignDevicesHook` for `execution_device` to every module that
    has no hook yet and a non-empty state dict.

    Args:
        module (`torch.nn.Module`):
            The module where we want to attach the hooks.
        execution_device (`int`, `str` or `torch.device`):
            The device on which inputs and model weights should be placed before the forward pass.
        skip_keys (`str` or `List[str]`, *optional*):
            A list of keys to ignore when moving inputs or outputs between devices.
        preload_module_classes (`List[str]`, *optional*):
            A list of class names at which the walk stops: a module whose class name is listed still gets its own
            hook, but its children are not visited.
        tied_params_map (Optional[Dict[int, Dict[torch.device, torch.Tensor]]], *optional*, defaults to `None`):
            A map of data pointers to dictionaries of devices to already dispatched tied weights, forwarded to every
            `AlignDevicesHook` created here.
    """
    already_hooked = hasattr(module, "_hf_hook")
    if not already_hooked and len(module.state_dict()) > 0:
        hook = AlignDevicesHook(execution_device, skip_keys=skip_keys, tied_params_map=tied_params_map)
        add_hook_to_module(module, hook)

    # Break the recursion if we get to a preload module.
    is_preload_module = preload_module_classes is not None and module.__class__.__name__ in preload_module_classes
    if is_preload_module:
        return

    shared_options = {
        "skip_keys": skip_keys,
        "preload_module_classes": preload_module_classes,
        "tied_params_map": tied_params_map,
    }
    for submodule in module.children():
        attach_execution_device_hook(submodule, execution_device, **shared_options)
+ execution_device (`torch.device`, *optional*): + The device on which inputs and model weights should be placed before the forward pass. + offload (`bool`, *optional*, defaults to `False`): + Whether or not the weights should be offloaded after the forward pass. + weights_map (`Mapping[str, torch.Tensor]`, *optional*): + When the model weights are offloaded, a (potentially lazy) map from param names to the tensor values. + offload_buffers (`bool`, *optional*, defaults to `False`): + Whether or not to include the associated module's buffers when offloading. + module_name (`str`, *optional*, defaults to `""`): + The name of the module. + skip_keys (`str` or `List[str]`, *optional*): + A list of keys to ignore when moving inputs or outputs between devices. + preload_module_classes (`List[str]`, *optional*): + A list of classes whose instances should load all their weights (even in the submodules) at the beginning + of the forward. This should only be used for classes that have submodules which are registered but not + called directly during the forward, for instance if a `dense` linear layer is registered, but at forward, + `dense.weight` and `dense.bias` are used in some operations instead of calling `dense` directly. + tied_params_map (Optional[Dict[int, Dict[torch.device, torch.Tensor]]], *optional*, defaults to `None`): + A map of data pointers to dictionaries of devices to already dispatched tied weights. For a given execution + device, this parameter is useful to reuse the first available pointer of a shared weight for all others, + instead of duplicating memory. + """ + # Attach the hook on this module if it has any direct tensor. + directs = named_module_tensors(module) + full_offload = ( + offload and preload_module_classes is not None and module.__class__.__name__ in preload_module_classes + ) + + if len(list(directs)) > 0 or full_offload: + if weights_map is not None: + prefix = f"{module_name}." 
def remove_hook_from_submodules(module: nn.Module):
    """
    Strips the hook from `module` itself and then, depth-first, from every module below it.

    Args:
        module (`torch.nn.Module`): The root of the module tree on which to remove all hooks.
    """
    remove_hook_from_module(module)
    for submodule in module.children():
        remove_hook_from_submodules(submodule)
+ execution_device (`torch.device` or `Dict[str, torch.device]`, *optional*): + The device on which inputs and model weights should be placed before the forward pass. It can be one device + for the whole module, or a dictionary mapping module name to device. + offload (`bool`, *optional*, defaults to `False`): + Whether or not the weights should be offloaded after the forward pass. It can be one boolean for the whole + module, or a dictionary mapping module name to boolean. + weights_map (`Mapping[str, torch.Tensor]`, *optional*): + When the model weights are offloaded, a (potentially lazy) map from param names to the tensor values. + offload_buffers (`bool`, *optional*, defaults to `False`): + Whether or not to include the associated module's buffers when offloading. + module_name (`str`, *optional*, defaults to `""`): + The name of the module. + skip_keys (`str` or `List[str]`, *optional*): + A list of keys to ignore when moving inputs or outputs between devices. + preload_module_classes (`List[str]`, *optional*): + A list of classes whose instances should load all their weights (even in the submodules) at the beginning + of the forward. This should only be used for classes that have submodules which are registered but not + called directly during the forward, for instance if a `dense` linear layer is registered, but at forward, + `dense.weight` and `dense.bias` are used in some operations instead of calling `dense` directly. + tied_params_map (Optional[Dict[int, Dict[torch.device, torch.Tensor]]], *optional*, defaults to `None`): + A map of data pointers to dictionaries of devices to already dispatched tied weights. For a given execution + device, this parameter is useful to reuse the first available pointer of a shared weight for all others, + instead of duplicating memory. + """ + # If one device and one offload, we've got one hook. 
+ if not isinstance(execution_device, Mapping) and not isinstance(offload, dict): + if not offload: + hook = AlignDevicesHook( + execution_device=execution_device, + io_same_device=True, + skip_keys=skip_keys, + place_submodules=True, + tied_params_map=tied_params_map, + ) + add_hook_to_module(module, hook) + else: + attach_align_device_hook( + module, + execution_device=execution_device, + offload=True, + weights_map=weights_map, + offload_buffers=offload_buffers, + module_name=module_name, + skip_keys=skip_keys, + tied_params_map=tied_params_map, + ) + return + + if not isinstance(execution_device, Mapping): + execution_device = {key: execution_device for key in offload.keys()} + if not isinstance(offload, Mapping): + offload = {key: offload for key in execution_device.keys()} + + if module_name in execution_device and module_name in offload and not offload[module_name]: + hook = AlignDevicesHook( + execution_device=execution_device[module_name], + offload_buffers=offload_buffers, + io_same_device=(module_name == ""), + place_submodules=True, + skip_keys=skip_keys, + tied_params_map=tied_params_map, + ) + add_hook_to_module(module, hook) + attach_execution_device_hook( + module, execution_device[module_name], skip_keys=skip_keys, tied_params_map=tied_params_map + ) + elif module_name in execution_device and module_name in offload: + attach_align_device_hook( + module, + execution_device=execution_device[module_name], + offload=True, + weights_map=weights_map, + offload_buffers=offload_buffers, + module_name=module_name, + skip_keys=skip_keys, + preload_module_classes=preload_module_classes, + tied_params_map=tied_params_map, + ) + if not hasattr(module, "_hf_hook"): + hook = AlignDevicesHook( + execution_device=execution_device[module_name], + io_same_device=(module_name == ""), + skip_keys=skip_keys, + tied_params_map=tied_params_map, + ) + add_hook_to_module(module, hook) + attach_execution_device_hook( + module, + execution_device[module_name], + 
class CpuOffload(ModelHook):
    """
    Offloads a model on the CPU until its forward pass is called. The model will not be offloaded back to the CPU after
    the forward, the user needs to call the `init_hook` method again for this.

    Args:
        execution_device(`str`, `int` or `torch.device`, *optional*):
            The device on which the model should be executed. When omitted, `PartialState().default_device` is used.
        prev_module_hook (`UserCpuOffloadHook`, *optional*):
            The hook sent back by [`cpu_offload_with_hook`] for a previous model in the pipeline you are running. If
            passed, its offload method will be called just before the forward of the model to which this hook is
            attached.
    """

    def __init__(
        self,
        execution_device: Optional[Union[str, int, torch.device]] = None,
        prev_module_hook: Optional["UserCpuOffloadHook"] = None,
    ):
        self.prev_module_hook = prev_module_hook

        self.execution_device = execution_device if execution_device is not None else PartialState().default_device

    def init_hook(self, module):
        return module.to("cpu")

    @staticmethod
    def _first_parameter_device(module):
        """Return the device of the first parameter of `module`, or `None` when it has no parameters."""
        first_param = next(module.parameters(), None)
        return None if first_param is None else first_param.device

    @_compiler_disable
    def pre_forward(self, module, *args, **kwargs):
        if self.prev_module_hook is not None and isinstance(self.prev_module_hook, UserCpuOffloadHook):
            prev_device = self._first_parameter_device(self.prev_module_hook.model)

            # Only offload the previous module if it is not already on CPU. A module without parameters has no
            # known device (`None`), so it is offloaded unconditionally rather than raising `StopIteration`.
            if prev_device != torch.device("cpu"):
                self.prev_module_hook.offload()
                clear_device_cache()

        # If the current device is already the self.execution_device, we can skip the transfer. A module without
        # parameters has no known device, so the transfer is always performed for it.
        current_device = self._first_parameter_device(module)
        if current_device is not None and current_device == self.execution_device:
            return args, kwargs

        module.to(self.execution_device)
        return send_to_device(args, self.execution_device), send_to_device(kwargs, self.execution_device)
+ """ + + _is_stateful = False + + def __init__(self, storage_dtype: torch.dtype, compute_dtype: torch.dtype, non_blocking: bool) -> None: + self.storage_dtype = storage_dtype + self.compute_dtype = compute_dtype + self.non_blocking = non_blocking + + def init_hook(self, module: torch.nn.Module): + module.to(dtype=self.storage_dtype, non_blocking=self.non_blocking) + return module + + @_compiler_disable + def pre_forward(self, module: torch.nn.Module, *args, **kwargs): + module.to(dtype=self.compute_dtype, non_blocking=self.non_blocking) + return args, kwargs + + @_compiler_disable + def post_forward(self, module: torch.nn.Module, output): + module.to(dtype=self.storage_dtype, non_blocking=self.non_blocking) + return output diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/inference.py b/URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..35c99399baba62f77bbaac8adf4e2eacdbabf779 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/inference.py @@ -0,0 +1,186 @@ +# Copyright 2024 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
def generate_device_map(
    model, num_processes: int = 1, no_split_module_classes=None, max_memory: Optional[dict] = None
):
    """
    Calculates the device map for `model` with an offset for PiPPy

    Args:
        model: The model to compute a device map for.
        num_processes (`int`, defaults to 1): Number of devices to spread the model over.
        no_split_module_classes: Forwarded to `infer_auto_device_map`.
        max_memory (`dict`, *optional*): Per-device memory budget. When `None` and `num_processes > 1`, an even
            budget is derived from the sizes reported by `calculate_maximum_sizes`.

    Returns:
        The device map produced by `infer_auto_device_map` (with `clean_result=False`).
    """
    if num_processes == 1:
        # NOTE: `max_memory` is not forwarded on the single-process path.
        return infer_auto_device_map(model, no_split_module_classes=no_split_module_classes, clean_result=False)
    if max_memory is None:
        model_size, shared = calculate_maximum_sizes(model)

        # Split into `n` chunks for each GPU
        memory = (model_size + shared[0]) / num_processes
        memory = convert_bytes(memory)
        value, ending = memory.split(" ")

        # Add a chunk to deal with potential extra shared memory instances
        memory = math.ceil(float(value)) * 1.1
        memory = f"{memory} {ending}"
        max_memory = {i: memory for i in range(num_processes)}
    device_map = infer_auto_device_map(
        model,
        max_memory=max_memory,
        no_split_module_classes=no_split_module_classes,
        clean_result=False,
    )
    return device_map


def find_pippy_batch_size(args, kwargs):
    """
    Returns the first non-`None` result of `ignorant_find_batch_size` over `args`, then over the values of `kwargs`.

    Either argument may be `None`. Returns `None` when no batch size is found.
    """
    found_batch_size = None
    if args is not None:
        for arg in args:
            found_batch_size = ignorant_find_batch_size(arg)
            if found_batch_size is not None:
                break
    if kwargs is not None and found_batch_size is None:
        for kwarg in kwargs.values():
            found_batch_size = ignorant_find_batch_size(kwarg)
            if found_batch_size is not None:
                break
    return found_batch_size


def build_pipeline(model, split_points, args, kwargs, num_chunks):
    """
    Annotates `model` with the given `split_points`, builds the pipeline stage for the current process and wraps it
    in a `ScheduleGPipe` schedule, which is returned. `args` and `kwargs` are the example inputs handed to
    `torch.distributed.pipelining.pipeline` as `mb_args` / `mb_kwargs`.

    `num_chunks` is passed to `ScheduleGPipe` as the number of chunks.
    """
    # Note: We import here to reduce import time from general modules, and isolate outside dependencies
    from torch.distributed.pipelining import ScheduleGPipe, SplitPoint, pipeline

    # We need to annotate the split points in the model for PiPPy
    state = PartialState()
    split_spec = {split_point: SplitPoint.BEGINNING for split_point in split_points}
    pipe = pipeline(
        model,
        mb_args=args,
        mb_kwargs=kwargs,
        split_spec=split_spec,
    )
    stage = pipe.build_stage(state.local_process_index, device=state.device)
    schedule = ScheduleGPipe(stage, num_chunks)

    return schedule


def pippy_forward(forward, num_chunks, gather_output, *args, **kwargs):
    """
    Runs one pipelined step through `forward`.

    With a single process, `forward` is called directly. Otherwise only the local main process feeds the inputs
    (padded with `pad_input_tensors` when the detected batch size differs from `num_chunks`), the last process
    collects the output and every other process just steps. When `gather_output` is set, the output goes through
    `copy_tensor_to_devices` before being returned.

    Raises:
        ValueError: If the local main process cannot find a batch size in `args` / `kwargs`.
    """
    state = PartialState()
    output = None

    if state.num_processes == 1:
        output = forward(*args, **kwargs)
    elif state.is_local_main_process:
        found_batch_size = find_pippy_batch_size(args, kwargs)
        if found_batch_size is None:
            raise ValueError("Could not find batch size from args or kwargs")
        if found_batch_size != num_chunks:
            args = pad_input_tensors(args, found_batch_size, num_chunks)
            kwargs = pad_input_tensors(kwargs, found_batch_size, num_chunks)
        forward(*args, **kwargs)
    elif state.is_last_process:
        output = forward()
    else:
        forward()
    if gather_output:
        # Each node will get a copy of the full output which is only on the last GPU
        output = copy_tensor_to_devices(output)
    return output


def prepare_pippy(
    model,
    split_points: Optional[Union[str, list[str]]] = "auto",
    no_split_module_classes: Optional[list[str]] = None,
    example_args: Optional[tuple[Any]] = (),
    example_kwargs: Optional[dict[str, Any]] = None,
    num_chunks: Optional[int] = None,
    gather_output: Optional[bool] = False,
):
    """
    Wraps `model` for pipeline parallel inference.

    Args:
        model (`torch.nn.Module`):
            A model we want to split for pipeline-parallel inference
        split_points (`str` or `List[str]`, defaults to 'auto'):
            How to generate the split points and chunk the model across each GPU. 'auto' will find the best balanced
            split given any model. Otherwise a list of layer names in the model to split by; a single layer name
            given as a plain string is treated as a one-element list.
        no_split_module_classes (`List[str]`):
            A list of class names for layers we don't want to be split.
        example_args (tuple of model inputs):
            The expected inputs for the model that uses order-based inputs for a *single process*. Recommended to use
            this method if possible.
        example_kwargs (dict of model inputs)
            The expected inputs for the model that uses dictionary-based inputs for a *single process*. This is a
            *highly* limiting structure that requires the same keys be present at *all* inference calls. Not
            recommended unless the prior condition is true for all cases.
        num_chunks (`int`, defaults to the number of available GPUs):
            The number of different stages the Pipeline will have. By default it will assign one chunk per GPU, but
            this can be tuned and played with. In general one should have num_chunks >= num_gpus.
        gather_output (`bool`, defaults to `False`):
            If `True`, the output from the last GPU (which holds the true outputs) is sent across to all GPUs.

    Returns:
        The same `model`, with its `forward` replaced by the pipelined forward and the attributes
        `_original_forward`, `_original_call`, `pippy_stage` and `hf_split_points` set.

    Raises:
        ImportError: If `is_pippy_available()` is false.
    """
    if not is_pippy_available():
        raise ImportError("Using `torch.distributed.pipelining` requires PyTorch 2.4.0 or later.")
    state = PartialState()
    example_args = send_to_device(example_args, "cpu")
    example_kwargs = send_to_device(example_kwargs, "cpu")
    if num_chunks is None:
        num_chunks = state.num_processes
    if split_points == "auto":
        device_map = generate_device_map(model, num_chunks, no_split_module_classes=no_split_module_classes)
        # The first module mapped to device `i` marks where stage `i` begins.
        split_points = [next(k for k, v in device_map.items() if v == i) for i in range(1, num_chunks)]
    elif isinstance(split_points, str):
        # A bare layer name would otherwise be iterated character by character when building the split spec.
        split_points = [split_points]
    stage = build_pipeline(model, split_points, example_args, example_kwargs, num_chunks)
    model._original_forward = model.forward
    model._original_call = model.__call__
    model.pippy_stage = stage
    model.hf_split_points = split_points

    def forward(*args, **kwargs):
        return pippy_forward(stage.step, num_chunks, gather_output, *args, **kwargs)

    # To act like a decorator so that it can be popped when doing `extract_model_from_parallel`
    # Note: creates an infinite recursion loop with `generate`
    model_forward = MethodType(forward, model)
    forward.__wrapped__ = model_forward
    model.forward = forward
    return model
def test_launch():
    """Verify a `PartialState` can be initialized."""
    _ = PartialState()


def notebook_launcher(
    function,
    args=(),
    num_processes=None,
    mixed_precision="no",
    use_port="29500",
    master_addr="127.0.0.1",
    node_rank=0,
    num_nodes=1,
    rdzv_backend="static",
    rdzv_endpoint="",
    rdzv_conf=None,
    rdzv_id="none",
    max_restarts=0,
    monitor_interval=0.1,
    log_line_prefix_template=None,
):
    """
    Launches a training function, using several processes or multiple nodes if it's possible in the current environment
    (TPU with multiple cores for instance).

    To use this function absolutely zero calls to a device must be made in the notebook session before calling. If any
    have been made, you will need to restart the notebook and make sure no cells use any device capability.

    Setting `ACCELERATE_DEBUG_MODE="1"` (or `"true"`) in your environment will run a test before truly launching to
    ensure that none of those calls have been made.

    Args:
        function (`Callable`):
            The training function to execute. If it accepts arguments, the first argument should be the index of the
            process run.
        args (`Tuple`):
            Tuple of arguments to pass to the function (it will receive `*args`).
        num_processes (`int`, *optional*):
            The number of processes to use for training. Will default to 8 in Colab/Kaggle if a TPU is available, to
            the number of devices available otherwise.
        mixed_precision (`str`, *optional*, defaults to `"no"`):
            If `fp16` or `bf16`, will use mixed precision training on multi-device.
        use_port (`str`, *optional*, defaults to `"29500"`):
            The port to use to communicate between processes when launching a multi-device training.
        master_addr (`str`, *optional*, defaults to `"127.0.0.1"`):
            The address to use for communication between processes.
        node_rank (`int`, *optional*, defaults to 0):
            The rank of the current node.
        num_nodes (`int`, *optional*, defaults to 1):
            The number of nodes to use for training.
        rdzv_backend (`str`, *optional*, defaults to `"static"`):
            The rendezvous method to use, such as 'static' (the default) or 'c10d'
        rdzv_endpoint (`str`, *optional*, defaults to `""`):
            The endpoint of the rdzv sync. storage.
        rdzv_conf (`Dict`, *optional*, defaults to `None`):
            Additional rendezvous configuration. The passed dictionary is copied, never modified.
        rdzv_id (`str`, *optional*, defaults to `"none"`):
            The unique run id of the job.
        max_restarts (`int`, *optional*, defaults to 0):
            The maximum amount of restarts that elastic agent will conduct on workers before failure.
        monitor_interval (`float`, *optional*, defaults to 0.1):
            The interval in seconds that is used by the elastic_agent as a period of monitoring workers.
        log_line_prefix_template (`str`, *optional*, defaults to `None`):
            The prefix template for elastic launch logging. Available from PyTorch 2.2.0.

    Raises:
        ValueError: On an unknown `mixed_precision`, a missing `num_processes`, `node_rank >= num_nodes`, or when an
            `Accelerator` was already initialized before a TPU / multi-device launch.
        RuntimeError: When a device-initializing library was imported beforehand, or when a launched process fails.

    Example:

    ```python
    # Assume this is defined in a Jupyter Notebook on an instance with two devices
    from accelerate import notebook_launcher


    def train(*args):
        # Your training function here
        ...


    notebook_launcher(train, args=(arg1, arg2), num_processes=2, mixed_precision="fp16")
    ```
    """
    # Are we in a google colab or a Kaggle Kernel?
    in_colab = False
    in_kaggle = False
    if any(key.startswith("KAGGLE") for key in os.environ.keys()):
        in_kaggle = True
    elif "IPython" in sys.modules:
        in_colab = "google.colab" in str(sys.modules["IPython"].get_ipython())

    try:
        mixed_precision = PrecisionType(mixed_precision.lower())
    except ValueError:
        # `mixed_precision` still holds the caller's string here; `args` is the positional-argument tuple and has no
        # `mixed_precision` attribute.
        raise ValueError(
            f"Unknown mixed_precision mode: {mixed_precision.lower()}. Choose between {PrecisionType.list()}."
        )

    if (in_colab or in_kaggle) and (
        (os.environ.get("TPU_NAME", None) is not None) or (os.environ.get("PJRT_DEVICE", "") == "TPU")
    ):
        # TPU launch
        import torch_xla.distributed.xla_multiprocessing as xmp

        if len(AcceleratorState._shared_state) > 0:
            raise ValueError(
                "To train on TPU in Colab or Kaggle Kernel, the `Accelerator` should only be initialized inside "
                "your training function. Restart your notebook and make sure no cells initializes an "
                "`Accelerator`."
            )

        launcher = PrepareForLaunch(function, distributed_type="XLA")
        print("Launching a training on TPU cores.")
        xmp.spawn(launcher, args=args, start_method="fork")
    elif in_colab and (not torch.cuda.is_available() or get_gpu_info()[1] < 2):
        # No need for a distributed launch otherwise as it's either CPU or one GPU.
        if torch.cuda.is_available():
            print("Launching training on one GPU.")
        else:
            print("Launching training on one CPU.")
        function(*args)
    else:
        if num_processes is None:
            raise ValueError(
                "You have to specify the number of devices you would like to use, add `num_processes=...` to your call."
            )
        if node_rank >= num_nodes:
            raise ValueError("The node_rank must be less than the number of nodes.")
        if num_processes > 1:
            # Multi-device launch
            from torch.distributed.launcher.api import LaunchConfig, elastic_launch
            from torch.multiprocessing import start_processes
            from torch.multiprocessing.spawn import ProcessRaisedException

            if len(AcceleratorState._shared_state) > 0:
                raise ValueError(
                    "To launch a multi-device training from your notebook, the `Accelerator` should only be initialized "
                    "inside your training function. Restart your notebook and make sure no cells initializes an "
                    "`Accelerator`."
                )
            # Check for specific libraries known to initialize device that users constantly use
            problematic_imports = are_libraries_initialized("bitsandbytes")
            if len(problematic_imports) > 0:
                err = (
                    "Could not start distributed process. Libraries known to initialize device upon import have been "
                    "imported already. Please keep these imports inside your training function to try and help with this:"
                )
                for lib_name in problematic_imports:
                    err += f"\n\t* `{lib_name}`"
                raise RuntimeError(err)

            patched_env = dict(
                nproc=num_processes,
                node_rank=node_rank,
                world_size=num_nodes * num_processes,
                master_addr=master_addr,
                master_port=use_port,
                mixed_precision=mixed_precision,
            )

            # Check for CUDA P2P and IB issues
            if not check_cuda_p2p_ib_support():
                patched_env["nccl_p2p_disable"] = "1"
                patched_env["nccl_ib_disable"] = "1"

            # torch.distributed will expect a few environment variable to be here. We set the ones common to each
            # process here (the other ones will be set be the launcher).
            with patch_environment(**patched_env):
                # First dummy launch
                # Determine device type without initializing any device (which would break fork)
                device_type, distributed_type = get_current_device_type()
                # XPU requires spawn instead of fork
                start_method = "spawn" if device_type == "xpu" else "fork"
                # Accept "1" as well as "true": the docstring advertises `ACCELERATE_DEBUG_MODE="1"`.
                if os.environ.get("ACCELERATE_DEBUG_MODE", "false").lower() in ("1", "true"):
                    launcher = PrepareForLaunch(test_launch, distributed_type=distributed_type)
                    try:
                        start_processes(launcher, args=(), nprocs=num_processes, start_method=start_method)
                    except ProcessRaisedException as e:
                        err = "An issue was found when verifying a stable environment for the notebook launcher."
                        if f"Cannot re-initialize {device_type.upper()} in forked subprocess" in e.args[0]:
                            raise RuntimeError(
                                f"{err}"
                                "This likely stems from an outside import causing issues once the `notebook_launcher()` is called. "
                                "Please review your imports and test them when running the `notebook_launcher()` to identify "
                                f"which one is problematic and causing {device_type.upper()} to be initialized."
                            ) from e
                        else:
                            raise RuntimeError(f"{err} The following error was raised: {e}") from e
                # Now the actual launch
                launcher = PrepareForLaunch(function, distributed_type=distributed_type)
                print(f"Launching training on {num_processes} {device_type.upper()}s.")
                try:
                    # Work on a copy so the caller's dictionary is never mutated by the `rank` entry below.
                    rdzv_conf = {} if rdzv_conf is None else dict(rdzv_conf)
                    if rdzv_backend == "static":
                        rdzv_conf["rank"] = node_rank
                        if not rdzv_endpoint:
                            rdzv_endpoint = f"{master_addr}:{use_port}"
                    launch_config_kwargs = dict(
                        min_nodes=num_nodes,
                        max_nodes=num_nodes,
                        nproc_per_node=num_processes,
                        run_id=rdzv_id,
                        rdzv_endpoint=rdzv_endpoint,
                        rdzv_backend=rdzv_backend,
                        rdzv_configs=rdzv_conf,
                        max_restarts=max_restarts,
                        monitor_interval=monitor_interval,
                        start_method=start_method,
                    )
                    if is_torch_version(">=", ELASTIC_LOG_LINE_PREFIX_TEMPLATE_PYTORCH_VERSION):
                        launch_config_kwargs["log_line_prefix_template"] = log_line_prefix_template
                    elastic_launch(config=LaunchConfig(**launch_config_kwargs), entrypoint=function)(*args)
                except ProcessRaisedException as e:
                    if f"Cannot re-initialize {device_type.upper()} in forked subprocess" in e.args[0]:
                        raise RuntimeError(
                            f"{device_type.upper()} has been initialized before the `notebook_launcher` could create a forked subprocess. "
                            "This likely stems from an outside import causing issues once the `notebook_launcher()` is called. "
                            "Please review your imports and test them when running the `notebook_launcher()` to identify "
                            f"which one is problematic and causing {device_type.upper()} to be initialized."
                        ) from e
                    else:
                        raise RuntimeError(f"An issue was found when launching the training: {e}") from e

        else:
            # No need for a distributed launch otherwise as it's either CPU, GPU, XPU or MPS.
            if is_mps_available():
                os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
                print("Launching training on MPS.")
            elif torch.cuda.is_available():
                print("Launching training on one GPU.")
            elif torch.xpu.is_available():
                print("Launching training on one XPU.")
            else:
                print("Launching training on CPU.")
            function(*args)


def debug_launcher(function, args=(), num_processes=2):
    """
    Launches a training function using several processes on CPU for debugging purposes.

    This function is provided for internal testing and debugging, but it's not intended for real trainings. It will
    only use the CPU.

    Args:
        function (`Callable`):
            The training function to execute.
        args (`Tuple`):
            Tuple of arguments to pass to the function (it will receive `*args`).
        num_processes (`int`, *optional*, defaults to 2):
            The number of processes to use for training.
    """
    from torch.multiprocessing import start_processes

    # The temporary file's name is exposed to the processes through `accelerate_debug_rdv_file`.
    with tempfile.NamedTemporaryFile() as tmp_file:
        # torch.distributed will expect a few environment variable to be here. We set the ones common to each
        # process here (the other ones will be set be the launcher).
        with patch_environment(
            world_size=num_processes,
            master_addr="127.0.0.1",
            master_port="29500",
            accelerate_mixed_precision="no",
            accelerate_debug_rdv_file=tmp_file.name,
            accelerate_use_cpu="yes",
        ):
            launcher = PrepareForLaunch(function, debug=True)
            start_processes(launcher, args=args, nprocs=num_processes, start_method="fork")
class LocalSGD:
    """
    A helper class to support local SGD on top of Accelerator. It simply runs a given number of updates independently
    on each device, and averages model weights every K synchronization step.

    It should be used only in the multi-GPU (or multi-CPU) setup without extensions such as DeepSpeed. In particular,
    this is a simple implementation that cannot support scenarios such as model parallelism.


    Although we are not aware of the true origins of this simple approach, the idea of local SGD is quite old and goes
    back to at least:

    Zhang, J., De Sa, C., Mitliagkas, I., & Ré, C. (2016). [Parallel SGD: When does averaging help?. arXiv preprint
    arXiv:1606.07365.](https://huggingface.co/papers/1606.07365)

    We credit the term Local SGD to the following paper (but there might be earlier references we are not aware of).

    Stich, Sebastian Urban. ["Local SGD Converges Fast and Communicates Little." ICLR 2019-International Conference on
    Learning Representations. No. CONF. 2019.](https://huggingface.co/papers/1805.09767)

    """

    def __enter__(self):
        # While enabled, the whole `with` body runs inside the model's `no_sync()` context.
        if self.enabled:
            self.model_sync_obj = self.model.no_sync()
            self.model_sync_obj.__enter__()

        return self

    def __exit__(self, type, value, tb):
        if self.enabled:
            try:
                # Average all models on exit
                self._sync_and_avg_model_params()
            finally:
                # Always leave the `no_sync()` context, even if the averaging above raised.
                self.model_sync_obj.__exit__(type, value, tb)

    def __init__(self, accelerator: Accelerator, model: torch.nn.Module, local_sgd_steps: int, enabled: bool = True):
        """
        Constructor.

        Args:
            accelerator (`Accelerator`):
                Accelerator object.
            model (`torch.nn.Module`):
                The model whose parameters we need to average.
            local_sgd_steps (`int`):
                A number of local SGD steps (before model parameters are synchronized).
            enabled (`bool`):
                Local SGD is disabled if this parameter set to `False`. It is also disabled when
                `accelerator.distributed_type` is `DistributedType.NO`.

        Raises:
            NotImplementedError: If `accelerator.distributed_type` is not one of the supported types listed below.
        """
        if accelerator.distributed_type not in [
            DistributedType.NO,
            DistributedType.MULTI_CPU,
            DistributedType.MULTI_GPU,
            DistributedType.MULTI_XPU,
            DistributedType.MULTI_MLU,
            DistributedType.MULTI_HPU,
            DistributedType.MULTI_SDAA,
            DistributedType.MULTI_MUSA,
            DistributedType.MULTI_NPU,
            DistributedType.MULTI_NEURON,
        ]:
            raise NotImplementedError("LocalSGD is supported only for CPUs and GPUs (no DeepSpeed or MegatronLM)")
        self.enabled = enabled and accelerator.distributed_type != DistributedType.NO
        self.num_steps = 0
        # The remaining attributes are only set (and only read) when local SGD is enabled.
        if self.enabled:
            self.accelerator = accelerator
            self.model = model
            self.local_sgd_steps = local_sgd_steps

    def step(self):
        """
        This function makes a "step" and synchronizes model parameters if necessary.

        The step counter is incremented even when local SGD is disabled; parameters are averaged every
        `local_sgd_steps` steps otherwise.
        """
        self.num_steps += 1
        if not self.enabled:
            return

        if self.num_steps % self.local_sgd_steps == 0:
            self._sync_and_avg_model_params()

    def _sync_and_avg_model_params(self):
        """
        Synchronize + Average model parameters across all processes.
        """

        self.accelerator.wait_for_everyone()
        with self.accelerator.autocast():
            for param in self.model.parameters():
                param.data = self.accelerator.reduce(param.data, reduction="mean")
class MultiProcessAdapter(logging.LoggerAdapter):
    """
    Logger adapter for multi-process runs.

    `log` understands an extra `main_process_only` keyword (default `True`) that decides whether the record is
    emitted by every process or by the main process only, and an `in_order` keyword that makes processes log one
    after another.

    An `Accelerator` object is not needed, but the accelerate state must have been initialized.
    """

    @staticmethod
    def _should_log(main_process_only):
        "Tell whether this process is allowed to emit the record"
        state = PartialState()
        if not main_process_only:
            return True
        return state.is_main_process

    def process(self, msg, kwargs):
        msg, kwargs = super().process(msg, kwargs)

        # `stacklevel` hides this adapter from `Logger.findCaller()`; a value supplied by the caller wins
        if "stacklevel" not in kwargs:
            kwargs["stacklevel"] = 2

        rank = PartialState().process_index
        return f"[RANK {rank}] {msg}", kwargs

    def log(self, level, msg, *args, **kwargs):
        """
        Forwards the record to the wrapped logger once it is established that this process should log.

        Two extra keyword arguments are consumed here:

        - `main_process_only` (default `True`): emit on the main process only instead of on every process.
        - `in_order` (default `False`): each process logs in turn, ordered by process index, waiting for the others
          between turns. Easier to read, at the cost of synchronization.

        `main_process_only` is ignored if `in_order` is passed.

        Raises:
            RuntimeError: If the accelerate state has not been initialized yet.
        """
        if PartialState._shared_state == {}:
            raise RuntimeError(
                "You must initialize the accelerate state by calling either `PartialState()` or `Accelerator()` before using the logging utility."
            )
        main_process_only = kwargs.pop("main_process_only", True)
        in_order = kwargs.pop("in_order", False)

        if not self.isEnabledFor(level):
            return

        msg, kwargs = self.process(msg, kwargs)
        if in_order:
            state = PartialState()
            for rank in range(state.num_processes):
                if rank == state.process_index:
                    self.logger.log(level, msg, *args, **kwargs)
                state.wait_for_everyone()
        elif self._should_log(main_process_only):
            self.logger.log(level, msg, *args, **kwargs)

    @functools.lru_cache(None)
    def warning_once(self, *args, **kwargs):
        """
        Same as `logger.warning()`, except that a given set of arguments only triggers the warning the first time.

        Note: the cache key is made of the call arguments, so two different call sites passing identical arguments
        share one cache entry. This relies on warning messages being unique across the code base; otherwise a cache
        that also accounts for the caller frame would be needed.
        """
        self.warning(*args, **kwargs)


def get_logger(name: str, log_level: str | None = None):
    """
    Builds a `MultiProcessAdapter` around `logging.getLogger(name)` so that logging works with multiprocessing.

    Pass `main_process_only=False` to a log call to emit it on all processes, and additionally `in_order=True` to
    emit it on all processes one after the other.

    Args:
        name (`str`):
            The name for the logger, such as `__file__`
        log_level (`str`, *optional*):
            The log level to use. If not passed, the `ACCELERATE_LOG_LEVEL` environment variable is used; when that
            is unset as well, no level is set. A resolved level is applied (upper-cased) to both the named logger
            and the root logger.

    Example:

    ```python
    >>> from accelerate.logging import get_logger
    >>> from accelerate import Accelerator

    >>> logger = get_logger(__name__)

    >>> accelerator = Accelerator()
    >>> logger.info("My log", main_process_only=False)
    >>> logger.debug("My log", main_process_only=True)

    >>> logger = get_logger(__name__, log_level="DEBUG")
    >>> logger.info("My log")
    >>> logger.debug("My second log")

    >>> array = ["a", "b", "c", "d"]
    >>> letter_at_rank = array[accelerator.process_index]
    >>> logger.info(letter_at_rank, in_order=True)
    ```
    """
    level_name = log_level if log_level is not None else os.environ.get("ACCELERATE_LOG_LEVEL", None)
    base_logger = logging.getLogger(name)
    if level_name is not None:
        resolved = level_name.upper()
        base_logger.setLevel(resolved)
        base_logger.root.setLevel(resolved)
    return MultiProcessAdapter(base_logger, {})
def move_to_device(state, device):
    """
    Recursively sends every tensor found in `state` to `device`.

    Lists and tuples are rebuilt through `honor_type`, dictionaries are rebuilt with their own type, tensors are
    moved with `.to(device)` and any other object is returned as is.
    """
    if isinstance(state, torch.Tensor):
        return state.to(device)
    if isinstance(state, dict):
        moved = {key: move_to_device(value, device) for key, value in state.items()}
        return type(state)(moved)
    if isinstance(state, (list, tuple)):
        return honor_type(state, (move_to_device(item, device) for item in state))
    return state
+ + Conditionally will perform `step` and `zero_grad` if gradients should be synchronized when performing gradient + accumulation. + + Args: + optimizer (`torch.optim.optimizer.Optimizer`): + The optimizer to wrap. + device_placement (`bool`, *optional*, defaults to `True`): + Whether or not the optimizer should handle device placement. If so, it will place the state dictionary of + `optimizer` on the right device. + scaler (`torch.amp.GradScaler` or `torch.cuda.amp.GradScaler`, *optional*): + The scaler to use in the step function if training with mixed precision. + """ + + def __init__(self, optimizer, device_placement=True, scaler=None): + self.optimizer = optimizer + self.scaler = scaler + self.accelerator_state = AcceleratorState() + self.gradient_state = GradientState() + self.device_placement = device_placement + self._is_overflow = False + + if self.scaler is not None: + self._accelerate_step_called = False + self._optimizer_original_step_method = self.optimizer.step + self._optimizer_patched_step_method = patch_optimizer_step(self, self.optimizer.step) + + # Handle device placement + if device_placement: + state_dict = self.optimizer.state_dict() + if self.accelerator_state.distributed_type == DistributedType.XLA: + xm.send_cpu_data_to_device(state_dict, self.accelerator_state.device) + else: + state_dict = move_to_device(state_dict, self.accelerator_state.device) + self.optimizer.load_state_dict(state_dict) + + @property + def state(self): + return self.optimizer.state + + @state.setter + def state(self, state): + self.optimizer.state = state + + @property + def param_groups(self): + return self.optimizer.param_groups + + @param_groups.setter + def param_groups(self, param_groups): + self.optimizer.param_groups = param_groups + + @property + def defaults(self): + return self.optimizer.defaults + + @defaults.setter + def defaults(self, defaults): + self.optimizer.defaults = defaults + + def add_param_group(self, param_group): + 
self.optimizer.add_param_group(param_group) + + def load_state_dict(self, state_dict): + if self.accelerator_state.distributed_type == DistributedType.XLA and self.device_placement: + xm.send_cpu_data_to_device(state_dict, self.accelerator_state.device) + self.optimizer.load_state_dict(state_dict) + + def state_dict(self): + return self.optimizer.state_dict() + + def zero_grad(self, set_to_none=None): + if self.gradient_state.sync_gradients: + accept_arg = "set_to_none" in inspect.signature(self.optimizer.zero_grad).parameters + if accept_arg: + if set_to_none is None: + set_to_none = True + self.optimizer.zero_grad(set_to_none=set_to_none) + else: + if set_to_none is not None: + raise ValueError("`set_to_none` for Optimizer.zero_grad` is not supported by this optimizer.") + self.optimizer.zero_grad() + + def train(self): + """ + Sets the optimizer to "train" mode. Useful for optimizers like `schedule_free` + """ + if hasattr(self.optimizer, "train") and callable(self.optimizer.train): + self.optimizer.train() + elif ( + hasattr(self.optimizer, "optimizer") + and hasattr(self.optimizer.optimizer, "train") + and callable(self.optimizer.optimizer.train) + ): + # the deepspeed optimizer further wraps the optimizer + self.optimizer.optimizer.train() + + def eval(self): + """ + Sets the optimizer to "eval" mode. Useful for optimizers like `schedule_free` + """ + if hasattr(self.optimizer, "eval") and callable(self.optimizer.eval): + self.optimizer.eval() + + def step(self, closure=None): + if is_lomo_available(): + from lomo_optim import AdaLomo, Lomo + + if ( + not self.gradient_state.is_xla_gradients_synced + and self.accelerator_state.distributed_type == DistributedType.XLA + ): + gradients = xm._fetch_gradients(self.optimizer) + xm.all_reduce("sum", gradients, scale=1.0 / xr.world_size()) + self.gradient_state.is_xla_gradients_synced = True + + if is_lomo_available(): + # `step` should be a no-op for LOMO optimizers. 
+ if isinstance(self.optimizer, (Lomo, AdaLomo)): + return + + if self.gradient_state.sync_gradients: + if self.scaler is not None: + self.optimizer.step = self._optimizer_patched_step_method + + self.scaler.step(self.optimizer, closure) + self.scaler.update() + + if not self._accelerate_step_called: + # If the optimizer step was skipped, gradient overflow was detected. + self._is_overflow = True + else: + self._is_overflow = False + # Reset the step method to the original one + self.optimizer.step = self._optimizer_original_step_method + # Reset the indicator + self._accelerate_step_called = False + else: + self.optimizer.step(closure) + if self.accelerator_state.distributed_type == DistributedType.XLA: + self.gradient_state.is_xla_gradients_synced = False + + def _switch_parameters(self, parameters_map): + for param_group in self.optimizer.param_groups: + param_group["params"] = [parameters_map.get(p, p) for p in param_group["params"]] + + @property + def step_was_skipped(self): + """Whether or not the optimizer step was skipped.""" + return self._is_overflow + + def __getstate__(self): + _ignored_keys = [ + "_accelerate_step_called", + "_optimizer_original_step_method", + "_optimizer_patched_step_method", + ] + return {k: v for k, v in self.__dict__.items() if k not in _ignored_keys} + + def __setstate__(self, state): + self.__dict__.update(state) + if self.scaler is not None: + self._accelerate_step_called = False + self._optimizer_original_step_method = self.optimizer.step + self._optimizer_patched_step_method = patch_optimizer_step(self, self.optimizer.step) + + +def patch_optimizer_step(accelerated_optimizer: AcceleratedOptimizer, method): + def patched_step(*args, **kwargs): + accelerated_optimizer._accelerate_step_called = True + return method(*args, **kwargs) + + return patched_step diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/parallelism_config.py b/URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/parallelism_config.py new 
file mode 100644 index 0000000000000000000000000000000000000000..c4135f2f79123b364f6bb54303f75d90da2b1e7c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/parallelism_config.py @@ -0,0 +1,398 @@ +# +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import warnings +from dataclasses import dataclass +from typing import TYPE_CHECKING, Literal, Optional, Union + +from accelerate.utils.dataclasses import ( + DeepSpeedSequenceParallelConfig, + DistributedType, + TorchContextParallelConfig, + TorchTensorParallelConfig, +) +from accelerate.utils.versions import is_torch_version + + +if TYPE_CHECKING: + from accelerate import Accelerator + + +@dataclass +class ParallelismConfig: + """ + A dataclass to configure parallelisms applied to the model. Inspired by torchtitan's `ParallelDims` + https://github.com/pytorch/torchtitan/blob/main/torchtitan/distributed/parallel_dims.py + + Args: + dp_replicate_size (`int`, defaults to `1`): + The size of the data parallel group. If `dp_replicate_size` is set to 1, the data parallel replication + group will not be used. + dp_shard_size (`int`, defaults to `1`): + The size of the model shard group. If `dp_replicate_size > 1` and `tp_size > 1`, `dp_shard_size` must also + be greater than 1, as composing DDP + TP is currently not supported. + tp_size (`int`, defaults to `1`): + The size of the tensor parallel group. 
If `tp_size` is set to `1`, the tensor parallel group will not be + used. + tp_handler (`~utils.TorchTensorParallelConfig`, defaults to `None`): + The handler for the tensor parallel group. + cp_size (`int`, defaults to `1`): + The size of the context parallel group. Currently not supported, but reserved for future use and enabled + for downstream libraries. + cp_backend (`str`, defaults to `torch`): + Which CP backend to use: `torch` (FSDP2) + sp_size (`int`, defaults to `1`): + The size of the sequence parallel group. + sp_backend (`str`, defaults to `deepspeed`): + Which SP backend to use:`deepspeed` (ALST/Ulysses) + + You may obtain different distributed data parallel paradigms by configuring `dp_replicate_size` and `dp_shard_size` + together: + - `dp_replicate_size == 1` and `dp_shard_size > 1`, we obtain Fully Sharded Data Parallel (FSDP). + - `dp_replicate_size > 1` and `dp_shard_size > 1`, we obtain Hybrid Sharded Data Parallel (HSDP). + - `dp_replicate_size > 1` and `dp_shard_size == 1` is an invalid configuration, to use pure DP, use + `DistributedDataParallelKwargs` instead. + + """ + + dp_replicate_size: Optional[int] = None + dp_shard_size: Optional[int] = None + tp_size: Optional[int] = None + cp_size: Optional[int] = None + cp_backend: Literal["torch"] = None + sp_size: Optional[int] = None + sp_backend: Literal["deepspeed"] = None + + # we use Union because we might support other x parallel plugins (i.e. 
deepspeed, etc) + tp_handler: Union[None, TorchTensorParallelConfig] = None + cp_handler: Union[None, TorchContextParallelConfig] = None + sp_handler: Union[None, DeepSpeedSequenceParallelConfig] = None + + device_mesh = None + + def __repr__(self): + return ( + "ParallelismConfig(\n " + f"\tdp_replicate_size={self.dp_replicate_size},\n" + f"\tdp_shard_size={self.dp_shard_size},\n" + f"\ttp_size={self.tp_size},\n" + f"\tcp_size={self.cp_size},\n" + f"\tcp_backend={self.cp_backend},\n" + f"\tsp_size={self.sp_size},\n" + f"\tsp_backend={self.sp_backend},\n" + f"\ttotal_size={self.total_size}\n" + f"\ttp_handler={self.tp_handler},\n" + f"\tcp_handler={self.cp_handler})\n" + ) + + def to_json(self): + import copy + + _non_serializable_fields = ["device_mesh"] + + copy.deepcopy( + { + k: copy.deepcopy(v.__dict__) if hasattr(v, "__dict__") else v + for k, v in self.__dict__.items() + if k not in _non_serializable_fields + } + ) + + @property + def dp_dim_names(self): + """Names of enabled dimensions across which data parallelism is applied.""" + dims = [] + if self.dp_replicate_enabled: + dims += ["dp_replicate"] + if self.dp_shard_enabled: + dims += ["dp_shard"] + return dims + + @property + def non_dp_dim_names(self): + """Names of enabled dimensions which will receive the same batch (non-data parallel dimensions).""" + dims = [] + if self.tp_enabled: + dims += ["tp"] + if self.cp_enabled: + dims += ["cp"] + if self.sp_enabled: + dims += ["sp"] + return dims + + @property + def dp_shard_cp_dim_names(self): + """Names of enabled dimensions which will be flattened into a joint mesh across which is model sharded in FSDP.""" + dims = [] + if self.dp_shard_enabled: + dims += ["dp_shard"] + if self.cp_enabled: + dims += ["cp"] + return dims + + @property + def dp_cp_dim_names(self): + """Names of enabled dimensions across which loss should be averaged""" + dims = [] + if self.dp_replicate_enabled: + dims += ["dp_replicate"] + if self.dp_shard_enabled: + dims += ["dp_shard"] 
+ if self.cp_enabled: + dims += ["cp"] + return dims + + @property + def fsdp_dim_names(self): + """Names of enabled dimensions across which FSDP is applied, including data parallel replication.""" + dims = [] + if self.dp_replicate_enabled: + dims += ["dp_replicate"] + dims += ["dp_shard_cp"] + return dims + + @property + def total_size(self): + """The total size of the parallelism configuration, which is the product of all sizes.""" + return self.dp_replicate_size * self.dp_shard_size * self.tp_size * self.cp_size * self.sp_size + + @property + def non_data_parallel_size(self): + """The size of the non-data parallel dimensions, which is the product of tensor and context parallel sizes.""" + return self.tp_size * self.cp_size * self.sp_size + + @property + def data_parallel_size(self): + """The size of the data parallel dimensions, which is the product of data parallel replication and""" + return self.dp_replicate_size * self.dp_shard_size + + @property + def dp_replicate_enabled(self): + """True if data parallel replication is enabled, i.e. `dp_replicate_size > 1`.""" + return self.dp_replicate_size > 1 + + @property + def dp_shard_enabled(self): + """True if data parallel sharding is enabled, i.e. `dp_shard_size > 1`.""" + return self.dp_shard_size > 1 + + @property + def tp_enabled(self): + """True if tensor parallelism is enabled, i.e. `tp_size > 1`.""" + return self.tp_size > 1 + + @property + def cp_enabled(self): + """True if context parallelism is enabled, i.e. `cp_size > 1`.""" + return self.cp_size > 1 + + @property + def sp_enabled(self): + """True if context parallelism is enabled, i.e. `sp_size > 1`.""" + return self.sp_size > 1 + + @property + def active_mesh_dims(self): + """Names of all active mesh dimensions.""" + return self.dp_dim_names + self.non_dp_dim_names + + def build_device_mesh(self, device_type: str): + """Builds a device mesh for the given device type based on the parallelism configuration. 
+ This method will also create required joint meshes (e.g. `dp_shard_cp`, `dp_cp`, `dp`). + + Args: + device_type (`str`): The type of device for which to build the mesh, e + """ + # Skip mesh creation for DeepSpeed SP - DeepSpeed handles its own SP groups + # Only skip when SP is actually enabled (sp_size > 1), otherwise user might still want TP/CP/FSDP + if self.sp_backend == "deepspeed" and self.sp_size > 1: + return None + + if is_torch_version(">=", "2.2.0"): + from torch.distributed.device_mesh import init_device_mesh + else: + raise RuntimeError("Building a device_mesh requires to have torch>=2.2.0") + + mesh = self._get_mesh() + if len(mesh) == 0: + return None + mesh_dim_names, mesh_shape = mesh + device_mesh = init_device_mesh( + device_type, + mesh_shape, + mesh_dim_names=mesh_dim_names, + ) + if self.dp_dim_names: + device_mesh[self.dp_dim_names]._flatten("dp") + if self.dp_shard_cp_dim_names: + device_mesh[self.dp_shard_cp_dim_names]._flatten("dp_shard_cp") + if self.dp_cp_dim_names: + device_mesh[self.dp_cp_dim_names]._flatten("dp_cp") + + return device_mesh + + def get_device_mesh(self, device_type: Optional[str] = None): + if self.device_mesh is None: + if device_type is not None: + self.device_mesh = self.build_device_mesh(device_type) + else: + raise ("You need to pass a device_type e.g cuda to build the device mesh") + else: + if device_type is not None: + if self.device_mesh.device_type != device_type: + raise ValueError( + f"The device_mesh is already created with device type {self.device_mesh.device_type}. However, you are trying to get a device mesh with device_type {device_type}. 
Please check if you correctly initialized your device_mesh" + ) + return self.device_mesh + + def _get_mesh(self) -> tuple[tuple[int, ...], tuple[str, ...]]: + """Generate mesh shape and dimension names for torch.distributed.init_device_mesh().""" + + # Build mesh dimensions dictionary + mesh_dims = {parallelism: self._sizes[parallelism] for parallelism in self.active_mesh_dims} + + # Apply canonical ordering + mesh_order = ["dp_replicate", "dp_shard", "cp", "sp", "tp"] + sorted_items = sorted( + mesh_dims.items(), + key=lambda x: (mesh_order.index(x[0])), + ) + return tuple(zip(*sorted_items)) + + def __post_init__(self): + # Basic size validation + if self.dp_replicate_size is None: + self.dp_replicate_size = int(os.environ.get("PARALLELISM_CONFIG_DP_REPLICATE_SIZE", "1")) + if self.dp_shard_size is None: + self.dp_shard_size = int(os.environ.get("PARALLELISM_CONFIG_DP_SHARD_SIZE", "1")) + if self.tp_size is None: + self.tp_size = int(os.environ.get("PARALLELISM_CONFIG_TP_SIZE", "1")) + if self.cp_size is None: + self.cp_size = int(os.environ.get("PARALLELISM_CONFIG_CP_SIZE", "1")) + if self.cp_backend is None: + self.cp_backend = os.environ.get("PARALLELISM_CONFIG_CP_BACKEND", "torch") + if self.sp_size is None: + self.sp_size = int(os.environ.get("PARALLELISM_CONFIG_SP_SIZE", "1")) + if self.sp_backend is None: + self.sp_backend = os.environ.get("PARALLELISM_CONFIG_SP_BACKEND", "deepspeed") + + if self.tp_size > 1: + if self.tp_handler is None: + self.tp_handler = TorchTensorParallelConfig() + + if self.cp_size > 1: + if self.cp_handler is None: + self.cp_handler = TorchContextParallelConfig() + else: + cp_backends_config_map = dict( + torch=TorchContextParallelConfig, + ) + if not isinstance(self.cp_handler, cp_backends_config_map[self.cp_backend]): + raise ValueError( + f"ParallelismConfig's cp_backend={self.cp_backend} requires {cp_backends_config_map[self.cp_backend]}, but cp_handler was set to {type(self.cp_handler)}" + ) + + if self.sp_size > 1: + if 
self.sp_handler is None: + self.sp_handler = DeepSpeedSequenceParallelConfig() + if self.dp_replicate_size < 1: + raise ValueError(f"dp_replicate_size must be at least 1, but got {self.dp_replicate_size}") + if self.dp_shard_size < 1: + raise ValueError(f"dp_shard_size must be at least 1, but got {self.dp_shard_size}") + if self.tp_size < 1: + raise ValueError(f"tp_size must be at least 1, but got {self.tp_size}") + if self.cp_size < 1: + raise ValueError(f"cp_size must be at least 1, but got {self.cp_size}") + valid_cp_backends = ["torch"] + if self.cp_backend not in valid_cp_backends: + raise ValueError(f"cp_backend must be one of {valid_cp_backends}, but got {self.cp_backend}") + + if self.sp_size < 1: + raise ValueError(f"sp_size must be at least 1, but got {self.sp_size}") + valid_sp_backends = ["deepspeed"] + if self.sp_backend not in valid_sp_backends: + raise ValueError(f"sp_backend must be one of {valid_sp_backends}, but got {self.sp_backend}") + + # CP and SP are mutually exclusive + if self.cp_size > 1 and self.sp_size > 1: + raise ValueError( + "Context Parallelism (CP) and Sequence Parallelism (SP) are mutually exclusive. " + f"Got cp_size={self.cp_size} and sp_size={self.sp_size}. " + "Please set either cp_size=1 or sp_size=1." + ) + + if (self.tp_size > 1 or self.cp_size > 1) and self.dp_replicate_size > 1 and self.dp_shard_size == 1: + raise ValueError( + "Tensor/Context parallelism (tp/cp_size > 1) cannot be used with pure data parallelism (dp_replicate_size > 1 and dp_shard_size == 1). " + "Please set dp_shard_size > 1 and dp_replicate_size == 1 to compose FSDP + TP/CP for 2D parallel, " + "or set dp_replicate_size == 1 and dp_shard_size > 1 to compose HSDP + TP/CP for 3D parallel." 
+ ) + self._sizes = { + "dp_replicate": self.dp_replicate_size, + "dp_shard": self.dp_shard_size, + "tp": self.tp_size, + "cp": self.cp_size, + "sp": self.sp_size, + } + + def _set_size(self, parallelism: str, size: int): + assert parallelism in self._sizes.keys(), f"Parallelism must be one of {self._sizes.keys()}" + self._sizes[parallelism] = size + setattr(self, f"{parallelism}_size", size) + + def _validate_accelerator(self, accelerator: "Accelerator"): + _warnings = set() + if not accelerator.multi_device and self.total_size == 1: + # No distributed setup, valid parallelism config + return + + # We need this to ensure DDP works + if self.total_size == 1: + self._set_size("dp_replicate", accelerator.num_processes) + + # For DeepSpeed SP, DeepSpeed handles global process groups internally. + # Skip the total_size == num_processes validation since: + # 1. DeepSpeed manages SP groups globally via initialize_sequence_parallel() + # 2. num_processes is per-node in multi-node, but total_size is local parallelism config + # 3. The actual global parallelism (SP × DP) is handled by DeepSpeed's process groups + if self.sp_backend == "deepspeed" and self.sp_size > 1: + pass + elif self.total_size != accelerator.num_processes: + raise ValueError( + f"ParallelismConfig total_size ({self.total_size}) does not match " + f"num_processes ({accelerator.num_processes}). Please adjust dp_replicate_size/ " + f"dp_shard_size/tp_size/cp_size/sp_size." + ) + + if self.total_size > 1 and not ( + accelerator.is_fsdp2 + or accelerator.multi_device + or accelerator.distributed_type == DistributedType.DEEPSPEED + ): + raise ValueError( + f"ParallelismConfig is only compatible DistributedType.FSDP (version 2) or DistributedType.Multi{{Device}} or DistributedType.DEEPSPEED, but got {accelerator.distributed_type}." 
+ ) + + for parallelism, size in self._sizes.items(): + if size == 1 and getattr(self, f"{parallelism}_handler", None) is not None: + _warnings.add( + f"ParallelismConfig.{parallelism}_handler is set, but {parallelism}_size is set to 1. This handler will be ignored." + ) + + if _warnings and accelerator.is_main_process: + warnings.warn( + "ParallelismConfig has the following warnings:\n" + "\n".join(_warnings), + UserWarning, + ) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/scheduler.py b/URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/scheduler.py new file mode 100644 index 0000000000000000000000000000000000000000..1fa8a13f238afd7b908ee8e8cb8e0620f48d4ff8 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/scheduler.py @@ -0,0 +1,98 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# We ignore warnings about stepping the scheduler since we step it ourselves during gradient accumulation + +import warnings + +from .state import AcceleratorState, GradientState + + +warnings.filterwarnings("ignore", category=UserWarning, module="torch.optim.lr_scheduler") + + +class AcceleratedScheduler: + """ + A wrapper around a learning rate scheduler that will only step when the optimizer(s) have a training step. 
Useful + to avoid making a scheduler step too fast when gradients went overflow and there was no training step (in mixed + precision training) + + When performing gradient accumulation scheduler lengths should not be changed accordingly, Accelerate will always + step the scheduler to account for it. + + Args: + scheduler (`torch.optim.lr_scheduler._LRScheduler`): + The scheduler to wrap. + optimizers (one or a list of `torch.optim.Optimizer`): + The optimizers used. + step_with_optimizer (`bool`, *optional*, defaults to `True`): + Whether or not the scheduler should be stepped at each optimizer step. + split_batches (`bool`, *optional*, defaults to `False`): + Whether or not the dataloaders split one batch across the different processes (so batch size is the same + regardless of the number of processes) or create batches on each process (so batch size is the original + batch size multiplied by the number of processes). + """ + + def __init__(self, scheduler, optimizers, step_with_optimizer: bool = True, split_batches: bool = False): + self.scheduler = scheduler + self.optimizers = optimizers if isinstance(optimizers, (list, tuple)) else [optimizers] + self.split_batches = split_batches + self.step_with_optimizer = step_with_optimizer + self.gradient_state = GradientState() + + def step(self, *args, **kwargs): + if not self.step_with_optimizer: + # No link between scheduler and optimizer -> just step + self.scheduler.step(*args, **kwargs) + return + + # Otherwise, first make sure the optimizer was stepped. 
+ if not self.gradient_state.sync_gradients: + if self.gradient_state.adjust_scheduler: + self.scheduler._step_count += 1 + return + + for opt in self.optimizers: + if opt.step_was_skipped: + return + if self.split_batches: + # Split batches -> the training dataloader batch size is not changed so one step per training step + self.scheduler.step(*args, **kwargs) + else: + # Otherwise the training dataloader batch size was multiplied by `num_processes`, so we need to do + # num_processes steps per training step + num_processes = AcceleratorState().num_processes + for _ in range(num_processes): + # Special case when using OneCycle and `drop_last` was not used + if hasattr(self.scheduler, "total_steps"): + if self.scheduler._step_count <= self.scheduler.total_steps: + self.scheduler.step(*args, **kwargs) + else: + self.scheduler.step(*args, **kwargs) + + # Passthroughs + def get_last_lr(self): + return self.scheduler.get_last_lr() + + def state_dict(self): + return self.scheduler.state_dict() + + def load_state_dict(self, state_dict): + self.scheduler.load_state_dict(state_dict) + + def get_lr(self): + return self.scheduler.get_lr() + + def print_lr(self, *args, **kwargs): + return self.scheduler.print_lr(*args, **kwargs) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/state.py b/URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/state.py new file mode 100644 index 0000000000000000000000000000000000000000..6d3e37fdfe12db4978b922c698a3d3813732623c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/accelerate/state.py @@ -0,0 +1,1374 @@ +# Copyright 2021 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import logging +import os +import threading +import warnings +import weakref +from contextlib import contextmanager +from functools import partial +from typing import Any, Callable + +import torch + +from .utils import ( + DistributedType, + DynamoBackend, + GradientAccumulationPlugin, + check_cuda_fp8_capability, + check_cuda_p2p_ib_support, + deepspeed_required, + get_cpu_distributed_information, + get_int_from_env, + is_datasets_available, + is_deepspeed_available, + is_fp8_available, + is_habana_gaudi1, + is_hpu_available, + is_mlu_available, + is_mps_available, + is_musa_available, + is_neuron_available, + is_npu_available, + is_sdaa_available, + is_torch_xla_available, + is_xccl_available, + is_xpu_available, + parse_choice_from_env, + parse_flag_from_env, + set_numa_affinity, +) +from .utils.dataclasses import SageMakerDistributedType + + +if is_torch_xla_available(): + import torch_xla.core.xla_model as xm + import torch_xla.runtime as xr + +if is_mlu_available(check_device=False): + import torch_mlu # noqa: F401 + +if is_sdaa_available(check_device=False): + import torch_sdaa # noqa: F401 + +if is_musa_available(check_device=False): + import torch_musa # noqa: F401 + +if is_npu_available(check_device=False): + import torch_npu # noqa: F401 + + +logger = logging.getLogger(__name__) + + +def is_initialized() -> bool: + """ + Checks if the `AcceleratorState` has been initialized from `Accelerator`. Same as `AcceleratorState.initialized`, + but works as a module method. 
+ """ + return AcceleratorState._shared_state != {} + + +# Lambda function that does nothing +def do_nothing(*args, **kwargs): + return None + + +class ThreadLocalSharedDict(threading.local): + """ + Descriptor that holds a dict shared between instances of a class in the same thread. + + Note: Descriptors have slightly different semantics than just a dict field on its own. + `PartialState(...)._shared_state` and `PartialState._shared_state` (instance vs class) give the same value: the + underlying _storage dict. Likewise, `PartialState(...)._shared_state = {...}` overrides the _storage dict inside + the descriptor as you would expect. However, `PartialState._shared_state = {}` actually replaces the descriptor + object with a dict instead Thus, you should modify the _storage dict in-place (e.g. `_shared_state.clear()`). + + See Python documentation for an explanation of descriptors: https://docs.python.org/3/howto/descriptor.html + + This is required for using PyTorch/XLA with PJRT in multithreaded mode (required for TPU v2 and v3). + + See https://github.com/pytorch/xla/blob/r2.0/docs/pjrt.md#multithreading-on-tpu-v2v3 + """ + + def __init__(self, thread_local: bool = False): + self._storage = {} + + def __get__(self, obj, objtype=None): + return self._storage + + def __set__(self, obj, value): + self._storage = value + + +# Prefer global shared dictionary, except when using TPU. +SharedDict = dict if not is_torch_xla_available() else ThreadLocalSharedDict + + +# Inspired by Alex Martelli's 'Borg'. +class PartialState: + """ + Singleton class that has information about the current training environment and functions to help with process + control. Designed to be used when only process control and device execution states are needed. Does *not* need to + be initialized from `Accelerator`. + + Args: + cpu (`bool`, *optional*): + Whether or not to force the script to execute on CPU. 
Will ignore any accelerators available if set to + `True` and force the execution on the CPU. + kwargs (additional keyword arguments, *optional*): + Additional keyword arguments to pass to the relevant `init_process_group` function. Valid `kwargs` can be + found in [`utils.InitProcessGroupKwargs`]. See the example section for detailed usage. + + **Available attributes:** + + - **device** (`torch.device`) -- The device to use. + - **distributed_type** ([`~accelerate.state.DistributedType`]) -- The type of distributed environment currently + in use. + - **local_process_index** (`int`) -- The index of the current process on the current server. + - **mixed_precision** (`str`) -- Whether or not the current script will use mixed precision, and if so the type + of mixed precision being performed. (Choose from 'no','fp16','bf16 or 'fp8'). + - **num_processes** (`int`) -- The number of processes currently launched in parallel. + - **process_index** (`int`) -- The index of the current process. + - **is_last_process** (`bool`) -- Whether or not the current process is the last one. + - **is_main_process** (`bool`) -- Whether or not the current process is the main one. + - **is_local_main_process** (`bool`) -- Whether or not the current process is the main one on the local node. + - **debug** (`bool`) -- Whether or not the current script is being run in debug mode. 
+ + Example: + ```python + from accelerate.utils import InitProcessGroupKwargs + + # To include `InitProcessGroupKwargs`, init then call `.to_kwargs()` + kwargs = InitProcessGroupKwargs(...).to_kwargs() + state = PartialState(**kwargs) + ``` + """ + + _shared_state = SharedDict() + _known_attrs = [ + "_cpu", + "_mixed_precision", + "_shared_state", + "backend", + "debug", + "device", + "distributed_type", + "fork_launched", + "local_process_index", + "num_processes", + "process_index", + ] + + def __init__(self, cpu: bool = False, **kwargs): + self.__dict__ = self._shared_state + if not self.initialized: + self._cpu = cpu + self.backend = None + env_device = os.environ.get("ACCELERATE_TORCH_DEVICE", None) + self.device = torch.device(env_device) if env_device is not None else None + self.debug = parse_flag_from_env("ACCELERATE_DEBUG_MODE") + use_sagemaker_dp = kwargs.pop("_use_sagemaker_dp", None) + dist_information = None + if use_sagemaker_dp is None: + use_sagemaker_dp = ( + os.environ.get("ACCELERATE_USE_SAGEMAKER", "false").lower() == "true" + and os.environ.get("ACCELERATE_SAGEMAKER_DISTRIBUTED_TYPE") != SageMakerDistributedType.NO + ) + + # Sets up self.backend + imports + original_backend = kwargs.pop("backend", None) + backend, distributed_type = self._prepare_backend(cpu, use_sagemaker_dp, original_backend) + if original_backend is not None and backend != original_backend: + raise ValueError(f"Your assigned backend {original_backend} is not available, please use {backend}") + self.backend = backend + self.distributed_type = distributed_type + use_deepspeed = False + if not cpu and self.backend != "xla": + if int(os.environ.get("LOCAL_RANK", -1)) != -1: + # Deal with spawning deepspeed + if os.environ.get("ACCELERATE_USE_DEEPSPEED", "false").lower() == "true": + if not is_deepspeed_available(): + raise ImportError( + "DeepSpeed is not available => install it using `pip3 install deepspeed` or build it from source" + ) + from deepspeed import comm as dist + 
+ if not dist.is_initialized(): + if self.backend == "tccl": + local_rank = os.environ.get("LOCAL_RANK", -1) + torch.sdaa.set_device(f"sdaa:{local_rank}") + dist.init_distributed(dist_backend=self.backend, auto_mpi_discovery=False, **kwargs) + # We need to flag to `use_deepspeed` to be True to override `distributed_type` later + use_deepspeed = True + # Deal with all other backends but CPU, that gets handled special later + elif ( + self.distributed_type is not DistributedType.MULTI_CPU + and not torch.distributed.is_initialized() + ): + if self.backend == "tccl": + local_rank = os.environ.get("LOCAL_RANK", -1) + torch.sdaa.set_device(f"sdaa:{local_rank}") + if ( + self.backend == "nccl" + and os.environ.get("ACCELERATE_USE_FSDP", "false").lower() == "true" + and ( + os.environ.get("FSDP_OFFLOAD_PARAMS", "false").lower() == "true" + or os.environ.get("FSDP_STATE_DICT_TYPE", "SHARDED_STATE_DICT") == "FULL_STATE_DICT" + ) + ): + self.backend = "cuda:nccl,cpu:gloo" + if ( + self.backend == "xccl" + and os.environ.get("ACCELERATE_USE_FSDP", "false").lower() == "true" + and ( + os.environ.get("FSDP_OFFLOAD_PARAMS", "false").lower() == "true" + or os.environ.get("FSDP_STATE_DICT_TYPE", "SHARDED_STATE_DICT") == "FULL_STATE_DICT" + ) + ): + self.backend = "xpu:xccl,cpu:gloo" + torch.distributed.init_process_group(backend=self.backend, **kwargs) + + # CPU require special env configs to be set + if self.distributed_type == DistributedType.MULTI_CPU: + dist_information = get_cpu_distributed_information() + os.environ["RANK"] = str(dist_information.rank) + os.environ["WORLD_SIZE"] = str(dist_information.world_size) + os.environ["LOCAL_RANK"] = str(dist_information.local_rank) + os.environ["LOCAL_WORLD_SIZE"] = str(dist_information.local_world_size) + if not os.environ.get("MASTER_PORT", None): + os.environ["MASTER_PORT"] = "29500" + if ( + not os.environ.get("MASTER_ADDR", None) + and dist_information.local_world_size != dist_information.world_size + and self.backend != "mpi" 
+ ): + raise ValueError( + "Tried to launch on distributed with multinode, but `MASTER_ADDR` env was not set, " + "please try exporting rank 0's hostname as `MASTER_ADDR`" + ) + kwargs["rank"] = dist_information.rank + kwargs["world_size"] = dist_information.world_size + + if ( + self.distributed_type == DistributedType.MULTI_CPU + and get_int_from_env(["OMP_NUM_THREADS"], 0) == 0 + ): + import psutil + + num_cpu_threads_per_process = int( + psutil.cpu_count(logical=False) / dist_information.local_world_size + ) + if num_cpu_threads_per_process == 0: + num_cpu_threads_per_process = 1 + torch.set_num_threads(num_cpu_threads_per_process) + warnings.warn( + f"OMP_NUM_THREADS/MKL_NUM_THREADS unset, we set it at {num_cpu_threads_per_process} to improve oob" + " performance." + ) + + if not torch.distributed.is_initialized(): + torch.distributed.init_process_group(backend=self.backend, **kwargs) + + # No backend == no distributed training + if self.backend is None: + self.distributed_type = DistributedType.NO + self.num_processes = 1 + self.process_index = 0 + self.local_process_index = 0 + elif self.backend == "xla": + # XLA needs device setting first for `set_replication` + self.set_device() + xm.set_replication(self.device, xm.get_xla_supported_devices()) + self.num_processes = xr.world_size() + self.process_index = xr.global_ordinal() + if is_torch_xla_available(check_is_tpu=True): + self.local_process_index = xm.get_local_ordinal() + else: + self.local_process_index = int(os.environ.get("LOCAL_RANK", -1)) + else: + self.num_processes = torch.distributed.get_world_size() + self.process_index = torch.distributed.get_rank() + self.local_process_index = ( + int(os.environ.get("LOCAL_RANK", -1)) if dist_information is None else dist_information.local_rank + ) + self.set_device() + # Now we can change to deepseed + if use_deepspeed: + self.distributed_type = DistributedType.DEEPSPEED + + # Set CPU affinity if enabled + if parse_flag_from_env("ACCELERATE_CPU_AFFINITY", 
False): + set_numa_affinity(self.local_process_index) + + # Check for old RTX 4000's that can't use P2P or IB and are on old drivers + if self.device.type == "cuda" and not check_cuda_p2p_ib_support(): + if "NCCL_P2P_DISABLE" not in os.environ or "NCCL_IB_DISABLE" not in os.environ: + raise NotImplementedError( + "Using RTX 4000 series doesn't support faster communication broadband via P2P or IB. " + 'Please set `NCCL_P2P_DISABLE="1"` and `NCCL_IB_DISABLE="1" or use `accelerate launch` which ' + "will do this automatically." + ) + + # Important: This should be the *only* code outside of `self.initialized!` + self.fork_launched = parse_flag_from_env("FORK_LAUNCHED", 0) + + def __repr__(self) -> str: + return ( + f"Distributed environment: {self.distributed_type}{(' Backend: ' + self.backend) if self.backend else ''}\n" + f"Num processes: {self.num_processes}\n" + f"Process index: {self.process_index}\n" + f"Local process index: {self.local_process_index}\n" + f"Device: {self.device}\n" + ) + + @staticmethod + def _reset_state(): + "Resets `_shared_state`, is used internally and should not be called" + PartialState._shared_state.clear() + + @property + def initialized(self) -> bool: + "Returns whether the `PartialState` has been initialized" + return self._shared_state != {} + + @property + def use_distributed(self): + """ + Whether the Accelerator is configured for distributed training + """ + return self.distributed_type != DistributedType.NO and self.num_processes > 1 + + @property + def is_last_process(self) -> bool: + "Returns whether the current process is the last one" + return self.process_index == self.num_processes - 1 + + @property + def is_main_process(self) -> bool: + "Returns whether the current process is the main process" + return ( + self.process_index == 0 if self.distributed_type != DistributedType.MEGATRON_LM else self.is_last_process + ) + + @property + def is_local_main_process(self) -> bool: + "Returns whether the current process is the main 
process on the local node" + return ( + self.local_process_index == 0 + if self.distributed_type != DistributedType.MEGATRON_LM + else self.is_last_process + ) + + def wait_for_everyone(self): + """ + Will stop the execution of the current process until every other process has reached that point (so this does + nothing when the script is only run in one process). Useful to do before saving a model. + + Example: + + ```python + >>> # Assuming two GPU processes + >>> import time + >>> from accelerate.state import PartialState + + >>> state = PartialState() + >>> if state.is_main_process: + ... time.sleep(2) + >>> else: + ... print("I'm waiting for the main process to finish its sleep...") + >>> state.wait_for_everyone() + >>> # Should print on every process at the same time + >>> print("Everyone is here") + ``` + """ + if self.distributed_type in ( + DistributedType.MULTI_GPU, + DistributedType.MULTI_MLU, + DistributedType.MULTI_SDAA, + DistributedType.MULTI_MUSA, + DistributedType.MULTI_NPU, + DistributedType.MULTI_XPU, + DistributedType.MULTI_CPU, + DistributedType.MULTI_HPU, + DistributedType.MULTI_NEURON, + DistributedType.DEEPSPEED, + DistributedType.FSDP, + ): + torch.distributed.barrier(device_ids=[self.local_process_index]) + elif self.distributed_type == DistributedType.XLA: + xm.rendezvous("accelerate.utils.wait_for_everyone") + + def _goes_first(self, is_main: bool): + if not is_main: + self.wait_for_everyone() + + yield + + if is_main: + self.wait_for_everyone() + + @contextmanager + def split_between_processes(self, inputs: list | tuple | dict | torch.Tensor, apply_padding: bool = False): + """ + Splits `input` between `self.num_processes` quickly and can be then used on that process. Useful when doing + distributed inference, such as with different prompts. + + Note that when using a `dict`, all keys need to have the same number of elements. 
+ + Args: + inputs (`list`, `tuple`, `torch.Tensor`, `dict` of `list`/`tuple`/`torch.Tensor`, or `datasets.Dataset`): + The input to split between processes. + apply_padding (`bool`, `optional`, defaults to `False`): + Whether to apply padding by repeating the last element of the input so that all processes have the same + number of elements. Useful when trying to perform actions such as `gather()` on the outputs or passing + in less inputs than there are processes. If so, just remember to drop the padded elements afterwards. + + + Example: + + ```python + # Assume there are two processes + from accelerate import PartialState + + state = PartialState() + with state.split_between_processes(["A", "B", "C"]) as inputs: + print(inputs) + # Process 0 + ["A", "B"] + # Process 1 + ["C"] + + with state.split_between_processes(["A", "B", "C"], apply_padding=True) as inputs: + print(inputs) + # Process 0 + ["A", "B"] + # Process 1 + ["C", "C"] + ``` + """ + if self.num_processes == 1: + yield inputs + return + length = len(inputs) + # Nested dictionary of any types + if isinstance(inputs, dict): + length = len(inputs[list(inputs.keys())[0]]) + if not all(len(v) == length for v in inputs.values()): + raise ValueError("All values in the dictionary must have the same length") + num_samples_per_process, num_extras = divmod(length, self.num_processes) + start_index = self.process_index * num_samples_per_process + min(self.process_index, num_extras) + end_index = start_index + num_samples_per_process + (1 if self.process_index < num_extras else 0) + + def _split_values(inputs, start_index, end_index): + if isinstance(inputs, (list, tuple, torch.Tensor)): + if start_index >= len(inputs): + result = inputs[-1:] + else: + result = inputs[start_index:end_index] + if apply_padding: + if isinstance(result, torch.Tensor): + from accelerate.utils import pad_across_processes, send_to_device + + # The tensor needs to be on the device before we can pad it + tensorized_result = 
send_to_device(result, self.device) + result = pad_across_processes(tensorized_result, pad_index=inputs[-1]) + else: + result += [result[-1]] * (num_samples_per_process + (1 if num_extras > 0 else 0) - len(result)) + return result + elif isinstance(inputs, dict): + for key in inputs.keys(): + inputs[key] = _split_values(inputs[key], start_index, end_index) + return inputs + else: + if is_datasets_available(): + from datasets import Dataset + + if isinstance(inputs, Dataset): + if start_index >= len(inputs): + start_index = len(inputs) - 1 + if end_index > len(inputs): + end_index = len(inputs) + result_idcs = list(range(start_index, end_index)) + if apply_padding: + result_idcs += [end_index - 1] * ( + num_samples_per_process + (1 if num_extras > 0 else 0) - len(result_idcs) + ) + return inputs.select(result_idcs) + return inputs + + yield _split_values(inputs, start_index, end_index) + + @contextmanager + def main_process_first(self): + """ + Lets the main process go first inside a with block. + + The other processes will enter the with block after the main process exits. + + Example: + + ```python + >>> from accelerate import Accelerator + + >>> accelerator = Accelerator() + >>> with accelerator.main_process_first(): + ... # This will be printed first by process 0 then in a seemingly + ... # random order by the other processes. + ... print(f"This will be printed by process {accelerator.process_index}") + ``` + """ + yield from self._goes_first(self.is_main_process) + + @contextmanager + def local_main_process_first(self): + """ + Lets the local main process go inside a with block. + + The other processes will enter the with block after the main process exits. + + Example: + + ```python + >>> from accelerate.state import PartialState + + >>> state = PartialState() + >>> with state.local_main_process_first(): + ... # This will be printed first by local process 0 then in a seemingly + ... # random order by the other processes. + ... 
print(f"This will be printed by process {state.local_process_index}") + ``` + """ + yield from self._goes_first(self.is_local_main_process) + + def on_main_process(self, function: Callable[..., Any] | None = None): + """ + Decorator that only runs the decorated function on the main process. + + Args: + function (`Callable`): The function to decorate. + + Example: + + ```python + >>> from accelerate.state import PartialState + + >>> state = PartialState() + + + >>> @state.on_main_process + ... def print_something(): + ... print("This will be printed by process 0 only.") + + + >>> print_something() + "This will be printed by process 0 only" + ``` + """ + if not self.initialized: + raise ValueError("The `PartialState` or `Accelerator` must be initialized before calling this function.") + if self.is_main_process or not self.use_distributed: + return function + return do_nothing + + def on_local_main_process(self, function: Callable[..., Any] | None = None): + """ + Decorator that only runs the decorated function on the local main process. + + Args: + function (`Callable`): The function to decorate. + + Example: + ```python + # Assume we have 2 servers with 4 processes each. + from accelerate.state import PartialState + + state = PartialState() + + + @state.on_local_main_process + def print_something(): + print("This will be printed by process 0 only on each server.") + + + print_something() + # On server 1: + "This will be printed by process 0 only" + # On server 2: + "This will be printed by process 0 only" + ``` + """ + if self.is_local_main_process or not self.use_distributed: + return function + return do_nothing + + def on_last_process(self, function: Callable[..., Any]): + """ + Decorator that only runs the decorated function on the last process. + + Args: + function (`Callable`): The function to decorate. + + Example: + ```python + # Assume we have 4 processes. 
+ from accelerate.state import PartialState + + state = PartialState() + + + @state.on_last_process + def print_something(): + print(f"Printed on process {state.process_index}") + + + print_something() + "Printed on process 3" + ``` + """ + if self.is_last_process or not self.use_distributed: + return function + return do_nothing + + def on_process(self, function: Callable[..., Any] | None = None, process_index: int | None = None): + """ + Decorator that only runs the decorated function on the process with the given index. + + Args: + function (`Callable`, `optional`): + The function to decorate. + process_index (`int`, `optional`): + The index of the process on which to run the function. + + Example: + ```python + # Assume we have 4 processes. + from accelerate.state import PartialState + + state = PartialState() + + + @state.on_process(process_index=2) + def print_something(): + print(f"Printed on process {state.process_index}") + + + print_something() + "Printed on process 2" + ``` + """ + if function is None: + return partial(self.on_process, process_index=process_index) + if (self.process_index == process_index) or (not self.use_distributed): + return function + return do_nothing + + def on_local_process(self, function: Callable[..., Any] | None = None, local_process_index: int | None = None): + """ + Decorator that only runs the decorated function on the process with the given index on the current node. + + Args: + function (`Callable`, *optional*): + The function to decorate. + local_process_index (`int`, *optional*): + The index of the local process on which to run the function. + + Example: + ```python + # Assume we have 2 servers with 4 processes each. 
+ from accelerate import Accelerator + + accelerator = Accelerator() + + + @accelerator.on_local_process(local_process_index=2) + def print_something(): + print(f"Printed on process {accelerator.local_process_index}") + + + print_something() + # On server 1: + "Printed on process 2" + # On server 2: + "Printed on process 2" + ``` + """ + if function is None: + return partial(self.on_local_process, local_process_index=local_process_index) + if (self.local_process_index == local_process_index) or (not self.use_distributed): + return function + return do_nothing + + def print(self, *args, **kwargs): + if self.is_local_main_process: + print(*args, **kwargs) + + @property + def default_device(self) -> torch.device: + """ + Returns the default device which is: + - MPS if `torch.backends.mps.is_available()` and `torch.backends.mps.is_built()` both return True. + - CUDA if `torch.cuda.is_available()` + - MLU if `is_mlu_available()` + - SDAA if `is_sdaa_available()` + - MUSA if `is_musa_available()` + - NPU if `is_npu_available()` + - HPU if `is_hpu_available()` + - NEURON if `is_neuron_available()` + - CPU otherwise + """ + if is_mps_available(): + os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1" + return torch.device("mps") + elif is_mlu_available(): + return torch.device("mlu") + elif is_sdaa_available(): + return torch.device("sdaa") + elif is_musa_available(): + return torch.device("musa") + # NPU should be checked before CUDA when using `transfer_to_npu` + # See issue #3020: https://github.com/huggingface/accelerate/issues/3020 + elif is_npu_available(): + return torch.device("npu") + elif is_hpu_available(): + return torch.device("hpu") + elif torch.cuda.is_available(): + return torch.device("cuda") + elif is_xpu_available(): + return torch.device("xpu") + elif is_neuron_available(): + return torch.device("neuron") + else: + return torch.device("cpu") + + def _prepare_backend( + self, cpu: bool = False, sagemaker_dp=False, backend: str | None = None + ) -> tuple[str, 
DistributedType]: + "Prepares any imports needed before initializing the distributed backend and sets `self.backend` properly" + distributed_type = None + if sagemaker_dp: + import smdistributed.dataparallel.torch.torch_smddp # noqa + + backend = "smddp" + distributed_type = DistributedType.MULTI_GPU + elif is_torch_xla_available(): + backend = "xla" + distributed_type = DistributedType.XLA + + elif int(os.environ.get("LOCAL_RANK", -1)) != -1 and not cpu: + if is_mlu_available(): + backend = "cncl" + distributed_type = DistributedType.MULTI_MLU + if is_sdaa_available(): + backend = "tccl" + distributed_type = DistributedType.MULTI_SDAA + elif is_musa_available(): + backend = "mccl" + distributed_type = DistributedType.MULTI_MUSA + # NPU should be checked before CUDA when using `transfer_to_npu` + # See issue #3020: https://github.com/huggingface/accelerate/issues/3020 + elif is_npu_available(): + backend = "hccl" + distributed_type = DistributedType.MULTI_NPU + elif is_hpu_available(init_hccl=True): + if backend is None: + backend = "hccl" + distributed_type = DistributedType.MULTI_HPU + elif torch.cuda.is_available(): + if backend is None: + backend = "nccl" + distributed_type = DistributedType.MULTI_GPU + elif is_xpu_available() and is_xccl_available(): + if backend is None: + backend = "xccl" + distributed_type = DistributedType.MULTI_XPU + elif is_neuron_available(): + backend = "neuron" + distributed_type = DistributedType.MULTI_NEURON + + if ( + distributed_type is None + and cpu + and ( + int(os.environ.get("LOCAL_RANK", -1)) != -1 + or get_int_from_env(["PMI_SIZE", "OMPI_COMM_WORLD_SIZE", "MV2_COMM_WORLD_SIZE", "WORLD_SIZE"], 1) > 1 + ) + ): + distributed_type = DistributedType.MULTI_CPU + + if backend in (None, "mpi") and torch.distributed.is_mpi_available(): + backend = "mpi" + else: + backend = "gloo" + if distributed_type is None: + distributed_type = DistributedType.NO + + return backend, distributed_type + + def set_device(self): + """ + Sets the 
device in `self.device` to the current distributed environment. + """ + if self.device is not None: + return + if self.distributed_type == DistributedType.NO: + self.device = torch.device("cpu") if self._cpu else self.default_device + return + device = str(self.distributed_type).split(".")[-1].replace("MULTI_", "").lower() + if device not in ("cpu", "gpu", "mlu", "musa", "npu", "xpu", "xla", "hpu", "sdaa", "neuron"): + raise ValueError( + f"Can't set device for {self.distributed_type} ({device}), verify we should be calling `_set_device()` for it!" + ) + if device == "xla": + self.device = xm.xla_device() + elif device == "hpu": + self.device = torch.device("hpu", torch.hpu.current_device()) + else: + if device == "gpu": + device = "cuda" + device_module = getattr(torch, device) + device_index = self.local_process_index % device_module.device_count() + self.device = torch.device(device, device_index) + device_module.set_device(self.device) + + def destroy_process_group(self, group=None): + """ + Destroys the process group. If one is not specified, the default process group is destroyed. + """ + if self.fork_launched and group is None: + return + # needed when using torch.distributed.init_process_group + if torch.distributed.is_initialized(): + torch.distributed.destroy_process_group(group) + + def __getattr__(self, name: str): + # By this point we know that no attributes of `self` contain `name`, + # so we just modify the error message + if name in self._known_attrs: + raise AttributeError( + f"`PartialState` object has no attribute `{name}`. " + "This happens if `PartialState._reset_state()` was called and " + "an `Accelerator` or `PartialState` was not reinitialized." + ) + # Raise a typical AttributeError + raise AttributeError(f"'PartialState' object has no attribute '{name}'") + + +class AcceleratorState: + """ + Singleton class that has information about the current training environment. 
+ + **Available attributes:** + + - **device** (`torch.device`) -- The device to use. + - **distributed_type** ([`~accelerate.state.DistributedType`]) -- The type of distributed environment currently + in use. + - **parallelism_config** ([`~accelerate.utils.ParallelismConfig`]) -- The parallelism configuration for the + current training environment. This is used to configure the distributed training environment. + - **initialized** (`bool`) -- Whether or not the `AcceleratorState` has been initialized from `Accelerator`. + - **local_process_index** (`int`) -- The index of the current process on the current server. + - **mixed_precision** (`str`) -- Whether or not the current script will use mixed precision, and if so the type + of mixed precision being performed. (Choose from 'no','fp16','bf16 or 'fp8'). + - **num_processes** (`int`) -- The number of processes currently launched in parallel. + - **process_index** (`int`) -- The index of the current process. + - **is_last_process** (`bool`) -- Whether or not the current process is the last one. + - **is_main_process** (`bool`) -- Whether or not the current process is the main one. + - **is_local_main_process** (`bool`) -- Whether or not the current process is the main one on the local node. + - **debug** (`bool`) -- Whether or not the current script is being run in debug mode. 
+ """ + + _shared_state = SharedDict() + _known_attrs = PartialState._known_attrs + [ + "deepspeed_plugin", + "fsdp_plugin", + "megatron_lm_plugin", + "dynamo_plugin", + ] + + def __init__( + self, + mixed_precision: str | None = None, + cpu: bool = False, + dynamo_plugin=None, + deepspeed_plugin=None, + fsdp_plugin=None, + torch_tp_plugin=None, + megatron_lm_plugin=None, + parallelism_config=None, + _from_accelerator: bool = False, + **kwargs, + ): + self.__dict__ = self._shared_state + if parse_flag_from_env("ACCELERATE_USE_CPU"): + cpu = True + if PartialState._shared_state == {}: + PartialState(cpu, **kwargs) + self.__dict__.update(PartialState._shared_state) + self._check_initialized(mixed_precision, cpu) + if not self.initialized: + self.deepspeed_plugins = None + self.torch_tp_plugin = torch_tp_plugin + self.parallelism_config = parallelism_config + self.device_mesh = None + mixed_precision = ( + parse_choice_from_env("ACCELERATE_MIXED_PRECISION", "no") + if mixed_precision is None + else mixed_precision.lower() + ) + if mixed_precision == "fp8": + # this is confusing, why is is_fp8_available only checks for library availability ? + if not is_fp8_available(): + raise ValueError( + "Using `fp8` precision requires `transformer_engine` or `MS-AMP` to be installed." + ) + elif torch.cuda.is_available() and not check_cuda_fp8_capability(): + logger.warning( + f"The current device has compute capability of {torch.cuda.get_device_capability()} which is " + "insufficient for FP8 mixed precision training (requires a GPU Hopper/Ada Lovelace " + "or higher, compute capability of 8.9 or higher). Will use FP16 instead." + ) + mixed_precision = "fp16" + elif is_habana_gaudi1(): + logger.warning( + "The current HPU device is Gaudi1 which does not support FP8 mixed precision training (requires " + "Gaudi2 or higher). Will use BF16 instead." 
+ ) + mixed_precision = "bf16" + + self.dynamo_plugin = dynamo_plugin + if not _from_accelerator: + raise ValueError( + "Please make sure to properly initialize your accelerator via `accelerator = Accelerator()` " + "before using any functionality from the `accelerate` library." + ) + # deepspeed handles mixed_precision using deepspeed_config. But we need to set it to fp8 + # if we're using fp8. + if self.distributed_type == DistributedType.DEEPSPEED and mixed_precision != "fp8": + self._mixed_precision = "no" + else: + self._mixed_precision = mixed_precision + + if self.distributed_type == DistributedType.XLA and is_torch_xla_available(check_is_tpu=True): + if mixed_precision == "bf16": + if os.environ.get("ACCELERATE_DOWNCAST_BF16"): + os.environ["XLA_USE_BF16"] = str(0) + os.environ["XLA_DOWNCAST_BF16"] = str(1) + self.downcast_bfloat = True + else: + os.environ["XLA_USE_BF16"] = str(1) + os.environ["XLA_DOWNCAST_BF16"] = str(0) + self.downcast_bfloat = False + elif os.environ.get("ACCELERATE_USE_DEEPSPEED", "false").lower() == "true" and not cpu: + self.distributed_type = DistributedType.DEEPSPEED + if not isinstance(deepspeed_plugin, dict): + deepspeed_plugin.set_mixed_precision(mixed_precision) + deepspeed_plugin.select(_from_accelerator_state=True) + else: + for plugin in deepspeed_plugin.values(): + plugin.set_mixed_precision(mixed_precision) + # The first plugin passed in is always the active one + first_plugin = next(iter(deepspeed_plugin.values())) + first_plugin.select(_from_accelerator_state=True) + self.deepspeed_plugins = deepspeed_plugin + elif self.distributed_type in [ + DistributedType.MULTI_GPU, + DistributedType.MULTI_MLU, + DistributedType.MULTI_SDAA, + DistributedType.MULTI_MUSA, + DistributedType.MULTI_NPU, + DistributedType.MULTI_XPU, + DistributedType.MULTI_HPU, + DistributedType.MULTI_NEURON, + ]: + # TODO: Siro - remove when axolotl fixes their side + if not os.environ.get("ACCELERATE_ALLOW_CP_STANDALONE", "false").lower() == "true": + 
if self.parallelism_config and self.parallelism_config.cp_enabled and fsdp_plugin is None: + raise ValueError( + "`cp_size > 1` specified in the `parallelism_config`, but no `fsdp_plugin` was provided. We need a `fsdp_plugin` to use context parallelism with `cp_backend=torch`, as we also shard the model across the device mesh to save more memory" + ) + if ( + self.parallelism_config is not None + and self.parallelism_config.cp_enabled + and fsdp_plugin.fsdp_version == 1 + ): + raise ValueError( + "Using `cp_size>1` requires FSDP2, but the provided `fsdp_plugin` is using FSDP1. " + ) + if (os.environ.get("ACCELERATE_USE_FSDP", "false").lower() == "true" or fsdp_plugin is not None) or ( + self.parallelism_config is not None and self.parallelism_config.cp_enabled + ): + self.distributed_type = DistributedType.FSDP + if self._mixed_precision != "no" and fsdp_plugin is not None: + fsdp_plugin.set_mixed_precision(self._mixed_precision) + self.fsdp_plugin = fsdp_plugin + if os.environ.get( + "ACCELERATE_USE_MEGATRON_LM", "false" + ).lower() == "true" and self.distributed_type not in [ + DistributedType.MULTI_XPU, + ]: + self.distributed_type = DistributedType.MEGATRON_LM + megatron_lm_plugin.set_mixed_precision(self._mixed_precision) + self.megatron_lm_plugin = megatron_lm_plugin + if ( + self.dynamo_plugin.backend != DynamoBackend.NO + and self._mixed_precision == "no" + and self.device.type == "cuda" + ): + torch.backends.cuda.matmul.allow_tf32 = True + if ( + self.dynamo_plugin.backend != DynamoBackend.NO + and self._mixed_precision == "no" + and self.device.type == "musa" + ): + torch.backends.musa.matmul.allow_tf32 = True + PartialState._shared_state["distributed_type"] = self.distributed_type + + @property + def initialized(self) -> bool: + return self._shared_state != PartialState._shared_state + + def __repr__(self): + repr = PartialState().__repr__() + f"\nMixed precision type: {self.mixed_precision}\n" + if self.distributed_type == DistributedType.DEEPSPEED: + 
repr += f"ds_config: {self.deepspeed_plugin.deepspeed_config}\n" + return repr + + def _check_initialized(self, mixed_precision=None, cpu=None): + "Checks if a modification is trying to be made and the `AcceleratorState` has already been initialized" + if self.initialized: + err = "AcceleratorState has already been initialized and cannot be changed, restart your runtime completely and pass `{flag}` to `Accelerator()`." + if cpu and self.device.type != "cpu": + raise ValueError(err.format(flag="cpu=True")) + if ( + mixed_precision is not None + and mixed_precision != self._mixed_precision + and self.distributed_type != DistributedType.DEEPSPEED + ): + raise ValueError(err.format(flag=f"mixed_precision='{mixed_precision}'")) + + @property + def mixed_precision(self): + if self.distributed_type == DistributedType.DEEPSPEED and self._mixed_precision != "fp8": + config = self.deepspeed_plugin.deepspeed_config + if config.get("fp16", {}).get("enabled", False): + mixed_precision = "fp16" + elif config.get("bf16", {}).get("enabled", False): + mixed_precision = "bf16" + else: + mixed_precision = "no" + else: + mixed_precision = self._mixed_precision + return mixed_precision + + @staticmethod + def _reset_state(reset_partial_state: bool = False): + "Resets `_shared_state`, is used internally and should not be called" + AcceleratorState._shared_state.clear() + if reset_partial_state: + PartialState._reset_state() + + def destroy_process_group(self, group=None): + """ + Destroys the process group. If one is not specified, the default process group is destroyed. + + If `self.fork_launched` is `True` and `group` is `None`, nothing happens. 
@contextmanager
def split_between_processes(self, inputs: list | tuple | dict | torch.Tensor, apply_padding: bool = False):
    """
    Context manager that hands each process its own share of `inputs`, for example to run distributed inference
    with different prompts. The work is delegated to `PartialState().split_between_processes`.

    When `inputs` is a `dict`, all keys need to have the same number of elements.

    Args:
        inputs (`list`, `tuple`, `torch.Tensor`, or `dict` of `list`/`tuple`/`torch.Tensor`):
            The input to split between processes.
        apply_padding (`bool`, `optional`, defaults to `False`):
            Whether to pad by repeating the last element of the input so that every process receives the same
            number of elements. Useful before calling something like `gather()` on the outputs, or when passing
            fewer inputs than there are processes. Remember to drop the padded elements afterwards.

    Example:

    ```python
    # Assume there are two processes
    from accelerate.state import AcceleratorState

    state = AcceleratorState()
    with state.split_between_processes(["A", "B", "C"]) as inputs:
        print(inputs)
    # Process 0
    ["A", "B"]
    # Process 1
    ["C"]

    with state.split_between_processes(["A", "B", "C"], apply_padding=True) as inputs:
        print(inputs)
    # Process 0
    ["A", "B"]
    # Process 1
    ["C", "C"]
    ```
    """
    splitter = PartialState().split_between_processes(inputs, apply_padding=apply_padding)
    with splitter as process_inputs:
        yield process_inputs
class GradientState:
    """
    Singleton class that has information related to gradient synchronization for gradient accumulation

    **Available attributes:**

        - **end_of_dataloader** (`bool`) -- Whether we have reached the end of the current dataloader
        - **remainder** (`int`) -- The number of extra samples that were added from padding the dataloader
        - **sync_gradients** (`bool`) -- Whether the gradients should be synced across all devices
        - **active_dataloader** (`Optional[DataLoader]`) -- The dataloader that is currently being iterated over
        - **dataloader_references** (`List[Optional[DataLoader]]`) -- A list of references to the dataloaders that are
          being iterated over
        - **num_steps** (`int`) -- The number of steps to accumulate over
        - **adjust_scheduler** (`bool`) -- Whether the scheduler should be adjusted to account for the gradient
          accumulation
        - **sync_with_dataloader** (`bool`) -- Whether the gradients should be synced at the end of the dataloader
          iteration and the number of total steps reset
        - **is_xla_gradients_synced** (`bool`) -- Whether the XLA gradients have been synchronized. It is initialized
          as false. Once gradients have been reduced before the optimizer step, this flag is set to true. Subsequently,
          after each step, the flag is reset to false. FSDP will always synchronize the gradients, hence
          is_xla_gradients_synced is always true.
    """

    _shared_state = SharedDict()

    def __init__(self, gradient_accumulation_plugin: GradientAccumulationPlugin | None = None):
        # All instances share one attribute dictionary, which is what makes the state global.
        self.__dict__ = self._shared_state
        plugin = gradient_accumulation_plugin

        if not self.initialized:
            # First construction: populate the shared state with its defaults.
            self.sync_gradients = True
            self._dataloader_references_ref = [None]
            if plugin is None:
                self.plugin_kwargs = {}
            else:
                self.plugin_kwargs = plugin.to_kwargs()
            self._is_xla_gradients_synced = False

        if plugin is None:
            return
        # Plugin args are different and can be updated
        if self.plugin_kwargs != plugin.to_kwargs():
            self.plugin_kwargs = plugin.to_kwargs()

    @property
    def num_steps(self) -> int:
        "Returns the number of steps to accumulate over"
        return self.plugin_kwargs.get("num_steps", 1)

    @property
    def adjust_scheduler(self) -> bool:
        "Returns whether the scheduler should be adjusted"
        return self.plugin_kwargs.get("adjust_scheduler", False)

    @property
    def sync_with_dataloader(self) -> bool:
        "Returns whether the gradients should be synced at the end of the dataloader iteration and the number of total steps reset"
        return self.plugin_kwargs.get("sync_with_dataloader", True)

    @property
    def initialized(self) -> bool:
        "Returns whether the `GradientState` has been initialized"
        return GradientState._shared_state != {}

    @property
    def end_of_dataloader(self) -> bool:
        "Returns whether we have reached the end of the current dataloader"
        if self.in_dataloader:
            return self.active_dataloader.end_of_dataloader
        return False

    @property
    def remainder(self) -> int:
        "Returns the number of extra samples that were added from padding the dataloader"
        if self.in_dataloader:
            return self.active_dataloader.remainder
        # -1 is reported when no dataloader is active.
        return -1

    def __repr__(self):
        parts = [
            f"Sync Gradients: {self.sync_gradients}\n",
            f"At end of current dataloader: {self.end_of_dataloader}\n",
            f"Extra samples added: {self.remainder}\n",
            f"Gradient accumulation plugin: {self.plugin_kwargs}\n",
        ]
        return "".join(parts)

    @property
    def is_xla_gradients_synced(self):
        "Returns the value of is_xla_gradients_synced. FSDP will always synchronize the gradients, hence is_xla_gradients_synced is always true."
        fsdp_enabled = parse_flag_from_env("ACCELERATE_USE_FSDP", default=False)
        return True if fsdp_enabled else self._is_xla_gradients_synced

    @is_xla_gradients_synced.setter
    def is_xla_gradients_synced(self, is_synced):
        "Set the _is_xla_gradients_synced attribute."
        self._is_xla_gradients_synced = is_synced

    def _set_sync_gradients(self, sync_gradients):
        "Private function that sets whether gradients should be synchronized. Users should not have to call this."
        self.sync_gradients = sync_gradients
        # Allow grad-sync to automatically work on TPUs
        if not self.sync_gradients:
            return
        if not is_torch_xla_available(check_is_tpu=True):
            return
        if PartialState().distributed_type == DistributedType.XLA:
            xm.mark_step()

    def _add_dataloader(self, dataloader):
        "Private function that adds a dataloader to `self.dataloader_references` and sets `in_dataloader` to `True`. Users should not have to call this."
        # Assign rather than append: only an assignment goes through the property setter, which is what stores
        # the weak references needed for garbage collection.
        self.dataloader_references = [*self.dataloader_references, dataloader]

    def _remove_dataloader(self, dataloader):
        "Private function that removes a dataloader from `self.dataloader_references` and sets `in_dataloader` to `False` if there are no more dataloaders. Users should not have to call this."
        remaining = []
        for dataloader_ref in self.dataloader_references:
            if dataloader_ref != dataloader:
                remaining.append(dataloader_ref)
        # Assign the filtered list so that the property setter is triggered.
        self.dataloader_references = remaining

    @property
    def active_dataloader(self):
        return self.dataloader_references[-1]

    @property
    def dataloader_references(self):
        # Weak references are stored (see the setter) to avoid circular references that prevent garbage
        # collection; resolve each one here, passing `None` placeholders through unchanged.
        resolved = []
        for reference in self._dataloader_references_ref:
            resolved.append(None if reference is None else reference())
        return resolved

    @dataloader_references.setter
    def dataloader_references(self, references):
        stored = []
        for dataloader in references:
            stored.append(None if dataloader is None else weakref.ref(dataloader))
        self._dataloader_references_ref = stored

    @property
    def in_dataloader(self) -> bool:
        "Returns whether the current process is in a dataloader"
        return self.active_dataloader is not None

    @staticmethod
    def _reset_state():
        "Resets `_shared_state`, is used internally and should not be called"
        GradientState._shared_state.clear()
# Probe each optional tracking backend once, in a fixed order, and remember the ones that report as available.
_available_trackers = [
    logger_type
    for is_available, logger_type in (
        (is_tensorboard_available, LoggerType.TENSORBOARD),
        (is_wandb_available, LoggerType.WANDB),
        (is_comet_ml_available, LoggerType.COMETML),
        (is_aim_available, LoggerType.AIM),
        (is_mlflow_available, LoggerType.MLFLOW),
        (is_clearml_available, LoggerType.CLEARML),
        (is_dvclive_available, LoggerType.DVCLIVE),
        (is_swanlab_available, LoggerType.SWANLAB),
        (is_trackio_available, LoggerType.TRACKIO),
    )
    if is_available()
]
class GeneralTracker:
    """
    A base Tracker class to be used for all logging integration implementations.

    Each function should take in `**kwargs` that will automatically be passed in from a base dictionary provided to
    [`Accelerator`].

    Should implement `name`, `requires_logging_directory`, and `tracker` properties such that:

    - `name` (`str`): String representation of the tracker class name, such as "TensorBoard"
    - `requires_logging_directory` (`bool`): Whether the logger requires a directory to store their logs.
    - `tracker` (`object`): Should return internal tracking mechanism used by a tracker class (such as the `run`
      for wandb)

    Implementations can also include a `main_process_only` (`bool`) attribute to toggle if relevant logging, init, and
    other functions should occur on the main process or across all processes (by default will use `True`)
    """

    main_process_only = True

    def __init__(self, _blank=False):
        """
        Validates that the subclass defines every required attribute.

        Args:
            _blank (`bool`, defaults to `False`):
                When truthy, the validation is skipped entirely.

        Raises:
            NotImplementedError: If `name`, `requires_logging_directory` or `tracker` is missing. The message lists
            every missing attribute.
        """
        if _blank:
            return

        missing = [f"`{attr}`" for attr in ("name", "requires_logging_directory") if not hasattr(self, attr)]
        # `tracker` is a @property that relies on post-init state, so it is looked up through `dir()`, which
        # does not execute the property the way `hasattr()` would.
        if "tracker" not in dir(self):
            missing.append("`tracker`")

        if missing:
            raise NotImplementedError(
                "The implementation for this tracker class is missing the following "
                "required attributes. Please define them in the class definition: "
                f"{', '.join(missing)}"
            )

    def start(self):
        """
        Lazy initialization of the tracker inside Accelerator to avoid initializing PartialState before
        InitProcessGroupKwargs.
        """
        pass

    def store_init_configuration(self, values: dict):
        """
        Logs `values` as hyperparameters for the run. Implementations should use the experiment configuration
        functionality of a tracking API.

        Args:
            values (Dictionary `str` to `bool`, `str`, `float` or `int`):
                Values to be stored as initial hyperparameters as key-value pairs. The values need to have type `bool`,
                `str`, `float`, `int`, or `None`.
        """
        pass

    def log(self, values: dict, step: Optional[int] = None, **kwargs):
        """
        Logs `values` to the current run. Base `log` implementations of a tracking API should go in here, along with
        special behavior for the `step` parameter.

        Args:
            values (Dictionary `str` to `str`, `float`, or `int`):
                Values to be logged as key-value pairs. The values need to have type `str`, `float`, or `int`.
            step (`int`, *optional*):
                The run step. If included, the log will be affiliated with this step.
        """
        pass

    def finish(self):
        """
        Should run any finalizing functions within the tracking API. If the API should not have one, just don't
        overwrite that method.
        """
        pass
+ """ + + name = "tensorboard" + requires_logging_directory = True + + def __init__(self, run_name: str, logging_dir: Union[str, os.PathLike], **kwargs): + super().__init__() + self.run_name = run_name + self.logging_dir_param = logging_dir + self.init_kwargs = kwargs + + @on_main_process + def start(self): + try: + from torch.utils import tensorboard + except ModuleNotFoundError: + import tensorboardX as tensorboard + self.logging_dir = os.path.join(self.logging_dir_param, self.run_name) + self.writer = tensorboard.SummaryWriter(self.logging_dir, **self.init_kwargs) + logger.debug(f"Initialized TensorBoard project {self.run_name} logging to {self.logging_dir}") + logger.debug( + "Make sure to log any initial configurations with `self.store_init_configuration` before training!" + ) + + @property + def tracker(self): + return self.writer + + @on_main_process + def store_init_configuration(self, values: dict): + """ + Logs `values` as hyperparameters for the run. Should be run at the beginning of your experiment. Stores the + hyperparameters in a yaml file for future use. + + Args: + values (Dictionary `str` to `bool`, `str`, `float` or `int`): + Values to be stored as initial hyperparameters as key-value pairs. The values need to have type `bool`, + `str`, `float`, `int`, or `None`. + """ + self.writer.add_hparams(values, metric_dict={}) + self.writer.flush() + project_run_name = time.time() + dir_name = os.path.join(self.logging_dir, str(project_run_name)) + os.makedirs(dir_name, exist_ok=True) + with open(os.path.join(dir_name, "hparams.yml"), "w") as outfile: + try: + yaml.dump(values, outfile) + except yaml.representer.RepresenterError: + logger.error("Serialization to store hyperparameters failed") + raise + logger.debug("Stored initial configuration hyperparameters to TensorBoard and hparams yaml file") + + @on_main_process + def log(self, values: dict, step: Optional[int] = None, **kwargs): + """ + Logs `values` to the current run. 
@on_main_process
def log_images(self, values: dict, step: Optional[int] = None, **kwargs):
    """
    Logs `images` to the current run.

    Args:
        values (Dictionary `str` to `List` of `np.ndarray` or `PIL.Image`):
            Values to be logged as key-value pairs. The values need to have type `List` of `np.ndarray` or
            `PIL.Image`.
        step (`int`, *optional*):
            The run step. If included, the log will be affiliated with this step.
        kwargs:
            Additional key word arguments passed along to the `SummaryWriter.add_images` method.
    """
    # Each key becomes one tag; its value is forwarded as-is to `add_images`.
    for tag, images in values.items():
        self.writer.add_images(tag, images, global_step=step, **kwargs)
    logger.debug("Successfully logged images to TensorBoard")
+ **kwargs (additional keyword arguments, *optional*): + Additional key word arguments passed along to the `wandb.init` method. + """ + + name = "wandb" + requires_logging_directory = False + main_process_only = False + + def __init__(self, run_name: str, **kwargs): + super().__init__() + self.run_name = run_name + self.init_kwargs = kwargs + + @on_main_process + def start(self): + import wandb + + self.run = wandb.init(project=self.run_name, **self.init_kwargs) + logger.debug(f"Initialized WandB project {self.run_name}") + logger.debug( + "Make sure to log any initial configurations with `self.store_init_configuration` before training!" + ) + + @property + def tracker(self): + return self.run + + @on_main_process + def store_init_configuration(self, values: dict): + """ + Logs `values` as hyperparameters for the run. Should be run at the beginning of your experiment. + + Args: + values (Dictionary `str` to `bool`, `str`, `float` or `int`): + Values to be stored as initial hyperparameters as key-value pairs. The values need to have type `bool`, + `str`, `float`, `int`, or `None`. + """ + import wandb + + wandb.config.update(values, allow_val_change=True) + logger.debug("Stored initial configuration hyperparameters to WandB") + + @on_main_process + def log(self, values: dict, step: Optional[int] = None, **kwargs): + """ + Logs `values` to the current run. + + Args: + values (Dictionary `str` to `str`, `float`, `int` or `dict` of `str` to `float`/`int`): + Values to be logged as key-value pairs. The values need to have type `str`, `float`, `int` or `dict` of + `str` to `float`/`int`. + step (`int`, *optional*): + The run step. If included, the log will be affiliated with this step. + kwargs: + Additional key word arguments passed along to the `wandb.log` method. 
class TrackioTracker(GeneralTracker):
    """
    A `Tracker` class that supports `trackio`. Should be initialized at the start of your script.

    Args:
        run_name (`str`):
            The name of the experiment run. Will be used as the `project` name when instantiating trackio.
        **kwargs (additional keyword arguments, *optional*):
            Additional key word arguments passed along to the `trackio.init` method. Refer to this
            [init](https://github.com/gradio-app/trackio/blob/814809552310468b13f84f33764f1369b4e5136c/trackio/__init__.py#L22)
            to see all supported key word arguments.
    """

    name = "trackio"
    requires_logging_directory = False
    main_process_only = False

    def __init__(self, run_name: str, **kwargs):
        super().__init__()
        # Only remember the arguments here; the trackio run itself is created in `start()`.
        self.run_name, self.init_kwargs = run_name, kwargs

    @on_main_process
    def start(self):
        """
        Creates the trackio run, using `run_name` as the project name and forwarding the stored init kwargs.
        """
        import trackio

        project = self.run_name
        self.run = trackio.init(project=project, **self.init_kwargs)
        logger.debug(f"Initialized trackio project {self.run_name}")
        logger.debug(
            "Make sure to log any initial configurations with `self.store_init_configuration` before training!"
        )

    @property
    def tracker(self):
        # The run object created in `start()`.
        return self.run

    @on_main_process
    def store_init_configuration(self, values: dict):
        """
        Logs `values` as hyperparameters for the run. Should be run at the beginning of your experiment.

        Args:
            values (Dictionary `str` to `bool`, `str`, `float` or `int`):
                Values to be stored as initial hyperparameters as key-value pairs. The values need to have type `bool`,
                `str`, `float`, `int`, or `None`.
        """
        import trackio

        config = trackio.config
        config.update(values, allow_val_change=True)
        logger.debug("Stored initial configuration hyperparameters to trackio")

    @on_main_process
    def log(self, values: dict, step: Optional[int] = None, **kwargs):
        """
        Logs `values` to the current run.

        Args:
            values (Dictionary `str` to `str`, `float`, `int` or `dict` of `str` to `float`/`int`):
                Values to be logged as key-value pairs. The values need to have type `str`, `float`, `int` or `dict` of
                `str` to `float`/`int`.
            step (`int`, *optional*):
                The run step. If included, the log will be affiliated with this step.
            kwargs:
                Additional key word arguments passed along to the `trackio.log` method.
        """
        # NOTE(review): `step` is accepted but not forwarded to the run -- confirm whether the supported
        # trackio versions take a `step` argument before passing it along.
        run = self.run
        run.log(values, **kwargs)
        logger.debug("Successfully logged to trackio")

    @on_main_process
    def finish(self):
        """
        Closes `trackio` run
        """
        run = self.run
        run.finish()
        logger.debug("trackio run closed")
@on_main_process
def log(self, values: dict, step: Optional[int] = None, **kwargs):
    """
    Logs `values` to the current run.

    Numbers are sent to `Experiment.log_metric`, strings to `Experiment.log_other` and dictionaries to
    `Experiment.log_metrics`. Values of any other type are skipped.

    Args:
        values (Dictionary `str` to `str`, `float`, `int` or `dict` of `str` to `float`/`int`):
            Values to be logged as key-value pairs. The values need to have type `str`, `float`, `int` or `dict` of
            `str` to `float`/`int`.
        step (`int`, *optional*):
            The run step. If included, the log will be affiliated with this step.
        kwargs:
            Additional key word arguments passed along to either `Experiment.log_metric`, `Experiment.log_other`,
            or `Experiment.log_metrics` method based on the contents of `values`.
    """
    writer = self.writer
    if step is not None:
        writer.set_step(step)

    for key, value in values.items():
        if isinstance(value, (int, float)):
            writer.log_metric(key, value, step=step, **kwargs)
            continue
        if isinstance(value, str):
            # `log_other` is called without a step.
            writer.log_other(key, value, **kwargs)
            continue
        if isinstance(value, dict):
            writer.log_metrics(value, step=step, **kwargs)

    logger.debug("Successfully logged to Comet")
@on_main_process
def log(self, values: dict, step: Optional[int] = None, **kwargs):
    """
    Logs `values` to the current run.

    Args:
        values (`dict`):
            Values to be logged as key-value pairs.
        step (`int`, *optional*):
            The run step. If included, the log will be affiliated with this step.
        kwargs:
            Additional key word arguments passed along to the `Run.track` method.
    """
    # Note: replace this with the dictionary support when merged
    # Until then every entry is tracked with its own `Run.track` call, using the key as the name.
    for key, value in values.items():
        self.writer.track(value, name=key, step=step, **kwargs)
The values need to have type `np.ndarray` or PIL.Image. If a + tuple is provided, the first element should be the image and the second element should be the caption. + step (`int`, *optional*): + The run step. If included, the log will be affiliated with this step. + kwargs (`Dict[str, dict]`): + Additional key word arguments passed along to the `Run.Image` and `Run.track` method specified by the + keys `aim_image` and `track`, respectively. + """ + import aim + + aim_image_kw = {} + track_kw = {} + + if kwargs is not None: + aim_image_kw = kwargs.get("aim_image", {}) + track_kw = kwargs.get("track", {}) + + for key, value in values.items(): + if isinstance(value, tuple): + img, caption = value + else: + img, caption = value, "" + aim_image = aim.Image(img, caption=caption, **aim_image_kw) + self.writer.track(aim_image, name=key, step=step, **track_kw) + + @on_main_process + def finish(self): + """ + Closes `aim` writer + """ + self.writer.close() + + +class MLflowTracker(GeneralTracker): + """ + A `Tracker` class that supports `mlflow`. Should be initialized at the start of your script. + + Args: + experiment_name (`str`, *optional*): + Name of the experiment. Environment variable MLFLOW_EXPERIMENT_NAME has priority over this argument. + logging_dir (`str` or `os.PathLike`, defaults to `"."`): + Location for mlflow logs to be stored. + run_id (`str`, *optional*): + If specified, get the run with the specified UUID and log parameters and metrics under that run. The run’s + end time is unset and its status is set to running, but the run’s other attributes (source_version, + source_type, etc.) are not changed. Environment variable MLFLOW_RUN_ID has priority over this argument. + tags (`Dict[str, str]`, *optional*): + An optional `dict` of `str` keys and values, or a `str` dump from a `dict`, to set as tags on the run. If a + run is being resumed, these tags are set on the resumed run. If a new run is being created, these tags are + set on the new run. 
Environment variable MLFLOW_TAGS has priority over this argument. + nested_run (`bool`, *optional*, defaults to `False`): + Controls whether run is nested in parent run. True creates a nested run. Environment variable + MLFLOW_NESTED_RUN has priority over this argument. + run_name (`str`, *optional*): + Name of new run (stored as a mlflow.runName tag). Used only when `run_id` is unspecified. + description (`str`, *optional*): + An optional string that populates the description box of the run. If a run is being resumed, the + description is set on the resumed run. If a new run is being created, the description is set on the new + run. + """ + + name = "mlflow" + requires_logging_directory = False + + def __init__( + self, + experiment_name: Optional[str] = None, + logging_dir: Optional[Union[str, os.PathLike]] = None, + run_id: Optional[str] = None, + tags: Optional[Union[dict[str, Any], str]] = None, + nested_run: Optional[bool] = False, + run_name: Optional[str] = None, + description: Optional[str] = None, + ): + experiment_name = os.environ.get("MLFLOW_EXPERIMENT_NAME", experiment_name) + run_id = os.environ.get("MLFLOW_RUN_ID", run_id) + tags = os.environ.get("MLFLOW_TAGS", tags) + if isinstance(tags, str): + tags = json.loads(tags) + + nested_run = os.environ.get("MLFLOW_NESTED_RUN", nested_run) + + self.experiment_name = experiment_name + self.logging_dir = logging_dir + self.run_id = run_id + self.tags = tags + self.nested_run = nested_run + self.run_name = run_name + self.description = description + + @on_main_process + def start(self): + import mlflow + + exps = mlflow.search_experiments(filter_string=f"name = '{self.experiment_name}'") + if len(exps) > 0: + if len(exps) > 1: + logger.warning("Multiple experiments with the same name found. 
Using first one.") + experiment_id = exps[0].experiment_id + else: + experiment_id = mlflow.create_experiment( + name=self.experiment_name, + artifact_location=self.logging_dir, + tags=self.tags, + ) + + self.active_run = mlflow.start_run( + run_id=self.run_id, + experiment_id=experiment_id, + run_name=self.run_name, + nested=self.nested_run, + tags=self.tags, + description=self.description, + ) + + logger.debug(f"Initialized mlflow experiment {self.experiment_name}") + logger.debug( + "Make sure to log any initial configurations with `self.store_init_configuration` before training!" + ) + + @property + def tracker(self): + return self.active_run + + @on_main_process + def store_init_configuration(self, values: dict): + """ + Logs `values` as hyperparameters for the run. Should be run at the beginning of your experiment. + + Args: + values (`dict`): + Values to be stored as initial hyperparameters as key-value pairs. + """ + import mlflow + + for name, value in list(values.items()): + # internally, all values are converted to str in MLflow + if len(str(value)) > mlflow.utils.validation.MAX_PARAM_VAL_LENGTH: + logger.warning_once( + f'Accelerate is attempting to log a value of "{value}" for key "{name}" as a parameter. MLflow\'s' + f" log_param() only accepts values no longer than {mlflow.utils.validation.MAX_PARAM_VAL_LENGTH} characters so we dropped this attribute." + ) + del values[name] + + values_list = list(values.items()) + + # MLflow cannot log more than 100 values in one go, so we have to split it + for i in range(0, len(values_list), mlflow.utils.validation.MAX_PARAMS_TAGS_PER_BATCH): + mlflow.log_params(dict(values_list[i : i + mlflow.utils.validation.MAX_PARAMS_TAGS_PER_BATCH])) + + logger.debug("Stored initial configuration hyperparameters to MLflow") + + @on_main_process + def log(self, values: dict, step: Optional[int]): + """ + Logs `values` to the current run. + + Args: + values (`dict`): + Values to be logged as key-value pairs. 
+ step (`int`, *optional*): + The run step. If included, the log will be affiliated with this step. + """ + metrics = {} + for k, v in values.items(): + if isinstance(v, (int, float)): + metrics[k] = v + else: + logger.warning_once( + f'MLflowTracker is attempting to log a value of "{v}" of type {type(v)} for key "{k}" as a metric. ' + "MLflow's log_metric() only accepts float and int types so we dropped this attribute." + ) + import mlflow + + mlflow.log_metrics(metrics, step=step) + logger.debug("Successfully logged to mlflow") + + @on_main_process + def log_figure(self, figure: Any, artifact_file: str, **save_kwargs): + """ + Logs an figure to the current run. + + Args: + figure (Any): + The figure to be logged. + artifact_file (`str`, *optional*): + The run-relative artifact file path in posixpath format to which the image is saved. + If not provided, the image is saved to a default location. + **kwargs: + Additional keyword arguments passed to the underlying mlflow.log_image function. + """ + import mlflow + + mlflow.log_figure(figure=figure, artifact_file=artifact_file, **save_kwargs) + logger.debug("Successfully logged image to mlflow") + + @on_main_process + def log_artifacts(self, local_dir: str, artifact_path: Optional[str] = None): + """ + Logs an artifacts (all content of a dir) to the current run. + + local_dir (`str`): + Path to the directory to be logged as an artifact. + artifact_path (`str`, *optional*): + Directory within the run's artifact directory where the artifact will be logged. If omitted, the + artifact will be logged to the root of the run's artifact directory. The run step. If included, the + artifact will be affiliated with this step. + """ + import mlflow + + mlflow.log_artifacts(local_dir=local_dir, artifact_path=artifact_path) + logger.debug("Successfully logged artofact to mlflow") + + @on_main_process + def log_artifact(self, local_path: str, artifact_path: Optional[str] = None): + """ + Logs an artifact (file) to the current run. 
+ + local_path (`str`): + Path to the file to be logged as an artifact. + artifact_path (`str`, *optional*): + Directory within the run's artifact directory where the artifact will be logged. If omitted, the + artifact will be logged to the root of the run's artifact directory. The run step. If included, the + artifact will be affiliated with this step. + """ + import mlflow + + mlflow.log_artifact(local_path=local_path, artifact_path=artifact_path) + logger.debug("Successfully logged artofact to mlflow") + + @on_main_process + def finish(self): + """ + End the active MLflow run. + """ + import mlflow + + mlflow.end_run() + + +class ClearMLTracker(GeneralTracker): + """ + A `Tracker` class that supports `clearml`. Should be initialized at the start of your script. + + Args: + run_name (`str`, *optional*): + Name of the experiment. Environment variables `CLEARML_PROJECT` and `CLEARML_TASK` have priority over this + argument. + **kwargs (additional keyword arguments, *optional*): + Kwargs passed along to the `Task.__init__` method. + """ + + name = "clearml" + requires_logging_directory = False + + def __init__(self, run_name: Optional[str] = None, **kwargs): + super().__init__() + self.user_provided_run_name = run_name + self._initialized_externally = False + self.init_kwargs = kwargs + + @on_main_process + def start(self): + from clearml import Task + + current_task = Task.current_task() + if current_task: + self._initialized_externally = True + self.task = current_task + return + + task_init_args = {**self.init_kwargs} + task_init_args.setdefault("project_name", os.environ.get("CLEARML_PROJECT", self.user_provided_run_name)) + task_init_args.setdefault("task_name", os.environ.get("CLEARML_TASK", self.user_provided_run_name)) + self.task = Task.init(**task_init_args) + + @property + def tracker(self): + return self.task + + @on_main_process + def store_init_configuration(self, values: dict): + """ + Connect configuration dictionary to the Task object. 
Should be run at the beginning of your experiment. + + Args: + values (`dict`): + Values to be stored as initial hyperparameters as key-value pairs. + """ + return self.task.connect_configuration(values) + + @on_main_process + def log(self, values: dict[str, Union[int, float]], step: Optional[int] = None, **kwargs): + """ + Logs `values` dictionary to the current run. The dictionary keys must be strings. The dictionary values must be + ints or floats + + Args: + values (`Dict[str, Union[int, float]]`): + Values to be logged as key-value pairs. If the key starts with 'eval_'/'test_'/'train_', the value will + be reported under the 'eval'/'test'/'train' series and the respective prefix will be removed. + Otherwise, the value will be reported under the 'train' series, and no prefix will be removed. + step (`int`, *optional*): + If specified, the values will be reported as scalars, with the iteration number equal to `step`. + Otherwise they will be reported as single values. + kwargs: + Additional key word arguments passed along to the `clearml.Logger.report_single_value` or + `clearml.Logger.report_scalar` methods. + """ + clearml_logger = self.task.get_logger() + for k, v in values.items(): + if not isinstance(v, (int, float)): + logger.warning_once( + "Accelerator is attempting to log a value of " + f'"{v}" of type {type(v)} for key "{k}" as a scalar. ' + "This invocation of ClearML logger's report_scalar() " + "is incorrect so we dropped this attribute." + ) + continue + if step is None: + clearml_logger.report_single_value(name=k, value=v, **kwargs) + continue + title, series = ClearMLTracker._get_title_series(k) + clearml_logger.report_scalar(title=title, series=series, value=v, iteration=step, **kwargs) + + @on_main_process + def log_images(self, values: dict, step: Optional[int] = None, **kwargs): + """ + Logs `images` to the current run. + + Args: + values (`Dict[str, List[Union[np.ndarray, PIL.Image]]`): + Values to be logged as key-value pairs. 
The values need to have type `List` of `np.ndarray` or + step (`int`, *optional*): + The run step. If included, the log will be affiliated with this step. + kwargs: + Additional key word arguments passed along to the `clearml.Logger.report_image` method. + """ + clearml_logger = self.task.get_logger() + for k, v in values.items(): + title, series = ClearMLTracker._get_title_series(k) + clearml_logger.report_image(title=title, series=series, iteration=step, image=v, **kwargs) + + @on_main_process + def log_table( + self, + table_name: str, + columns: Optional[list[str]] = None, + data: Optional[list[list[Any]]] = None, + dataframe: Any = None, + step: Optional[int] = None, + **kwargs, + ): + """ + Log a Table to the task. Can be defined eitherwith `columns` and `data` or with `dataframe`. + + Args: + table_name (`str`): + The name of the table + columns (list of `str`, *optional*): + The name of the columns on the table + data (List of List of Any data type, *optional*): + The data to be logged in the table. If `columns` is not specified, then the first entry in data will be + the name of the columns of the table + dataframe (Any data type, *optional*): + The data to be logged in the table + step (`int`, *optional*): + The run step. If included, the log will be affiliated with this step. + kwargs: + Additional key word arguments passed along to the `clearml.Logger.report_table` method. + """ + to_report = dataframe + if dataframe is None: + if data is None: + raise ValueError( + "`ClearMLTracker.log_table` requires that `data` to be supplied if `dataframe` is `None`" + ) + to_report = [columns] + data if columns else data + title, series = ClearMLTracker._get_title_series(table_name) + self.task.get_logger().report_table(title=title, series=series, table_plot=to_report, iteration=step, **kwargs) + + @on_main_process + def finish(self): + """ + Close the ClearML task. If the task was initialized externally (e.g. 
by manually calling `Task.init`), this + function is a noop + """ + if self.task and not self._initialized_externally: + self.task.close() + + @staticmethod + def _get_title_series(name): + for prefix in ["eval", "test", "train"]: + if name.startswith(prefix + "_"): + return name[len(prefix) + 1 :], prefix + return name, "train" + + +class DVCLiveTracker(GeneralTracker): + """ + A `Tracker` class that supports `dvclive`. Should be initialized at the start of your script. + + Args: + run_name (`str`, *optional*): + Ignored for dvclive. See `kwargs` instead. + kwargs: + Additional key word arguments passed along to [`dvclive.Live()`](https://dvc.org/doc/dvclive/live). + + Example: + + ```py + from accelerate import Accelerator + + accelerator = Accelerator(log_with="dvclive") + accelerator.init_trackers(project_name="my_project", init_kwargs={"dvclive": {"dir": "my_directory"}}) + ``` + """ + + name = "dvclive" + requires_logging_directory = False + + def __init__(self, run_name: Optional[str] = None, live: Optional[Any] = None, **kwargs): + super().__init__() + self.live = live + self.init_kwargs = kwargs + + @on_main_process + def start(self): + from dvclive import Live + + self.live = self.live if self.live is not None else Live(**self.init_kwargs) + + @property + def tracker(self): + return self.live + + @on_main_process + def store_init_configuration(self, values: dict): + """ + Logs `values` as hyperparameters for the run. Should be run at the beginning of your experiment. Stores the + hyperparameters in a yaml file for future use. + + Args: + values (Dictionary `str` to `bool`, `str`, `float`, `int`, or a List or Dict of those types): + Values to be stored as initial hyperparameters as key-value pairs. The values need to have type `bool`, + `str`, `float`, or `int`. + """ + self.live.log_params(values) + + @on_main_process + def log(self, values: dict, step: Optional[int] = None, **kwargs): + """ + Logs `values` to the current run. 
+ + Args: + values (Dictionary `str` to `str`, `float`, or `int`): + Values to be logged as key-value pairs. The values need to have type `str`, `float`, or `int`. + step (`int`, *optional*): + The run step. If included, the log will be affiliated with this step. + kwargs: + Additional key word arguments passed along to `dvclive.Live.log_metric()`. + """ + from dvclive.plots import Metric + + if step is not None: + self.live.step = step + for k, v in values.items(): + if Metric.could_log(v): + self.live.log_metric(k, v, **kwargs) + else: + logger.warning_once( + "Accelerator attempted to log a value of " + f'"{v}" of type {type(v)} for key "{k}" as a scalar. ' + "This invocation of DVCLive's Live.log_metric() " + "is incorrect so we dropped this attribute." + ) + self.live.next_step() + + @on_main_process + def finish(self): + """ + Closes `dvclive.Live()`. + """ + self.live.end() + + +class SwanLabTracker(GeneralTracker): + """ + A `Tracker` class that supports `swanlab`. Should be initialized at the start of your script. + + Args: + run_name (`str`): + The name of the experiment run. + **kwargs (additional keyword arguments, *optional*): + Additional key word arguments passed along to the `swanlab.init` method. + """ + + name = "swanlab" + requires_logging_directory = False + main_process_only = False + + def __init__(self, run_name: str, **kwargs): + super().__init__() + self.run_name = run_name + self.init_kwargs = kwargs + + @on_main_process + def start(self): + import swanlab + + self.run = swanlab.init(project=self.run_name, **self.init_kwargs) + swanlab.config["FRAMEWORK"] = "🤗Accelerate" # add accelerate logo in config + logger.debug(f"Initialized SwanLab project {self.run_name}") + logger.debug( + "Make sure to log any initial configurations with `self.store_init_configuration` before training!" 
+ ) + + @property + def tracker(self): + return self.run + + @on_main_process + def store_init_configuration(self, values: dict): + """ + Logs `values` as hyperparameters for the run. Should be run at the beginning of your experiment. + + Args: + values (Dictionary `str` to `bool`, `str`, `float` or `int`): + Values to be stored as initial hyperparameters as key-value pairs. The values need to have type `bool`, + `str`, `float`, `int`, or `None`. + """ + import swanlab + + swanlab.config.update(values, allow_val_change=True) + logger.debug("Stored initial configuration hyperparameters to SwanLab") + + @on_main_process + def log(self, values: dict, step: Optional[int] = None, **kwargs): + """ + Logs `values` to the current run. + + Args: + data : Dict[str, DataType] + Data must be a dict. The key must be a string with 0-9, a-z, A-Z, " ", "_", "-", "/". The value must be a + `float`, `float convertible object`, `int` or `swanlab.data.BaseType`. + step : int, optional + The step number of the current data, if not provided, it will be automatically incremented. + If step is duplicated, the data will be ignored. + kwargs: + Additional key word arguments passed along to the `swanlab.log` method. Likes: + print_to_console : bool, optional + Whether to print the data to the console, the default is False. + """ + self.run.log(values, step=step, **kwargs) + logger.debug("Successfully logged to SwanLab") + + @on_main_process + def log_images(self, values: dict, step: Optional[int] = None, **kwargs): + """ + Logs `images` to the current run. + + Args: + values (Dictionary `str` to `List` of `np.ndarray` or `PIL.Image`): + Values to be logged as key-value pairs. The values need to have type `List` of `np.ndarray` or + step (`int`, *optional*): + The run step. If included, the log will be affiliated with this step. + kwargs: + Additional key word arguments passed along to the `swanlab.log` method. 
Likes: + print_to_console : bool, optional + Whether to print the data to the console, the default is False. + """ + import swanlab + + for k, v in values.items(): + self.log({k: [swanlab.Image(image) for image in v]}, step=step, **kwargs) + logger.debug("Successfully logged images to SwanLab") + + @on_main_process + def finish(self): + """ + Closes `swanlab` writer + """ + self.run.finish() + logger.debug("SwanLab run closed") + + +LOGGER_TYPE_TO_CLASS = { + "aim": AimTracker, + "comet_ml": CometMLTracker, + "mlflow": MLflowTracker, + "tensorboard": TensorBoardTracker, + "wandb": WandBTracker, + "clearml": ClearMLTracker, + "dvclive": DVCLiveTracker, + "swanlab": SwanLabTracker, + "trackio": TrackioTracker, +} + + +def filter_trackers( + log_with: list[Union[str, LoggerType, GeneralTracker]], + logging_dir: Optional[Union[str, os.PathLike]] = None, +): + """ + Takes in a list of potential tracker types and checks that: + - The tracker wanted is available in that environment + - Filters out repeats of tracker types + - If `all` is in `log_with`, will return all trackers in the environment + - If a tracker requires a `logging_dir`, ensures that `logging_dir` is not `None` + + Args: + log_with (list of `str`, [`~utils.LoggerType`] or [`~tracking.GeneralTracker`], *optional*): + A list of loggers to be setup for experiment tracking. Should be one or several of: + + - `"all"` + - `"tensorboard"` + - `"wandb"` + - `"trackio"` + - `"aim"` + - `"comet_ml"` + - `"mlflow"` + - `"dvclive"` + - `"swanlab"` + If `"all"` is selected, will pick up all available trackers in the environment and initialize them. Can + also accept implementations of `GeneralTracker` for custom trackers, and can be combined with `"all"`. + logging_dir (`str`, `os.PathLike`, *optional*): + A path to a directory for storing logs of locally-compatible loggers. 
+ """ + loggers = [] + if log_with is not None: + if not isinstance(log_with, (list, tuple)): + log_with = [log_with] + if "all" in log_with or LoggerType.ALL in log_with: + loggers = [o for o in log_with if issubclass(type(o), GeneralTracker)] + get_available_trackers() + else: + for log_type in log_with: + if log_type not in LoggerType and not issubclass(type(log_type), GeneralTracker): + raise ValueError(f"Unsupported logging capability: {log_type}. Choose between {LoggerType.list()}") + if issubclass(type(log_type), GeneralTracker): + loggers.append(log_type) + else: + log_type = LoggerType(log_type) + if log_type not in loggers: + if log_type in get_available_trackers(): + tracker_init = LOGGER_TYPE_TO_CLASS[str(log_type)] + if tracker_init.requires_logging_directory: + if logging_dir is None: + raise ValueError( + f"Logging with `{log_type}` requires a `logging_dir` to be passed in." + ) + loggers.append(log_type) + else: + logger.debug(f"Tried adding logger {log_type}, but package is unavailable in the system.") + + return loggers diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/anyio-4.8.0.dist-info/INSTALLER b/URSA/.venv_ursa/lib/python3.12/site-packages/anyio-4.8.0.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/anyio-4.8.0.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/anyio-4.8.0.dist-info/LICENSE b/URSA/.venv_ursa/lib/python3.12/site-packages/anyio-4.8.0.dist-info/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..104eebf5a3002fccdaceef3a4cb936173c1c2035 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/anyio-4.8.0.dist-info/LICENSE @@ -0,0 +1,20 @@ +The MIT License (MIT) + +Copyright (c) 2018 Alex Grönholm + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and 
associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/anyio-4.8.0.dist-info/METADATA b/URSA/.venv_ursa/lib/python3.12/site-packages/anyio-4.8.0.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..7b114cdbe89034ec67e6a5f8ed9fe80261ab8d77 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/anyio-4.8.0.dist-info/METADATA @@ -0,0 +1,104 @@ +Metadata-Version: 2.1 +Name: anyio +Version: 4.8.0 +Summary: High level compatibility layer for multiple asynchronous event loop implementations +Author-email: Alex Grönholm +License: MIT +Project-URL: Documentation, https://anyio.readthedocs.io/en/latest/ +Project-URL: Changelog, https://anyio.readthedocs.io/en/stable/versionhistory.html +Project-URL: Source code, https://github.com/agronholm/anyio +Project-URL: Issue tracker, https://github.com/agronholm/anyio/issues +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: MIT License +Classifier: Framework :: AnyIO 
+Classifier: Typing :: Typed +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: Programming Language :: Python :: 3.13 +Requires-Python: >=3.9 +Description-Content-Type: text/x-rst +License-File: LICENSE +Requires-Dist: exceptiongroup>=1.0.2; python_version < "3.11" +Requires-Dist: idna>=2.8 +Requires-Dist: sniffio>=1.1 +Requires-Dist: typing_extensions>=4.5; python_version < "3.13" +Provides-Extra: trio +Requires-Dist: trio>=0.26.1; extra == "trio" +Provides-Extra: test +Requires-Dist: anyio[trio]; extra == "test" +Requires-Dist: coverage[toml]>=7; extra == "test" +Requires-Dist: exceptiongroup>=1.2.0; extra == "test" +Requires-Dist: hypothesis>=4.0; extra == "test" +Requires-Dist: psutil>=5.9; extra == "test" +Requires-Dist: pytest>=7.0; extra == "test" +Requires-Dist: trustme; extra == "test" +Requires-Dist: truststore>=0.9.1; python_version >= "3.10" and extra == "test" +Requires-Dist: uvloop>=0.21; (platform_python_implementation == "CPython" and platform_system != "Windows" and python_version < "3.14") and extra == "test" +Provides-Extra: doc +Requires-Dist: packaging; extra == "doc" +Requires-Dist: Sphinx~=7.4; extra == "doc" +Requires-Dist: sphinx_rtd_theme; extra == "doc" +Requires-Dist: sphinx-autodoc-typehints>=1.2.0; extra == "doc" + +.. image:: https://github.com/agronholm/anyio/actions/workflows/test.yml/badge.svg + :target: https://github.com/agronholm/anyio/actions/workflows/test.yml + :alt: Build Status +.. image:: https://coveralls.io/repos/github/agronholm/anyio/badge.svg?branch=master + :target: https://coveralls.io/github/agronholm/anyio?branch=master + :alt: Code Coverage +.. 
image:: https://readthedocs.org/projects/anyio/badge/?version=latest + :target: https://anyio.readthedocs.io/en/latest/?badge=latest + :alt: Documentation +.. image:: https://badges.gitter.im/gitterHQ/gitter.svg + :target: https://gitter.im/python-trio/AnyIO + :alt: Gitter chat + +AnyIO is an asynchronous networking and concurrency library that works on top of either asyncio_ or +trio_. It implements trio-like `structured concurrency`_ (SC) on top of asyncio and works in harmony +with the native SC of trio itself. + +Applications and libraries written against AnyIO's API will run unmodified on either asyncio_ or +trio_. AnyIO can also be adopted into a library or application incrementally – bit by bit, no full +refactoring necessary. It will blend in with the native libraries of your chosen backend. + +Documentation +------------- + +View full documentation at: https://anyio.readthedocs.io/ + +Features +-------- + +AnyIO offers the following functionality: + +* Task groups (nurseries_ in trio terminology) +* High-level networking (TCP, UDP and UNIX sockets) + + * `Happy eyeballs`_ algorithm for TCP connections (more robust than that of asyncio on Python + 3.8) + * async/await style UDP sockets (unlike asyncio where you still have to use Transports and + Protocols) + +* A versatile API for byte streams and object streams +* Inter-task synchronization and communication (locks, conditions, events, semaphores, object + streams) +* Worker threads +* Subprocesses +* Asynchronous file I/O (using worker threads) +* Signal handling + +AnyIO also comes with its own pytest_ plugin which also supports asynchronous fixtures. +It even works with the popular Hypothesis_ library. + +.. _asyncio: https://docs.python.org/3/library/asyncio.html +.. _trio: https://github.com/python-trio/trio +.. _structured concurrency: https://en.wikipedia.org/wiki/Structured_concurrency +.. _nurseries: https://trio.readthedocs.io/en/stable/reference-core.html#nurseries-and-spawning +.. 
_Happy eyeballs: https://en.wikipedia.org/wiki/Happy_Eyeballs +.. _pytest: https://docs.pytest.org/en/latest/ +.. _Hypothesis: https://hypothesis.works/ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/anyio-4.8.0.dist-info/RECORD b/URSA/.venv_ursa/lib/python3.12/site-packages/anyio-4.8.0.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..368773fe822b215a4e866f51d6d6578990fcfd1b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/anyio-4.8.0.dist-info/RECORD @@ -0,0 +1,86 @@ +anyio-4.8.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +anyio-4.8.0.dist-info/LICENSE,sha256=U2GsncWPLvX9LpsJxoKXwX8ElQkJu8gCO9uC6s8iwrA,1081 +anyio-4.8.0.dist-info/METADATA,sha256=WjTz5zz2NgMStBtw4xDh8CDvf6YXgAOrA0nboFQkXEg,4630 +anyio-4.8.0.dist-info/RECORD,, +anyio-4.8.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91 +anyio-4.8.0.dist-info/entry_points.txt,sha256=_d6Yu6uiaZmNe0CydowirE9Cmg7zUL2g08tQpoS3Qvc,39 +anyio-4.8.0.dist-info/top_level.txt,sha256=QglSMiWX8_5dpoVAEIHdEYzvqFMdSYWmCj6tYw2ITkQ,6 +anyio/__init__.py,sha256=mVsWuQ6wxcPT9QUAxhz1Rg2u53PskaBJw4TXVXk63ZQ,4513 +anyio/__pycache__/__init__.cpython-312.pyc,, +anyio/__pycache__/from_thread.cpython-312.pyc,, +anyio/__pycache__/lowlevel.cpython-312.pyc,, +anyio/__pycache__/pytest_plugin.cpython-312.pyc,, +anyio/__pycache__/to_interpreter.cpython-312.pyc,, +anyio/__pycache__/to_process.cpython-312.pyc,, +anyio/__pycache__/to_thread.cpython-312.pyc,, +anyio/_backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +anyio/_backends/__pycache__/__init__.cpython-312.pyc,, +anyio/_backends/__pycache__/_asyncio.cpython-312.pyc,, +anyio/_backends/__pycache__/_trio.cpython-312.pyc,, +anyio/_backends/_asyncio.py,sha256=_6BDFDrEPI1aRPFDSPsUzyPGm-dnRgBPfEOdeCH1Ixg,92951 +anyio/_backends/_trio.py,sha256=7PNO_GPq8Dmo1kQmmE2z57dhnv4TUoinsEHt_PJg3oE,40405 
+anyio/_core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +anyio/_core/__pycache__/__init__.cpython-312.pyc,, +anyio/_core/__pycache__/_asyncio_selector_thread.cpython-312.pyc,, +anyio/_core/__pycache__/_eventloop.cpython-312.pyc,, +anyio/_core/__pycache__/_exceptions.cpython-312.pyc,, +anyio/_core/__pycache__/_fileio.cpython-312.pyc,, +anyio/_core/__pycache__/_resources.cpython-312.pyc,, +anyio/_core/__pycache__/_signals.cpython-312.pyc,, +anyio/_core/__pycache__/_sockets.cpython-312.pyc,, +anyio/_core/__pycache__/_streams.cpython-312.pyc,, +anyio/_core/__pycache__/_subprocesses.cpython-312.pyc,, +anyio/_core/__pycache__/_synchronization.cpython-312.pyc,, +anyio/_core/__pycache__/_tasks.cpython-312.pyc,, +anyio/_core/__pycache__/_testing.cpython-312.pyc,, +anyio/_core/__pycache__/_typedattr.cpython-312.pyc,, +anyio/_core/_asyncio_selector_thread.py,sha256=53RhMHpFAexW0dQz2Rn8iy8zt931NXyEJITyILWVV_A,5626 +anyio/_core/_eventloop.py,sha256=t_tAwBFPjF8jrZGjlJ6bbYy6KA3bjsbZxV9mvh9t1i0,4695 +anyio/_core/_exceptions.py,sha256=RlPRlwastdmfDPoskdXNO6SI8_l3fclA2wtW6cokU9I,3503 +anyio/_core/_fileio.py,sha256=r6QJmwn90vU0CyCDAWgGhCwc8cT26ofosaHl7Jo3LJU,22853 +anyio/_core/_resources.py,sha256=NbmU5O5UX3xEyACnkmYX28Fmwdl-f-ny0tHym26e0w0,435 +anyio/_core/_signals.py,sha256=vulT1M1xdLYtAR-eY5TamIgaf1WTlOwOrMGwswlTTr8,905 +anyio/_core/_sockets.py,sha256=vQ5GnSDLHjEhHhV2yvsdiPs5wmPxxb1kRsv3RM5lbQk,26951 +anyio/_core/_streams.py,sha256=OnaKgoDD-FcMSwLvkoAUGP51sG2ZdRvMpxt9q2w1gYA,1804 +anyio/_core/_subprocesses.py,sha256=WquR6sHrnaZofaeqnL8U4Yv___msVW_WqivleLHK4zI,7760 +anyio/_core/_synchronization.py,sha256=DwUh8Tl6cG_UMVC_GyzPoC_U9BpfDfjMl9SINSxcZN4,20320 +anyio/_core/_tasks.py,sha256=f3CuWwo06cCZ6jaOv-JHFKWkgpgf2cvaF25Oh4augMA,4757 +anyio/_core/_testing.py,sha256=YUGwA5cgFFbUTv4WFd7cv_BSVr4ryTtPp8owQA3JdWE,2118 +anyio/_core/_typedattr.py,sha256=P4ozZikn3-DbpoYcvyghS_FOYAgbmUxeoU8-L_07pZM,2508 
+anyio/abc/__init__.py,sha256=c2OQbTCS_fQowviMXanLPh8m29ccwkXmpDr7uyNZYOo,2652 +anyio/abc/__pycache__/__init__.cpython-312.pyc,, +anyio/abc/__pycache__/_eventloop.cpython-312.pyc,, +anyio/abc/__pycache__/_resources.cpython-312.pyc,, +anyio/abc/__pycache__/_sockets.cpython-312.pyc,, +anyio/abc/__pycache__/_streams.cpython-312.pyc,, +anyio/abc/__pycache__/_subprocesses.cpython-312.pyc,, +anyio/abc/__pycache__/_tasks.cpython-312.pyc,, +anyio/abc/__pycache__/_testing.cpython-312.pyc,, +anyio/abc/_eventloop.py,sha256=Wd_3C3hLm0ex5z_eHHWGqvLle2OKCSexJSZVnwQNGV4,9658 +anyio/abc/_resources.py,sha256=DrYvkNN1hH6Uvv5_5uKySvDsnknGVDe8FCKfko0VtN8,783 +anyio/abc/_sockets.py,sha256=KhWtJxan8jpBXKwPaFeQzI4iRXdFaOIn0HXtDZnaO7U,6262 +anyio/abc/_streams.py,sha256=GzST5Q2zQmxVzdrAqtbSyHNxkPlIC9AzeZJg_YyPAXw,6598 +anyio/abc/_subprocesses.py,sha256=cumAPJTktOQtw63IqG0lDpyZqu_l1EElvQHMiwJgL08,2067 +anyio/abc/_tasks.py,sha256=yJWbMwowvqjlAX4oJ3l9Is1w-zwynr2lX1Z02AWJqsY,3080 +anyio/abc/_testing.py,sha256=tBJUzkSfOXJw23fe8qSJ03kJlShOYjjaEyFB6k6MYT8,1821 +anyio/from_thread.py,sha256=dbi5TUH45_Sg_jZ8Vv1NJWVohe0WeQ_OaCvXIKveAGg,17478 +anyio/lowlevel.py,sha256=nkgmW--SdxGVp0cmLUYazjkigveRm5HY7-gW8Bpp9oY,4169 +anyio/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +anyio/pytest_plugin.py,sha256=vjGhGRHD31OyMgJRFQrMvExhx3Ea8KbyDqYKmiSDdXA,6712 +anyio/streams/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +anyio/streams/__pycache__/__init__.cpython-312.pyc,, +anyio/streams/__pycache__/buffered.cpython-312.pyc,, +anyio/streams/__pycache__/file.cpython-312.pyc,, +anyio/streams/__pycache__/memory.cpython-312.pyc,, +anyio/streams/__pycache__/stapled.cpython-312.pyc,, +anyio/streams/__pycache__/text.cpython-312.pyc,, +anyio/streams/__pycache__/tls.cpython-312.pyc,, +anyio/streams/buffered.py,sha256=UCldKC168YuLvT7n3HtNPnQ2iWAMSTYQWbZvzLwMwkM,4500 +anyio/streams/file.py,sha256=6uoTNb5KbMoj-6gS3_xrrL8uZN8Q4iIvOS1WtGyFfKw,4383 
+anyio/streams/memory.py,sha256=j8AyOExK4-UPaon_Xbhwax25Vqs0DwFg3ZXc-EIiHjY,10550 +anyio/streams/stapled.py,sha256=U09pCrmOw9kkNhe6tKopsm1QIMT1lFTFvtb-A7SIe4k,4302 +anyio/streams/text.py,sha256=6x8w8xlfCZKTUWQoJiMPoMhSSJFUBRKgoBNSBtbd9yg,5094 +anyio/streams/tls.py,sha256=m3AE2LVSpoRHSIwSoSCupiOVL54EvOFoY3CcwTxcZfg,12742 +anyio/to_interpreter.py,sha256=QhTFaSdyUjxpuN_wBJWPWyh8N6kKV4qhkn71Op84AEc,6624 +anyio/to_process.py,sha256=ZvruelRM-HNmqDaql4sdNODg2QD_uSlwSCxnV4OhsfQ,9595 +anyio/to_thread.py,sha256=WM2JQ2MbVsd5D5CM08bQiTwzZIvpsGjfH1Fy247KoDQ,2396 diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/anyio-4.8.0.dist-info/WHEEL b/URSA/.venv_ursa/lib/python3.12/site-packages/anyio-4.8.0.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..ae527e7d64811439e61b93aa375defb30e06edfe --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/anyio-4.8.0.dist-info/WHEEL @@ -0,0 +1,5 @@ +Wheel-Version: 1.0 +Generator: setuptools (75.6.0) +Root-Is-Purelib: true +Tag: py3-none-any + diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/anyio-4.8.0.dist-info/entry_points.txt b/URSA/.venv_ursa/lib/python3.12/site-packages/anyio-4.8.0.dist-info/entry_points.txt new file mode 100644 index 0000000000000000000000000000000000000000..44dd9bdc3039122cc98014c1439ca254313fd014 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/anyio-4.8.0.dist-info/entry_points.txt @@ -0,0 +1,2 @@ +[pytest11] +anyio = anyio.pytest_plugin diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/anyio-4.8.0.dist-info/top_level.txt b/URSA/.venv_ursa/lib/python3.12/site-packages/anyio-4.8.0.dist-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..c77c069ecc9b7f8b1f97dbcfec905725db0253a8 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/anyio-4.8.0.dist-info/top_level.txt @@ -0,0 +1 @@ +anyio diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/certifi/__pycache__/__init__.cpython-312.pyc 
b/URSA/.venv_ursa/lib/python3.12/site-packages/certifi/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b4d7f48c0419ffef55d55dc0256bbdd7bf37694b Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/certifi/__pycache__/__init__.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/certifi/__pycache__/__main__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/certifi/__pycache__/__main__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..201f3aba659756ae807ec5c4a6f2d7dfc436524a Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/certifi/__pycache__/__main__.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/certifi/__pycache__/core.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/certifi/__pycache__/core.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b72867b812a4f5e9a555ff6d3b3882465a82819b Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/certifi/__pycache__/core.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/charset_normalizer-3.4.1.dist-info/INSTALLER b/URSA/.venv_ursa/lib/python3.12/site-packages/charset_normalizer-3.4.1.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/charset_normalizer-3.4.1.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/charset_normalizer-3.4.1.dist-info/LICENSE b/URSA/.venv_ursa/lib/python3.12/site-packages/charset_normalizer-3.4.1.dist-info/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..9725772c7967075d97dc78d60f3735435eccba63 --- /dev/null +++ 
b/URSA/.venv_ursa/lib/python3.12/site-packages/charset_normalizer-3.4.1.dist-info/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 TAHRI Ahmed R. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/charset_normalizer-3.4.1.dist-info/METADATA b/URSA/.venv_ursa/lib/python3.12/site-packages/charset_normalizer-3.4.1.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..58f3667d80159cddd6afacd050837aed1c892ec8 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/charset_normalizer-3.4.1.dist-info/METADATA @@ -0,0 +1,721 @@ +Metadata-Version: 2.1 +Name: charset-normalizer +Version: 3.4.1 +Summary: The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet. +Author-email: "Ahmed R. TAHRI" +Maintainer-email: "Ahmed R. 
TAHRI" +License: MIT +Project-URL: Changelog, https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md +Project-URL: Documentation, https://charset-normalizer.readthedocs.io/ +Project-URL: Code, https://github.com/jawah/charset_normalizer +Project-URL: Issue tracker, https://github.com/jawah/charset_normalizer/issues +Keywords: encoding,charset,charset-detector,detector,normalization,unicode,chardet,detect +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: MIT License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: Programming Language :: Python :: 3.13 +Classifier: Programming Language :: Python :: 3 :: Only +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Programming Language :: Python :: Implementation :: PyPy +Classifier: Topic :: Text Processing :: Linguistic +Classifier: Topic :: Utilities +Classifier: Typing :: Typed +Requires-Python: >=3.7 +Description-Content-Type: text/markdown +License-File: LICENSE +Provides-Extra: unicode-backport + +

Charset Detection, for Everyone 👋

+ +

+ The Real First Universal Charset Detector
+ + + + + Download Count Total + + + + +

+

+ Featured Packages
+ + Static Badge + + + Static Badge + +

+

+ In other language (unofficial port - by the community)
+ + Static Badge + +

+ +> A library that helps you read text from an unknown charset encoding.
Motivated by `chardet`, +> I'm trying to resolve the issue by taking a new approach. +> All IANA character set names for which the Python core library provides codecs are supported. + +

+ >>>>> 👉 Try Me Online Now, Then Adopt Me 👈 <<<<< +

+ +This project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**. + +| Feature | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) | +|--------------------------------------------------|:---------------------------------------------:|:--------------------------------------------------------------------------------------------------:|:-----------------------------------------------:| +| `Fast` | ❌ | ✅ | ✅ | +| `Universal**` | ❌ | ✅ | ❌ | +| `Reliable` **without** distinguishable standards | ❌ | ✅ | ✅ | +| `Reliable` **with** distinguishable standards | ✅ | ✅ | ✅ | +| `License` | LGPL-2.1
_restrictive_ | MIT | MPL-1.1
_restrictive_ | +| `Native Python` | ✅ | ✅ | ❌ | +| `Detect spoken language` | ❌ | ✅ | N/A | +| `UnicodeDecodeError Safety` | ❌ | ✅ | ❌ | +| `Whl Size (min)` | 193.6 kB | 42 kB | ~200 kB | +| `Supported Encoding` | 33 | 🎉 [99](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 | + +

+Reading Normalized TextCat Reading Text +

+ +*\*\* : They are clearly using specific code for a specific encoding, even if it covers most of the commonly used ones*
+ +## ⚡ Performance + +This package offer better performance than its counterpart Chardet. Here are some numbers. + +| Package | Accuracy | Mean per file (ms) | File per sec (est) | +|-----------------------------------------------|:--------:|:------------------:|:------------------:| +| [chardet](https://github.com/chardet/chardet) | 86 % | 63 ms | 16 file/sec | +| charset-normalizer | **98 %** | **10 ms** | 100 file/sec | + +| Package | 99th percentile | 95th percentile | 50th percentile | +|-----------------------------------------------|:---------------:|:---------------:|:---------------:| +| [chardet](https://github.com/chardet/chardet) | 265 ms | 71 ms | 7 ms | +| charset-normalizer | 100 ms | 50 ms | 5 ms | + +_updated as of december 2024 using CPython 3.12_ + +Chardet's performance on larger file (1MB+) are very poor. Expect huge difference on large payload. + +> Stats are generated using 400+ files using default parameters. More details on used files, see GHA workflows. +> And yes, these results might change at any time. The dataset can be updated to include more files. +> The actual delays heavily depends on your CPU capabilities. The factors should remain the same. +> Keep in mind that the stats are generous and that Chardet accuracy vs our is measured using Chardet initial capability +> (e.g. Supported Encoding) Challenge-them if you want. + +## ✨ Installation + +Using pip: + +```sh +pip install charset-normalizer -U +``` + +## 🚀 Basic Usage + +### CLI +This package comes with a CLI. + +``` +usage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD] + file [file ...] + +The Real First Universal Charset Detector. Discover originating encoding used +on text file. Normalize text to unicode. + +positional arguments: + files File(s) to be analysed + +optional arguments: + -h, --help show this help message and exit + -v, --verbose Display complementary information about file if any. + Stdout will contain logs about the detection process. 
+ -a, --with-alternative + Output complementary possibilities if any. Top-level + JSON WILL be a list. + -n, --normalize Permit to normalize input file. If not set, program + does not write anything. + -m, --minimal Only output the charset detected to STDOUT. Disabling + JSON output. + -r, --replace Replace file when trying to normalize it instead of + creating a new one. + -f, --force Replace file without asking if you are sure, use this + flag with caution. + -t THRESHOLD, --threshold THRESHOLD + Define a custom maximum amount of chaos allowed in + decoded content. 0. <= chaos <= 1. + --version Show version information and exit. +``` + +```bash +normalizer ./data/sample.1.fr.srt +``` + +or + +```bash +python -m charset_normalizer ./data/sample.1.fr.srt +``` + +🎉 Since version 1.4.0 the CLI produce easily usable stdout result in JSON format. + +```json +{ + "path": "/home/default/projects/charset_normalizer/data/sample.1.fr.srt", + "encoding": "cp1252", + "encoding_aliases": [ + "1252", + "windows_1252" + ], + "alternative_encodings": [ + "cp1254", + "cp1256", + "cp1258", + "iso8859_14", + "iso8859_15", + "iso8859_16", + "iso8859_3", + "iso8859_9", + "latin_1", + "mbcs" + ], + "language": "French", + "alphabets": [ + "Basic Latin", + "Latin-1 Supplement" + ], + "has_sig_or_bom": false, + "chaos": 0.149, + "coherence": 97.152, + "unicode_path": null, + "is_preferred": true +} +``` + +### Python +*Just print out normalized text* +```python +from charset_normalizer import from_path + +results = from_path('./my_subtitle.srt') + +print(str(results.best())) +``` + +*Upgrade your code without effort* +```python +from charset_normalizer import detect +``` + +The above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) BC result possible. 
+ +See the docs for advanced usage : [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/) + +## 😇 Why + +When I started using Chardet, I noticed that it was not suited to my expectations, and I wanted to propose a +reliable alternative using a completely different method. Also! I never back down on a good challenge! + +I **don't care** about the **originating charset** encoding, because **two different tables** can +produce **two identical rendered string.** +What I want is to get readable text, the best I can. + +In a way, **I'm brute forcing text decoding.** How cool is that ? 😎 + +Don't confuse package **ftfy** with charset-normalizer or chardet. ftfy goal is to repair Unicode string whereas charset-normalizer to convert raw file in unknown encoding to unicode. + +## 🍰 How + + - Discard all charset encoding table that could not fit the binary content. + - Measure noise, or the mess once opened (by chunks) with a corresponding charset encoding. + - Extract matches with the lowest mess detected. + - Additionally, we measure coherence / probe for a language. + +**Wait a minute**, what is noise/mess and coherence according to **YOU ?** + +*Noise :* I opened hundred of text files, **written by humans**, with the wrong encoding table. **I observed**, then +**I established** some ground rules about **what is obvious** when **it seems like** a mess (aka. defining noise in rendered text). + I know that my interpretation of what is noise is probably incomplete, feel free to contribute in order to + improve or rewrite it. + +*Coherence :* For each language there is on earth, we have computed ranked letter appearance occurrences (the best we can). So I thought +that intel is worth something here. So I use those records against decoded text to check if I can detect intelligent design. + +## ⚡ Known limitations + + - Language detection is unreliable when text contains two or more languages sharing identical letters. (eg. 
HTML (english tags) + Turkish content (Sharing Latin characters)) + - Every charset detector heavily depends on sufficient content. In common cases, do not bother run detection on very tiny content. + +## ⚠️ About Python EOLs + +**If you are running:** + +- Python >=2.7,<3.5: Unsupported +- Python 3.5: charset-normalizer < 2.1 +- Python 3.6: charset-normalizer < 3.1 +- Python 3.7: charset-normalizer < 4.0 + +Upgrade your Python interpreter as soon as possible. + +## 👤 Contributing + +Contributions, issues and feature requests are very much welcome.
+Feel free to check the [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute. + +## 📝 License + +Copyright © [Ahmed TAHRI @Ousret](https://github.com/Ousret).
+This project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed. + +Characters frequencies used in this project © 2012 [Denny Vrandečić](http://simia.net/letters/) + +## 💼 For Enterprise + +Professional support for charset-normalizer is available as part of the [Tidelift +Subscription][1]. Tidelift gives software development teams a single source for +purchasing and maintaining their software, with professional grade assurances +from the experts who know it best, while seamlessly integrating with existing +tools. + +[1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme + +[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/7297/badge)](https://www.bestpractices.dev/projects/7297) + +# Changelog +All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). + +## [3.4.1](https://github.com/Ousret/charset_normalizer/compare/3.4.0...3.4.1) (2024-12-24) + +### Changed +- Project metadata are now stored using `pyproject.toml` instead of `setup.cfg` using setuptools as the build backend. +- Enforce annotation delayed loading for a simpler and consistent types in the project. +- Optional mypyc compilation upgraded to version 1.14 for Python >= 3.8 + +### Added +- pre-commit configuration. +- noxfile. + +### Removed +- `build-requirements.txt` as per using `pyproject.toml` native build configuration. +- `bin/integration.py` and `bin/serve.py` in favor of downstream integration test (see noxfile). +- `setup.cfg` in favor of `pyproject.toml` metadata configuration. +- Unused `utils.range_scan` function. + +### Fixed +- Converting content to Unicode bytes may insert `utf_8` instead of preferred `utf-8`. 
(#572) +- Deprecation warning "'count' is passed as positional argument" when converting to Unicode bytes on Python 3.13+ + +## [3.4.0](https://github.com/Ousret/charset_normalizer/compare/3.3.2...3.4.0) (2024-10-08) + +### Added +- Argument `--no-preemptive` in the CLI to prevent the detector to search for hints. +- Support for Python 3.13 (#512) + +### Fixed +- Relax the TypeError exception thrown when trying to compare a CharsetMatch with anything else than a CharsetMatch. +- Improved the general reliability of the detector based on user feedbacks. (#520) (#509) (#498) (#407) (#537) +- Declared charset in content (preemptive detection) not changed when converting to utf-8 bytes. (#381) + +## [3.3.2](https://github.com/Ousret/charset_normalizer/compare/3.3.1...3.3.2) (2023-10-31) + +### Fixed +- Unintentional memory usage regression when using large payload that match several encoding (#376) +- Regression on some detection case showcased in the documentation (#371) + +### Added +- Noise (md) probe that identify malformed arabic representation due to the presence of letters in isolated form (credit to my wife) + +## [3.3.1](https://github.com/Ousret/charset_normalizer/compare/3.3.0...3.3.1) (2023-10-22) + +### Changed +- Optional mypyc compilation upgraded to version 1.6.1 for Python >= 3.8 +- Improved the general detection reliability based on reports from the community + +## [3.3.0](https://github.com/Ousret/charset_normalizer/compare/3.2.0...3.3.0) (2023-09-30) + +### Added +- Allow to execute the CLI (e.g. 
normalizer) through `python -m charset_normalizer.cli` or `python -m charset_normalizer` +- Support for 9 forgotten encoding that are supported by Python but unlisted in `encoding.aliases` as they have no alias (#323) + +### Removed +- (internal) Redundant utils.is_ascii function and unused function is_private_use_only +- (internal) charset_normalizer.assets is moved inside charset_normalizer.constant + +### Changed +- (internal) Unicode code blocks in constants are updated using the latest v15.0.0 definition to improve detection +- Optional mypyc compilation upgraded to version 1.5.1 for Python >= 3.8 + +### Fixed +- Unable to properly sort CharsetMatch when both chaos/noise and coherence were close due to an unreachable condition in \_\_lt\_\_ (#350) + +## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07) + +### Changed +- Typehint for function `from_path` no longer enforce `PathLike` as its first argument +- Minor improvement over the global detection reliability + +### Added +- Introduce function `is_binary` that relies on main capabilities, and optimized to detect binaries +- Propagate `enable_fallback` argument throughout `from_bytes`, `from_path`, and `from_fp` that allow a deeper control over the detection (default True) +- Explicit support for Python 3.12 + +### Fixed +- Edge case detection failure where a file would contain 'very-long' camel cased word (Issue #289) + +## [3.1.0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...3.1.0) (2023-03-06) + +### Added +- Argument `should_rename_legacy` for legacy function `detect` and disregard any new arguments without errors (PR #262) + +### Removed +- Support for Python 3.6 (PR #260) + +### Changed +- Optional speedup provided by mypy/c 1.0.1 + +## [3.0.1](https://github.com/Ousret/charset_normalizer/compare/3.0.0...3.0.1) (2022-11-18) + +### Fixed +- Multi-bytes cutter/chunk generator did not always cut correctly (PR #233) + +### Changed +- Speedup provided by 
mypy/c 0.990 on Python >= 3.7 + +## [3.0.0](https://github.com/Ousret/charset_normalizer/compare/2.1.1...3.0.0) (2022-10-20) + +### Added +- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results +- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES +- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio +- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl) + +### Changed +- Build with static metadata using 'build' frontend +- Make the language detection stricter +- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1 + +### Fixed +- CLI with opt --normalize fail when using full path for files +- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it +- Sphinx warnings when generating the documentation + +### Removed +- Coherence detector no longer return 'Simple English' instead return 'English' +- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese' +- Breaking: Method `first()` and `best()` from CharsetMatch +- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII) +- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches +- Breaking: Top-level function `normalize` +- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch +- Support for the backport `unicodedata2` + +## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18) + +### Added +- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector 
results +- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES +- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio + +### Changed +- Build with static metadata using 'build' frontend +- Make the language detection stricter + +### Fixed +- CLI with opt --normalize fail when using full path for files +- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it + +### Removed +- Coherence detector no longer return 'Simple English' instead return 'English' +- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese' + +## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21) + +### Added +- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl) + +### Removed +- Breaking: Method `first()` and `best()` from CharsetMatch +- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII) + +### Fixed +- Sphinx warnings when generating the documentation + +## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15) + +### Changed +- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1 + +### Removed +- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches +- Breaking: Top-level function `normalize` +- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch +- Support for the backport `unicodedata2` + +## [2.1.1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...2.1.1) (2022-08-19) + +### Deprecated +- Function `normalize` scheduled for removal in 3.0 + +### Changed +- Removed useless call to decode in fn is_unprintable (#206) + +### Fixed +- 
Third-party library (i18n xgettext) crashing not recognizing utf_8 (PEP 263) with underscore from [@aleksandernovikov](https://github.com/aleksandernovikov) (#204) + +## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19) + +### Added +- Output the Unicode table version when running the CLI with `--version` (PR #194) + +### Changed +- Re-use decoded buffer for single byte character sets from [@nijel](https://github.com/nijel) (PR #175) +- Fixing some performance bottlenecks from [@deedy5](https://github.com/deedy5) (PR #183) + +### Fixed +- Workaround potential bug in cpython with Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space (PR #175) +- CLI default threshold aligned with the API threshold from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko) (PR #181) + +### Removed +- Support for Python 3.5 (PR #192) + +### Deprecated +- Use of backport unicodedata from `unicodedata2` as Python is quickly catching up, scheduled for removal in 3.0 (PR #194) + +## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12) + +### Fixed +- ASCII miss-detection on rare cases (PR #170) + +## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30) + +### Added +- Explicit support for Python 3.11 (PR #164) + +### Changed +- The logging behavior have been completely reviewed, now using only TRACE and DEBUG levels (PR #163 #165) + +## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04) + +### Fixed +- Fallback match entries might lead to UnicodeDecodeError for large bytes sequence (PR #154) + +### Changed +- Skipping the language-detection (CD) on ASCII (PR #155) + +## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03) + +### Changed +- Moderating the logging impact (since 2.0.8) for specific environments (PR #147) + +### Fixed +- Wrong 
logging level applied when setting kwarg `explain` to True (PR #146) + +## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24) +### Changed +- Improvement over Vietnamese detection (PR #126) +- MD improvement on trailing data and long foreign (non-pure latin) data (PR #124) +- Efficiency improvements in cd/alphabet_languages from [@adbar](https://github.com/adbar) (PR #122) +- call sum() without an intermediary list following PEP 289 recommendations from [@adbar](https://github.com/adbar) (PR #129) +- Code style as refactored by Sourcery-AI (PR #131) +- Minor adjustment on the MD around european words (PR #133) +- Remove and replace SRTs from assets / tests (PR #139) +- Initialize the library logger with a `NullHandler` by default from [@nmaynes](https://github.com/nmaynes) (PR #135) +- Setting kwarg `explain` to True will add provisionally (bounded to function lifespan) a specific stream handler (PR #135) + +### Fixed +- Fix large (misleading) sequence giving UnicodeDecodeError (PR #137) +- Avoid using too insignificant chunk (PR #137) + +### Added +- Add and expose function `set_logging_handler` to configure a specific StreamHandler from [@nmaynes](https://github.com/nmaynes) (PR #135) +- Add `CHANGELOG.md` entries, format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) (PR #141) + +## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11) +### Added +- Add support for Kazakh (Cyrillic) language detection (PR #109) + +### Changed +- Further, improve inferring the language from a given single-byte code page (PR #112) +- Vainly trying to leverage PEP263 when PEP3120 is not supported (PR #116) +- Refactoring for potential performance improvements in loops from [@adbar](https://github.com/adbar) (PR #113) +- Various detection improvement (MD+CD) (PR #117) + +### Removed +- Remove redundant logging entry about detected language(s) (PR #115) + +### Fixed +- Fix a minor 
inconsistency between Python 3.5 and other versions regarding language detection (PR #117 #102) + +## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18) +### Fixed +- Unforeseen regression with the loss of the backward-compatibility with some older minor of Python 3.5.x (PR #100) +- Fix CLI crash when using --minimal output in certain cases (PR #103) + +### Changed +- Minor improvement to the detection efficiency (less than 1%) (PR #106 #101) + +## [2.0.5](https://github.com/Ousret/charset_normalizer/compare/2.0.4...2.0.5) (2021-09-14) +### Changed +- The project now comply with: flake8, mypy, isort and black to ensure a better overall quality (PR #81) +- The BC-support with v1.x was improved, the old staticmethods are restored (PR #82) +- The Unicode detection is slightly improved (PR #93) +- Add syntax sugar \_\_bool\_\_ for results CharsetMatches list-container (PR #91) + +### Removed +- The project no longer raise warning on tiny content given for detection, will be simply logged as warning instead (PR #92) + +### Fixed +- In some rare case, the chunks extractor could cut in the middle of a multi-byte character and could mislead the mess detection (PR #95) +- Some rare 'space' characters could trip up the UnprintablePlugin/Mess detection (PR #96) +- The MANIFEST.in was not exhaustive (PR #78) + +## [2.0.4](https://github.com/Ousret/charset_normalizer/compare/2.0.3...2.0.4) (2021-07-30) +### Fixed +- The CLI no longer raise an unexpected exception when no encoding has been found (PR #70) +- Fix accessing the 'alphabets' property when the payload contains surrogate characters (PR #68) +- The logger could mislead (explain=True) on detected languages and the impact of one MBCS match (PR #72) +- Submatch factoring could be wrong in rare edge cases (PR #72) +- Multiple files given to the CLI were ignored when publishing results to STDOUT. 
(After the first path) (PR #72) +- Fix line endings from CRLF to LF for certain project files (PR #67) + +### Changed +- Adjust the MD to lower the sensitivity, thus improving the global detection reliability (PR #69 #76) +- Allow fallback on specified encoding if any (PR #71) + +## [2.0.3](https://github.com/Ousret/charset_normalizer/compare/2.0.2...2.0.3) (2021-07-16) +### Changed +- Part of the detection mechanism has been improved to be less sensitive, resulting in more accurate detection results. Especially ASCII. (PR #63) +- According to the community wishes, the detection will fall back on ASCII or UTF-8 in a last-resort case. (PR #64) + +## [2.0.2](https://github.com/Ousret/charset_normalizer/compare/2.0.1...2.0.2) (2021-07-15) +### Fixed +- Empty/Too small JSON payload miss-detection fixed. Report from [@tseaver](https://github.com/tseaver) (PR #59) + +### Changed +- Don't inject unicodedata2 into sys.modules from [@akx](https://github.com/akx) (PR #57) + +## [2.0.1](https://github.com/Ousret/charset_normalizer/compare/2.0.0...2.0.1) (2021-07-13) +### Fixed +- Make it work where there isn't a filesystem available, dropping assets frequencies.json. Report from [@sethmlarson](https://github.com/sethmlarson). (PR #55) +- Using explain=False permanently disable the verbose output in the current runtime (PR #47) +- One log entry (language target preemptive) was not show in logs when using explain=True (PR #47) +- Fix undesired exception (ValueError) on getitem of instance CharsetMatches (PR #52) + +### Changed +- Public function normalize default args values were not aligned with from_bytes (PR #53) + +### Added +- You may now use charset aliases in cp_isolation and cp_exclusion arguments (PR #47) + +## [2.0.0](https://github.com/Ousret/charset_normalizer/compare/1.4.1...2.0.0) (2021-07-02) +### Changed +- 4x to 5 times faster than the previous 1.4.0 release. At least 2x faster than Chardet. 
+- Emphasis has been put on UTF-8 detection; it should perform almost instantaneously. +- The backward compatibility with Chardet has been greatly improved. The legacy detect function returns an identical charset name whenever possible. +- The detection mechanism has been slightly improved, now Turkish content is detected correctly (most of the time) +- The program has been rewritten to ease the readability and maintainability. (+Using static typing)+ +- utf_7 detection has been reinstated. + +### Removed +- This package no longer requires anything when used with Python 3.5 (Dropped cached_property) +- Removed support for these languages: Catalan, Esperanto, Kazakh, Basque, Volapük, Azeri, Galician, Nynorsk, Macedonian, and Serbocroatian. +- The exception hook on UnicodeDecodeError has been removed. + +### Deprecated +- Methods coherence_non_latin, w_counter, chaos_secondary_pass of the class CharsetMatch are now deprecated and scheduled for removal in v3.0 + +### Fixed +- The CLI output used the relative path of the file(s). Should be absolute. + +## [1.4.1](https://github.com/Ousret/charset_normalizer/compare/1.4.0...1.4.1) (2021-05-28) +### Fixed +- Logger configuration/usage no longer conflicts with others (PR #44) + +## [1.4.0](https://github.com/Ousret/charset_normalizer/compare/1.3.9...1.4.0) (2021-05-21) +### Removed +- Using standard logging instead of using the package loguru. +- Dropping nose test framework in favor of the maintained pytest. +- Choose to not use dragonmapper package to help with gibberish Chinese/CJK text. +- Require cached_property only for Python 3.5 due to constraint. Dropping for every other interpreter version. +- Stop support for UTF-7 that does not contain a SIG. +- Dropping PrettyTable, replaced with pure JSON output in CLI. + +### Fixed +- BOM marker in a CharsetNormalizerMatch instance could be False in rare cases even if obviously present. Due to the sub-match factoring process. 
+- Not searching properly for the BOM when trying utf32/16 parent codec. + +### Changed +- Improving the package final size by compressing frequencies.json. +- Huge improvement over the largest payloads. + +### Added +- CLI now produces JSON consumable output. +- Return ASCII if given sequences fit. Given reasonable confidence. + +## [1.3.9](https://github.com/Ousret/charset_normalizer/compare/1.3.8...1.3.9) (2021-05-13) + +### Fixed +- In some very rare cases, you may end up getting encode/decode errors due to a bad bytes payload (PR #40) + +## [1.3.8](https://github.com/Ousret/charset_normalizer/compare/1.3.7...1.3.8) (2021-05-12) + +### Fixed +- Empty given payload for detection may cause an exception if trying to access the `alphabets` property. (PR #39) + +## [1.3.7](https://github.com/Ousret/charset_normalizer/compare/1.3.6...1.3.7) (2021-05-12) + +### Fixed +- The legacy detect function should return UTF-8-SIG if sig is present in the payload. (PR #38) + +## [1.3.6](https://github.com/Ousret/charset_normalizer/compare/1.3.5...1.3.6) (2021-02-09) + +### Changed +- Amend the previous release to allow prettytable 2.0 (PR #35) + +## [1.3.5](https://github.com/Ousret/charset_normalizer/compare/1.3.4...1.3.5) (2021-02-08) + +### Fixed +- Fix error while using the package with a python pre-release interpreter (PR #33) + +### Changed +- Dependencies refactoring, constraints revised. + +### Added +- Add python 3.9 and 3.10 to the supported interpreters + +MIT License + +Copyright (c) 2025 TAHRI Ahmed R. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/charset_normalizer-3.4.1.dist-info/RECORD b/URSA/.venv_ursa/lib/python3.12/site-packages/charset_normalizer-3.4.1.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..7d5e1f98d4769e8cbe229d661f9cf1751ba41e5e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/charset_normalizer-3.4.1.dist-info/RECORD @@ -0,0 +1,35 @@ +../../../bin/normalizer,sha256=-2UuJExDmDgELJ7wgnZHFlBKMnRMp3zJeBPD00Gr4gk,274 +charset_normalizer-3.4.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +charset_normalizer-3.4.1.dist-info/LICENSE,sha256=bQ1Bv-FwrGx9wkjJpj4lTQ-0WmDVCoJX0K-SxuJJuIc,1071 +charset_normalizer-3.4.1.dist-info/METADATA,sha256=JbyHzhmqZh_ugEn1Y7TY7CDYZA9FoU6BP25hrCNDf50,35313 +charset_normalizer-3.4.1.dist-info/RECORD,, +charset_normalizer-3.4.1.dist-info/WHEEL,sha256=tRzqFuK6eFjpbf2xTNvU7E3xL2y00S_NWJvyqxej3BA,151 +charset_normalizer-3.4.1.dist-info/entry_points.txt,sha256=8C-Y3iXIfyXQ83Tpir2B8t-XLJYpxF5xbb38d_js-h4,65 +charset_normalizer-3.4.1.dist-info/top_level.txt,sha256=7ASyzePr8_xuZWJsnqJjIBtyV8vhEo0wBCv1MPRRi3Q,19 +charset_normalizer/__init__.py,sha256=OKRxRv2Zhnqk00tqkN0c1BtJjm165fWXLydE52IKuHc,1590 +charset_normalizer/__main__.py,sha256=yzYxMR-IhKRHYwcSlavEv8oGdwxsR89mr2X09qXGdps,109 +charset_normalizer/__pycache__/__init__.cpython-312.pyc,, +charset_normalizer/__pycache__/__main__.cpython-312.pyc,, +charset_normalizer/__pycache__/api.cpython-312.pyc,, +charset_normalizer/__pycache__/cd.cpython-312.pyc,, +charset_normalizer/__pycache__/constant.cpython-312.pyc,, +charset_normalizer/__pycache__/legacy.cpython-312.pyc,, +charset_normalizer/__pycache__/md.cpython-312.pyc,, +charset_normalizer/__pycache__/models.cpython-312.pyc,, +charset_normalizer/__pycache__/utils.cpython-312.pyc,, +charset_normalizer/__pycache__/version.cpython-312.pyc,, +charset_normalizer/api.py,sha256=qBRz8mJ_R5E713R6TOyqHEdnmyxbEDnCSHvx32ubDGg,22617 
+charset_normalizer/cd.py,sha256=WKTo1HDb-H9HfCDc3Bfwq5jzS25Ziy9SE2a74SgTq88,12522 +charset_normalizer/cli/__init__.py,sha256=D8I86lFk2-py45JvqxniTirSj_sFyE6sjaY_0-G1shc,136 +charset_normalizer/cli/__main__.py,sha256=VGC9klOoi6_R2z8rmyrc936kv7u2A1udjjHtlmNPDTM,10410 +charset_normalizer/cli/__pycache__/__init__.cpython-312.pyc,, +charset_normalizer/cli/__pycache__/__main__.cpython-312.pyc,, +charset_normalizer/constant.py,sha256=4VuTcZNLew1j_8ixA-Rt_VVqNWD4pwgHOHMCMlr0964,40477 +charset_normalizer/legacy.py,sha256=yhNXsPHkBfqPXKRb-sPXNj3Bscp9-mFGcYOkJ62tg9c,2328 +charset_normalizer/md.cpython-312-x86_64-linux-gnu.so,sha256=W654QTU3QZI6eWJ0fanScAr0_O6sL0I61fyRSdC-39Y,16064 +charset_normalizer/md.py,sha256=iyXXQGWl54nnLQLueMWTmUtlivO0-rTBgVkmJxIIAGU,20036 +charset_normalizer/md__mypyc.cpython-312-x86_64-linux-gnu.so,sha256=02IBduHhrAfIJteTWMlJulQK2gKMGP64dy8bVubEw3M,280904 +charset_normalizer/models.py,sha256=lKXhOnIPtiakbK3i__J9wpOfzx3JDTKj7Dn3Rg0VaRI,12394 +charset_normalizer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +charset_normalizer/utils.py,sha256=T5UHo8AS7NVMmgruWoZyqEf0WrZVcQpgUNetRoborSk,12002 +charset_normalizer/version.py,sha256=Ambcj3O8FfvdLfDLc8dkaxZx97O1IM_R4_aKGD_TDdE,115 diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/charset_normalizer-3.4.1.dist-info/WHEEL b/URSA/.venv_ursa/lib/python3.12/site-packages/charset_normalizer-3.4.1.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..1c04f8bcc72d3a97047d6dd13ae83a509916ce8b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/charset_normalizer-3.4.1.dist-info/WHEEL @@ -0,0 +1,6 @@ +Wheel-Version: 1.0 +Generator: setuptools (75.6.0) +Root-Is-Purelib: false +Tag: cp312-cp312-manylinux_2_17_x86_64 +Tag: cp312-cp312-manylinux2014_x86_64 + diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/charset_normalizer-3.4.1.dist-info/entry_points.txt 
b/URSA/.venv_ursa/lib/python3.12/site-packages/charset_normalizer-3.4.1.dist-info/entry_points.txt new file mode 100644 index 0000000000000000000000000000000000000000..ec920125215f6bc6aeb0e7072fc0fd8abc0aba1b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/charset_normalizer-3.4.1.dist-info/entry_points.txt @@ -0,0 +1,2 @@ +[console_scripts] +normalizer = charset_normalizer:cli.cli_detect diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/charset_normalizer-3.4.1.dist-info/top_level.txt b/URSA/.venv_ursa/lib/python3.12/site-packages/charset_normalizer-3.4.1.dist-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..66958f0a069d7aea7939bed40b9197608e93b243 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/charset_normalizer-3.4.1.dist-info/top_level.txt @@ -0,0 +1 @@ +charset_normalizer diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/cuda_bindings-12.9.4.dist-info/INSTALLER b/URSA/.venv_ursa/lib/python3.12/site-packages/cuda_bindings-12.9.4.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/cuda_bindings-12.9.4.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/cuda_bindings-12.9.4.dist-info/METADATA b/URSA/.venv_ursa/lib/python3.12/site-packages/cuda_bindings-12.9.4.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..1d84b668beada9912dd28d777b9b053c93f390ad --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/cuda_bindings-12.9.4.dist-info/METADATA @@ -0,0 +1,50 @@ +Metadata-Version: 2.4 +Name: cuda-bindings +Version: 12.9.4 +Summary: Python bindings for CUDA +Author-email: NVIDIA Corporation +License-Expression: LicenseRef-NVIDIA-SOFTWARE-LICENSE +Project-URL: Repository, https://github.com/NVIDIA/cuda-python +Project-URL: Documentation, 
https://nvidia.github.io/cuda-python/ +Classifier: Intended Audience :: Developers +Classifier: Topic :: Database +Classifier: Topic :: Scientific/Engineering +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: Programming Language :: Python :: 3.13 +Classifier: Environment :: GPU :: NVIDIA CUDA +Description-Content-Type: text/x-rst +License-File: LICENSE +Requires-Dist: cuda-pathfinder~=1.1 +Provides-Extra: all +Requires-Dist: nvidia-cuda-nvcc-cu12; extra == "all" +Requires-Dist: nvidia-cuda-nvrtc-cu12; extra == "all" +Requires-Dist: nvidia-nvjitlink-cu12>=12.3; extra == "all" +Requires-Dist: nvidia-cufile-cu12; sys_platform == "linux" and extra == "all" +Provides-Extra: test +Requires-Dist: cython<3.2,>=3.1; extra == "test" +Requires-Dist: setuptools>=77.0.0; extra == "test" +Requires-Dist: numpy>=1.21.1; extra == "test" +Requires-Dist: pytest>=6.2.4; extra == "test" +Requires-Dist: pytest-benchmark>=3.4.1; extra == "test" +Requires-Dist: pyglet>=2.1.9; extra == "test" +Dynamic: license-file + +.. SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +.. SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +**************************************** +cuda-bindings: Low-level CUDA interfaces +**************************************** + +`cuda.bindings `_ is a standard set of low-level interfaces, providing full coverage of and 1:1 access to the CUDA host APIs from Python. Checkout the `Overview `_ for the workflow and performance results. + +* `Repository `_ +* `Documentation `_ +* `Examples `_ +* `Issue tracker `_ + +For the installation instruction, please refer to the `Installation `_ page. 
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/cuda_bindings-12.9.4.dist-info/RECORD b/URSA/.venv_ursa/lib/python3.12/site-packages/cuda_bindings-12.9.4.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..96c59437fcf3689c3f9b9552fa6bbd9da8fb521e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/cuda_bindings-12.9.4.dist-info/RECORD @@ -0,0 +1,145 @@ +__pycache__/_cuda_bindings_redirector.cpython-312.pyc,, +_cuda_bindings_redirector.pth,sha256=KNhlB-eR2oNc-LndIf6YtXm3ezyifjrSe65iObyU92k,195 +_cuda_bindings_redirector.py,sha256=X41i3ukpIUeaqmJKsUBRkJ5bhYvmKA-HmKLShkw-vMQ,873 +cuda/bindings/__init__.pxd,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +cuda/bindings/__init__.py,sha256=MwaiGzsnjQRwjtwFMGS36SnuaElYmFDh_VOT5vAvzhI,246 +cuda/bindings/__pycache__/__init__.cpython-312.pyc,, +cuda/bindings/__pycache__/_version.cpython-312.pyc,, +cuda/bindings/_bindings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +cuda/bindings/_bindings/__pycache__/__init__.cpython-312.pyc,, +cuda/bindings/_bindings/cydriver.cpp,sha256=3KrAiXZLAk4QExJnwCi0RqRFc9e5MmpyA0pXYP4lvOU,5336682 +cuda/bindings/_bindings/cydriver.cpython-312-x86_64-linux-gnu.so,sha256=9BVsi4L1VJkXJJ4wCw33L-8WJucTvlw01Ikbm_RctZE,598704 +cuda/bindings/_bindings/cydriver.pxd,sha256=U-67ijzur-1tNB_ND9GhOMUr8LYQ90lHo3Pp-WmfSX0,66096 +cuda/bindings/_bindings/cydriver.pyx,sha256=CW_MRbU7X84Nu_tihEZ5htcGJgAaHjS2pl5TsuvNgu8,477099 +cuda/bindings/_bindings/cynvrtc.cpp,sha256=WsjRF9zVYxUNKlURTeQ8sftUpWIKs0TB4o3bvWwtp44,592617 +cuda/bindings/_bindings/cynvrtc.cpython-312-x86_64-linux-gnu.so,sha256=NMM2xFEaeshO4J8dc1cklOI45ApFu6w5t6b5ekKd0Zs,94832 +cuda/bindings/_bindings/cynvrtc.pxd,sha256=xdfpIbU5dZjByiUYSoBVvKd3epaI4b2iH8aB1NqTNus,3262 +cuda/bindings/_bindings/cynvrtc.pyx,sha256=u1vo7xbj5XBP3uVax-jDc3Zoip81EHFJ1hoXUNO_gsQ,19226 +cuda/bindings/_bindings/cyruntime.cpp,sha256=AamVerI0ntAzvBqYKU8TTjatZV7HuGW-dlTMFV8fk5A,2488681 
+cuda/bindings/_bindings/cyruntime.cpython-312-x86_64-linux-gnu.so,sha256=WK5_Q8BbIK-Ys3j4HJwsKHt59gSdg5w2lpCKC5LN_RI,916112 +cuda/bindings/_bindings/cyruntime.pxd,sha256=Dlc1C_tTcX_X6klbo8gBJk3e1WhNn1jublCUp4ZxpN8,46820 +cuda/bindings/_bindings/cyruntime.pyx,sha256=XIl_sg9jPe6dZ6QtI85-HIlSeICuUOu70LkMbOqdp7g,104497 +cuda/bindings/_bindings/cyruntime_ptds.cpp,sha256=U9dF0AjV3hpYODcjs3TYIkTU6OFFg9f7ncMkSlqwJPs,749777 +cuda/bindings/_bindings/cyruntime_ptds.cpython-312-x86_64-linux-gnu.so,sha256=nPkzxbeALDKVaY-S-CoMlzt70Tw41QesBLUa0xblBBs,772624 +cuda/bindings/_bindings/cyruntime_ptds.pxd,sha256=dkObccpRHhnjr5UZvHbaiwtVZm53rEDa48xoKTBxcfs,46862 +cuda/bindings/_bindings/cyruntime_ptds.pyx,sha256=BcBTcaRHEW1Li_UiZoa5ZMtwcDcr342tDe8zmUFCiWM,65848 +cuda/bindings/_bindings/loader.cpp,sha256=SbRXr0SecKlJ0EwM_ZIuyY5E5P9HB8xswlVWIZvxbII,10878 +cuda/bindings/_bindings/loader.h,sha256=nv8OzG_tSP4jszgK5o4-ELSW6K1iZEwaEekIECjP1dA,545 +cuda/bindings/_bindings/loader.pxd,sha256=vBMdQU-VrGguCEW58Y8V2HRzaUh5DDC0NkNWLJ_silI,252 +cuda/bindings/_internal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +cuda/bindings/_internal/__pycache__/__init__.cpython-312.pyc,, +cuda/bindings/_internal/cufile.cpp,sha256=MWboE-4TqKxCzioiRXFRiPrG01fmGQBdB2MHBxXFFnA,756974 +cuda/bindings/_internal/cufile.cpython-312-x86_64-linux-gnu.so,sha256=u3Ckb4fkOuy735BWldHpmY6oAgElt4-2JejYWWQtU_c,107120 +cuda/bindings/_internal/cufile.pxd,sha256=5wlp4PQbuxiC7e_16UpdG_cKna_dwAklTxxu9-Zgx2s,4513 +cuda/bindings/_internal/cufile.pyx,sha256=ibxisCEGx2_djrUCziJ3Qud2y9UrF4utNJV6FwXhuw8,30854 +cuda/bindings/_internal/cufile_linux.pyx,sha256=ibxisCEGx2_djrUCziJ3Qud2y9UrF4utNJV6FwXhuw8,30854 +cuda/bindings/_internal/nvjitlink.cpp,sha256=CnBhyy4LeHf4B5mjp4cIKh5Zih0D2xasowcoCUkHpjI,530841 +cuda/bindings/_internal/nvjitlink.cpython-312-x86_64-linux-gnu.so,sha256=ZfDsIBD-Bzx2FeQLgZKXpezZYTVG_wbHlVEfKeR7QSk,82544 +cuda/bindings/_internal/nvjitlink.pxd,sha256=r9pGNwOhalRmNvg-Bem7EIJ0Uirgm8jL4LeB5OI5Qv4,2463 
+cuda/bindings/_internal/nvjitlink.pyx,sha256=UPKxtW-h6BxX-enAp1Cwn8XpNHScUTLOE_toylcv1Zw,15326 +cuda/bindings/_internal/nvjitlink_linux.pyx,sha256=UPKxtW-h6BxX-enAp1Cwn8XpNHScUTLOE_toylcv1Zw,15326 +cuda/bindings/_internal/nvjitlink_windows.pyx,sha256=SMWwON3_2KYYxcIRwbMRpz1F43wEJO7PUI-07dyhoYw,13972 +cuda/bindings/_internal/nvvm.cpp,sha256=TmkHB9bIyDWvexm1FjdSVRrX32-vgSGv6Ow0ELWxUSI,538397 +cuda/bindings/_internal/nvvm.cpython-312-x86_64-linux-gnu.so,sha256=fQNUUQz3u_VURIFTaJ5V56TMOxvMsMMvz2hIbrEFAOw,82544 +cuda/bindings/_internal/nvvm.pxd,sha256=mBYfe7Ku2ZIRc8EURO3VgUeRVzQDyWm_abyoYuOZKKU,2087 +cuda/bindings/_internal/nvvm.pyx,sha256=U0tuiNy1i5p417vhKHCAfV_cl1307I5a2ryecHejssc,14431 +cuda/bindings/_internal/nvvm_linux.pyx,sha256=U0tuiNy1i5p417vhKHCAfV_cl1307I5a2ryecHejssc,14431 +cuda/bindings/_internal/nvvm_windows.pyx,sha256=hZJFwQRg16bQb1djS95DM14iHbVnwLwlbdVtac0M8ZQ,12619 +cuda/bindings/_internal/utils.cpp,sha256=cdNQMAoo1cK-aVXaOW7uXR68M9RFF9CewaiEi713N38,513913 +cuda/bindings/_internal/utils.cpython-312-x86_64-linux-gnu.so,sha256=FQ6jKmn7NWXw02URkOq64F-t7cSSFXkVGQcL0uHCN9I,122232 +cuda/bindings/_internal/utils.pxd,sha256=IosEOz6zYBBR1Qfnobz-UnzDv0fl-mBtG5Px8tZXVkA,4677 +cuda/bindings/_internal/utils.pyx,sha256=Xb_9y2iK41Htcjgxr7Sex1rUX8CvHUzj706-ICVfFyE,4974 +cuda/bindings/_lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +cuda/bindings/_lib/__pycache__/__init__.cpython-312.pyc,, +cuda/bindings/_lib/cyruntime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +cuda/bindings/_lib/cyruntime/__pycache__/__init__.cpython-312.pyc,, +cuda/bindings/_lib/cyruntime/cyruntime.pxd,sha256=icR65C-YpWJ59uyCRMRg_wGbqNGt6qPQLh2ksFkuTis,4785 +cuda/bindings/_lib/cyruntime/cyruntime.pxi,sha256=xQGllvLx76JE8WEeKS3LjOungvwj335nP6QNmKulr8c,88191 +cuda/bindings/_lib/dlfcn.pxd,sha256=Hey780RLtX6IQwca7kzsBnoMJ6CsizjfaKtcY240GrE,406 +cuda/bindings/_lib/param_packer.h,sha256=ITzzEtJ4ykPY1Py7_SPlf8u1gY9tUlAelvQ6Z3b0vkc,6031 
+cuda/bindings/_lib/param_packer.pxd,sha256=9dAhnMLzwYbdDhdiTW6EFXRPfIE5wvfnKwoZTVIhjgs,374 +cuda/bindings/_lib/utils.pxd,sha256=lETNHYHejCh7G9AGyYotT6ChLQlpC6RkrwCds5F2KWw,3401 +cuda/bindings/_lib/utils.pxi,sha256=4wRMcW34p2ozrcPljQJPRvFQCvNWxntWXnJYUw-C7xE,31228 +cuda/bindings/_lib/windll.pxd,sha256=oOzOOJ_hpQNVnqareRx3h_ctJI_zpebBDfaRhxtpSk8,1445 +cuda/bindings/_version.py,sha256=rvnUrtrL194zOPG_wAMMBT-eE5FjDOzy_wKDFBywpPo,190 +cuda/bindings/cufile.cpp,sha256=OB9ceJd4BMKi8aRmz7EQxFSbhHIJ84_EJhpYNewBZXg,2084518 +cuda/bindings/cufile.cpython-312-x86_64-linux-gnu.so,sha256=UkfwKC_LCChGdhaMm3dCIkNvX4hUbUB3f7F0-ucD7R8,419792 +cuda/bindings/cufile.pxd,sha256=VPQ-j1nRfpBndADUxte20syCk_c1Xtz1ecmF_ojzXk8,3434 +cuda/bindings/cufile.pyx,sha256=ziQ1g2TVgRIs8CzY2nl8dQUaT73JZ_XHUa0kBzSx5DI,43453 +cuda/bindings/cycufile.cpp,sha256=u1QAq_xTAbAzbk5Lj_R2uKBQo9p5KzQ5K5Naxp_hTRs,305313 +cuda/bindings/cycufile.cpython-312-x86_64-linux-gnu.so,sha256=GNu7n5O-Smuqo6D4RtRURqHHq_lebtixBwK8THwJgQE,31584 +cuda/bindings/cycufile.pxd,sha256=BviSPZsQpMhZNLj2ZyfcDNcpTxCkHPHfcDR4-ed9bSY,11763 +cuda/bindings/cycufile.pyx,sha256=f01Z8igqJUMJ2hdP_ihiiotdfOFyf_DaCJbTlxWbJwo,6589 +cuda/bindings/cydriver.cpp,sha256=X0ys0YNcfo3xEi2WZ6CGLw9mYfk_NqHNpUqci9veAOw,1532311 +cuda/bindings/cydriver.cpython-312-x86_64-linux-gnu.so,sha256=3wByXaiXbva4I5oj-W9gXcKQO05W6w68d3DUKR10d04,187232 +cuda/bindings/cydriver.pxd,sha256=Evh8Sw1gEEVikeMjwE0viM1KYm9poUr3ny1IbBZCiZQ,158239 +cuda/bindings/cydriver.pyx,sha256=1pfRJ-x00lANlQD4axWq6UXCCdtheQoe7CkmJGch230,99699 +cuda/bindings/cynvjitlink.cpp,sha256=qN3u9yPXa-MBSLjz8ktNeArP1pH66llWdgMCRbrbsHo,227933 +cuda/bindings/cynvjitlink.cpython-312-x86_64-linux-gnu.so,sha256=j2CxLeTQVOM8bpYOGSrSxks_R5QK0GKLiDgJKeL00J4,27488 +cuda/bindings/cynvjitlink.pxd,sha256=_m51JP6NEHgSlbTYYFVM1Px-hVRyg_CegWoYsgqp6uM,4644 +cuda/bindings/cynvjitlink.pyx,sha256=YhLbTMaUSsX8X6MnDhrV3ldIJPLMPzsIy8sWjuIRYJ4,3357 +cuda/bindings/cynvrtc.cpp,sha256=wV_US2TH4tBh8fcsH_aDwzuJjrrYX-zEIMmmw8QLt5s,254856 
+cuda/bindings/cynvrtc.cpython-312-x86_64-linux-gnu.so,sha256=oAQuelwCR-I2AxxW_B0a8vdWvxuBLo_bdgec55CXJ9I,31584 +cuda/bindings/cynvrtc.pxd,sha256=I4s3yv7A0_XUvCmELEn0UXvH3U7aAiD7kA72djnE2sg,4139 +cuda/bindings/cynvrtc.pyx,sha256=FsERKZ5aeDW5xtJpHcFc1BiZsR7apr54uT7wHvB1T-k,4732 +cuda/bindings/cynvvm.cpp,sha256=prY9wvdpKPhXZa1piGlkM2XMtqepHxkw0etC_fV3iW8,221819 +cuda/bindings/cynvvm.cpython-312-x86_64-linux-gnu.so,sha256=hbbekcu8CPf0Vv1s1ot3fTFQb3v8esTfETDORew9p8I,23392 +cuda/bindings/cynvvm.pxd,sha256=wSWJO9TW84qnxm7ly-1PbpGTSwI-ZNe6U1SVFQqaLn4,3087 +cuda/bindings/cynvvm.pyx,sha256=XWbq16DfE6xJa954qiwuTj11DKJlroYS1wHvCrtZG18,2864 +cuda/bindings/cyruntime.cpp,sha256=IqsG6VLRSCnElPwGNt2c_9uAhn6GIV9pJwHt89ycVsY,1245949 +cuda/bindings/cyruntime.cpython-312-x86_64-linux-gnu.so,sha256=UPZnyR1q-_4GKnxa6N2JR_YkS-jWHklzsfCeTg-q9pI,146432 +cuda/bindings/cyruntime.pxd,sha256=n2iZWpQyGFWtMfE7N2tMTq6ysKL4UaRYoa-tlZYtrag,60423 +cuda/bindings/cyruntime.pyx,sha256=hDqUiDEwsfpZVmQ4hn2f1M9-QOId26UKIqgXxAXdWs4,75058 +cuda/bindings/cyruntime_functions.pxi,sha256=lQEdUsLiVmk4bxcenqnVxZZS1j1gds6oJX7Dw_evzW0,35448 +cuda/bindings/cyruntime_types.pxi,sha256=5iRgbkt6Twc0gbbInSMMEw7_X5KYFGZGJvcwKYyOa7A,51509 +cuda/bindings/driver.cpp,sha256=Dlvp22_bNPykRQLQrvATxLpXzLCo57GzEwOtZsuQgaw,37317473 +cuda/bindings/driver.cpython-312-x86_64-linux-gnu.so,sha256=RudCqwzHrHpQbeQdjG167Mksr_MwMulrxJyBsgTVr9o,7889944 +cuda/bindings/driver.pxd,sha256=F08_ME3UhVVGqKgDtbIp8jzgJZCyrCQHcLLfeQVQ0hU,198314 +cuda/bindings/driver.pyx,sha256=Sd4WFR-ZautasvRwm56KETFDVkpkvQBimlcgmva5DfY,2078692 +cuda/bindings/nvjitlink.cpp,sha256=Uq32HyNO8Oq3jpxuNCFXWPBKvUmhVGH1hNASyjcdC2c,602964 +cuda/bindings/nvjitlink.cpython-312-x86_64-linux-gnu.so,sha256=VkclrscHmQZlhA0a2t8tWuoYTvj8LFiIvMyNZYjbbqg,111920 +cuda/bindings/nvjitlink.pxd,sha256=neeTYLL2zuTrOYPITKb1FTMcwSZeyf2hILckXgQlBy8,1631 +cuda/bindings/nvjitlink.pyx,sha256=KkvUtPWtAIpl_cZ3D0WwenePF_ZhDDnMtztpX1t9bgQ,10030 
+cuda/bindings/nvrtc.cpp,sha256=oFu5q-2oE9Y7Mn8YMJDGHi0MCehzIZvJOQ4L7KZ1BjI,2794280 +cuda/bindings/nvrtc.cpython-312-x86_64-linux-gnu.so,sha256=2QwvDs1vODvw1zefF2VdP45ld4SuU2QCc1tgcc1lFk8,436952 +cuda/bindings/nvrtc.pxd,sha256=KdQ-OO-azJAstRWyZZA4_UMJHLQep6gUG_QCgIOQ3wg,759 +cuda/bindings/nvrtc.pyx,sha256=SowESyJ-WLm63xdwvOBDNiJMUvbGVj0OIMpy9h4bI8Q,35757 +cuda/bindings/nvvm.cpp,sha256=itqhIi8x0Q0yzwsXZkZPAVdHi_6kT2jfeBkskXTZShU,545426 +cuda/bindings/nvvm.cpython-312-x86_64-linux-gnu.so,sha256=lL-uR1E73VrGGeOEJuWeJswPBWB6QZ2CnvHoZ8vWnuQ,103696 +cuda/bindings/nvvm.pxd,sha256=cmQzv-UpKehfXemlk1blXuYMTjD3pK0Iijb5BY39Xdo,1517 +cuda/bindings/nvvm.pyx,sha256=Pjg5vEuvTlTYV7PVXw-5Wj7BwufggKRHNHrypXKq6ao,9185 +cuda/bindings/runtime.cpp,sha256=NAN9Jhb4zbnstVjGlOcQpci34CGRY5Bserj3rw4iNVw,27202710 +cuda/bindings/runtime.cpython-312-x86_64-linux-gnu.so,sha256=9BRn7mqgiopuQ969pT6H10C4ZJSa4mOjX8txS7mpPOc,5525848 +cuda/bindings/runtime.pxd,sha256=jx4qPfbd16uDx1c6R6VIKrLuhUse9tg-IjDSsfbzbIk,97943 +cuda/bindings/runtime.pyx,sha256=DCwWAwkhinkEJ4Zpu78VuGH0YZb2dDHUOenY_q-IkiU,1424771 +cuda/bindings/utils/__init__.py,sha256=AcONSPwlZfQ8W54dBR2lQ0sy975Km3Nn0-E4KXIxvRE,879 +cuda/bindings/utils/__pycache__/__init__.cpython-312.pyc,, +cuda/bindings/utils/__pycache__/_ptx_utils.cpython-312.pyc,, +cuda/bindings/utils/_ptx_utils.py,sha256=JvxiAoZLla1DvJvw3twYQ8ofSqrCE-fQAsMAfr_vxP4,3165 +cuda/ccuda.cpp,sha256=CC0Idd0Q1lSEcxmaLsy_zeKCiCtOBAv_rLGJbKbGG6A,238723 +cuda/ccuda.cpython-312-x86_64-linux-gnu.so,sha256=Yr322bx1gkbvuo2qBfiGViHcEFmt4ptsiBqkbYTLvB8,23392 +cuda/ccuda.pxd,sha256=-rRZuvxjFwtTHqBd116rqnBcwT5OsXxEGXJ6K92XNCk,646 +cuda/ccuda.pyx,sha256=TSFMKwjKEEHq-_y5KoPF9gCTLQvy8WPkDztvXukNidM,290 +cuda/ccudart.cpp,sha256=PnMrP_jzJXLuY1MewxP6eOndKpj1i2AARj97-qoptHk,243939 +cuda/ccudart.cpython-312-x86_64-linux-gnu.so,sha256=0667Nk5ImEck50vQYGzvZu6QhxNAEf6seeePNlU6lNY,23392 +cuda/ccudart.pxd,sha256=1q5ib_kgDoDOBrUFArxEHOr7NYvGQCDFol_-CW0iUsk,653 
+cuda/ccudart.pyx,sha256=gP-IiOGV4yJDVKUQv8lkHPfZ335hm01iDnhZSvUlI74,294 +cuda/cnvrtc.cpp,sha256=NdZLw6YvIa_naKZ8g1UFFtdpE9pMPiL5KXAo_bgiVFs,197889 +cuda/cnvrtc.cpython-312-x86_64-linux-gnu.so,sha256=Sqzg5xBZSm-Xb5eJkCTk-PXKK0wl-a_ahNiioGna6Mc,23392 +cuda/cnvrtc.pxd,sha256=io3eEqzkRWTs19EL7Dsr6v9nBHB9GP_0Li0xbSphUUg,645 +cuda/cnvrtc.pyx,sha256=Rtx0GqywLHNki6ZxNhJ2pvBPXQW3n4VCJuVMwzplNDg,286 +cuda/cuda.cpp,sha256=APGhUtjXuBGJdWgxGmIZSm4srVEMU3lZ2v1AA8POfPI,207100 +cuda/cuda.cpython-312-x86_64-linux-gnu.so,sha256=LbhTgVNpp5a1quJ63ngg60kA-nRr5DgSyc4FaVqQ6Ik,23456 +cuda/cuda.pyx,sha256=koAGbl98xuqaFLzfSiMdf0PsZwIRMgSqBCD0VvO0KBo,873 +cuda/cudart.cpp,sha256=T72eHviTA5O0tjJgNyfDZDQjmsM5lXp7kOAMvCI6GDI,207194 +cuda/cudart.cpython-312-x86_64-linux-gnu.so,sha256=rx8PQUstfAA0Zou1PLGutrK7aid1ff--z9iS7fVOAoM,23456 +cuda/cudart.pyx,sha256=aO3CHl2FX5OokueAhJZoUzumAxHZbjSZcIyaf-unxWQ,883 +cuda/nvrtc.cpp,sha256=tm9yH1eiiWjqwc1V1nK16-ZWiHbtomix0MfkCGoUPfQ,207123 +cuda/nvrtc.cpython-312-x86_64-linux-gnu.so,sha256=T29GMPIjfwlF8phY8t2Kq-OjDG5wOw8RvdmxqkT4nA0,23456 +cuda/nvrtc.pyx,sha256=jswEwshPcySgcbFLkCiT5wXzA3462rfbBpaXK1Ty2cI,872 +cuda_bindings-12.9.4.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +cuda_bindings-12.9.4.dist-info/METADATA,sha256=6JZh9BTr07MltmSp9nEnTA4OoYieuLAsRB7g1TdSScQ,2578 +cuda_bindings-12.9.4.dist-info/RECORD,, +cuda_bindings-12.9.4.dist-info/WHEEL,sha256=LpQoElFRmdjMbJKp2FHd8t88QuEtEHUH_crLCk0WHuI,152 +cuda_bindings-12.9.4.dist-info/licenses/LICENSE,sha256=Jakdbt-2T7eAoBPcHEAMIkmqaZjIyLlN_6yV2Z5dvww,11096 +cuda_bindings-12.9.4.dist-info/top_level.txt,sha256=U5vpnvwNpaJF8bl4KnoUluMDTRt0J972FipwXjgNQ3A,5 diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/cuda_bindings-12.9.4.dist-info/WHEEL b/URSA/.venv_ursa/lib/python3.12/site-packages/cuda_bindings-12.9.4.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..d9747de579da942662e3820798278b587cc841d1 --- /dev/null +++ 
b/URSA/.venv_ursa/lib/python3.12/site-packages/cuda_bindings-12.9.4.dist-info/WHEEL @@ -0,0 +1,6 @@ +Wheel-Version: 1.0 +Generator: setuptools (80.9.0) +Root-Is-Purelib: false +Tag: cp312-cp312-manylinux_2_24_x86_64 +Tag: cp312-cp312-manylinux_2_28_x86_64 + diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/cuda_bindings-12.9.4.dist-info/top_level.txt b/URSA/.venv_ursa/lib/python3.12/site-packages/cuda_bindings-12.9.4.dist-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..089f044f442ab95aabf9de9eb52b63efbac9f7b9 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/cuda_bindings-12.9.4.dist-info/top_level.txt @@ -0,0 +1 @@ +cuda diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers-0.37.0.dist-info/INSTALLER b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers-0.37.0.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers-0.37.0.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers-0.37.0.dist-info/METADATA b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers-0.37.0.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..5993c04b8554660b4359d168e98b47853771286c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers-0.37.0.dist-info/METADATA @@ -0,0 +1,374 @@ +Metadata-Version: 2.4 +Name: diffusers +Version: 0.37.0 +Summary: State-of-the-art diffusion in PyTorch and JAX. 
+Home-page: https://github.com/huggingface/diffusers +Author: The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/diffusers/graphs/contributors) +Author-email: diffusers@huggingface.co +License: Apache 2.0 License +Keywords: deep learning diffusion jax pytorch stable diffusion audioldm +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: Intended Audience :: Education +Classifier: Intended Audience :: Science/Research +Classifier: License :: OSI Approved :: Apache Software License +Classifier: Operating System :: OS Independent +Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Requires-Python: >=3.10.0 +Description-Content-Type: text/markdown +License-File: LICENSE +Requires-Dist: importlib_metadata +Requires-Dist: filelock +Requires-Dist: httpx<1.0.0 +Requires-Dist: huggingface-hub<2.0,>=0.34.0 +Requires-Dist: numpy +Requires-Dist: regex!=2019.12.17 +Requires-Dist: requests +Requires-Dist: safetensors>=0.3.1 +Requires-Dist: Pillow +Provides-Extra: quality +Requires-Dist: urllib3<=2.0.0; extra == "quality" +Requires-Dist: isort>=5.5.4; extra == "quality" +Requires-Dist: ruff==0.9.10; extra == "quality" +Requires-Dist: hf-doc-builder>=0.3.0; extra == "quality" +Provides-Extra: docs +Requires-Dist: hf-doc-builder>=0.3.0; extra == "docs" +Provides-Extra: training +Requires-Dist: accelerate>=0.31.0; extra == "training" +Requires-Dist: datasets; extra == "training" +Requires-Dist: protobuf<4,>=3.20.3; extra == "training" +Requires-Dist: tensorboard; extra == "training" +Requires-Dist: Jinja2; extra == "training" +Requires-Dist: peft>=0.17.0; extra == "training" +Requires-Dist: timm; extra == "training" +Provides-Extra: test 
+Requires-Dist: compel==0.1.8; extra == "test" +Requires-Dist: ftfy; extra == "test" +Requires-Dist: GitPython<3.1.19; extra == "test" +Requires-Dist: datasets; extra == "test" +Requires-Dist: Jinja2; extra == "test" +Requires-Dist: invisible-watermark>=0.2.0; extra == "test" +Requires-Dist: librosa; extra == "test" +Requires-Dist: parameterized; extra == "test" +Requires-Dist: protobuf<4,>=3.20.3; extra == "test" +Requires-Dist: pytest; extra == "test" +Requires-Dist: pytest-timeout; extra == "test" +Requires-Dist: pytest-xdist; extra == "test" +Requires-Dist: requests-mock==1.10.0; extra == "test" +Requires-Dist: safetensors>=0.3.1; extra == "test" +Requires-Dist: sentencepiece!=0.1.92,>=0.1.91; extra == "test" +Requires-Dist: scipy; extra == "test" +Requires-Dist: tiktoken>=0.7.0; extra == "test" +Requires-Dist: torchsde; extra == "test" +Requires-Dist: torchvision; extra == "test" +Requires-Dist: transformers>=4.41.2; extra == "test" +Requires-Dist: phonemizer; extra == "test" +Provides-Extra: torch +Requires-Dist: torch>=1.4; extra == "torch" +Requires-Dist: accelerate>=0.31.0; extra == "torch" +Provides-Extra: bitsandbytes +Requires-Dist: bitsandbytes>=0.43.3; extra == "bitsandbytes" +Requires-Dist: accelerate>=0.31.0; extra == "bitsandbytes" +Provides-Extra: gguf +Requires-Dist: gguf>=0.10.0; extra == "gguf" +Requires-Dist: accelerate>=0.31.0; extra == "gguf" +Provides-Extra: optimum-quanto +Requires-Dist: optimum_quanto>=0.2.6; extra == "optimum-quanto" +Requires-Dist: accelerate>=0.31.0; extra == "optimum-quanto" +Provides-Extra: torchao +Requires-Dist: torchao>=0.7.0; extra == "torchao" +Requires-Dist: accelerate>=0.31.0; extra == "torchao" +Provides-Extra: nvidia-modelopt +Requires-Dist: nvidia_modelopt[hf]>=0.33.1; extra == "nvidia-modelopt" +Provides-Extra: flax +Requires-Dist: jax>=0.4.1; extra == "flax" +Requires-Dist: jaxlib>=0.4.1; extra == "flax" +Requires-Dist: flax>=0.4.1; extra == "flax" +Provides-Extra: dev +Requires-Dist: urllib3<=2.0.0; 
extra == "dev" +Requires-Dist: isort>=5.5.4; extra == "dev" +Requires-Dist: ruff==0.9.10; extra == "dev" +Requires-Dist: hf-doc-builder>=0.3.0; extra == "dev" +Requires-Dist: compel==0.1.8; extra == "dev" +Requires-Dist: ftfy; extra == "dev" +Requires-Dist: GitPython<3.1.19; extra == "dev" +Requires-Dist: datasets; extra == "dev" +Requires-Dist: Jinja2; extra == "dev" +Requires-Dist: invisible-watermark>=0.2.0; extra == "dev" +Requires-Dist: librosa; extra == "dev" +Requires-Dist: parameterized; extra == "dev" +Requires-Dist: protobuf<4,>=3.20.3; extra == "dev" +Requires-Dist: pytest; extra == "dev" +Requires-Dist: pytest-timeout; extra == "dev" +Requires-Dist: pytest-xdist; extra == "dev" +Requires-Dist: requests-mock==1.10.0; extra == "dev" +Requires-Dist: safetensors>=0.3.1; extra == "dev" +Requires-Dist: sentencepiece!=0.1.92,>=0.1.91; extra == "dev" +Requires-Dist: scipy; extra == "dev" +Requires-Dist: tiktoken>=0.7.0; extra == "dev" +Requires-Dist: torchsde; extra == "dev" +Requires-Dist: torchvision; extra == "dev" +Requires-Dist: transformers>=4.41.2; extra == "dev" +Requires-Dist: phonemizer; extra == "dev" +Requires-Dist: accelerate>=0.31.0; extra == "dev" +Requires-Dist: datasets; extra == "dev" +Requires-Dist: protobuf<4,>=3.20.3; extra == "dev" +Requires-Dist: tensorboard; extra == "dev" +Requires-Dist: Jinja2; extra == "dev" +Requires-Dist: peft>=0.17.0; extra == "dev" +Requires-Dist: timm; extra == "dev" +Requires-Dist: hf-doc-builder>=0.3.0; extra == "dev" +Requires-Dist: torch>=1.4; extra == "dev" +Requires-Dist: accelerate>=0.31.0; extra == "dev" +Requires-Dist: jax>=0.4.1; extra == "dev" +Requires-Dist: jaxlib>=0.4.1; extra == "dev" +Requires-Dist: flax>=0.4.1; extra == "dev" +Dynamic: author +Dynamic: author-email +Dynamic: classifier +Dynamic: description +Dynamic: description-content-type +Dynamic: home-page +Dynamic: keywords +Dynamic: license +Dynamic: license-file +Dynamic: provides-extra +Dynamic: requires-dist +Dynamic: requires-python 
+Dynamic: summary + + + +

+
+ +
+

+

+ GitHub + GitHub release + GitHub release + Contributor Covenant + X account +

+ +🤗 Diffusers is the go-to library for state-of-the-art pretrained diffusion models for generating images, audio, and even 3D structures of molecules. Whether you're looking for a simple inference solution or training your own diffusion models, 🤗 Diffusers is a modular toolbox that supports both. Our library is designed with a focus on [usability over performance](https://huggingface.co/docs/diffusers/conceptual/philosophy#usability-over-performance), [simple over easy](https://huggingface.co/docs/diffusers/conceptual/philosophy#simple-over-easy), and [customizability over abstractions](https://huggingface.co/docs/diffusers/conceptual/philosophy#tweakable-contributorfriendly-over-abstraction). + +🤗 Diffusers offers three core components: + +- State-of-the-art [diffusion pipelines](https://huggingface.co/docs/diffusers/api/pipelines/overview) that can be run in inference with just a few lines of code. +- Interchangeable noise [schedulers](https://huggingface.co/docs/diffusers/api/schedulers/overview) for different diffusion speeds and output quality. +- Pretrained [models](https://huggingface.co/docs/diffusers/api/models/overview) that can be used as building blocks, and combined with schedulers, for creating your own end-to-end diffusion systems. + +## Installation + +We recommend installing 🤗 Diffusers in a virtual environment from PyPI or Conda. For more details about installing [PyTorch](https://pytorch.org/get-started/locally/), please refer to their official documentation. + +### PyTorch + +With `pip` (official package): + +```bash +pip install --upgrade diffusers[torch] +``` + +With `conda` (maintained by the community): + +```sh +conda install -c conda-forge diffusers +``` + +### Apple Silicon (M1/M2) support + +Please refer to the [How to use Stable Diffusion in Apple Silicon](https://huggingface.co/docs/diffusers/optimization/mps) guide. + +## Quickstart + +Generating outputs is super easy with 🤗 Diffusers. 
To generate an image from text, use the `from_pretrained` method to load any pretrained diffusion model (browse the [Hub](https://huggingface.co/models?library=diffusers&sort=downloads) for 30,000+ checkpoints): + +```python +from diffusers import DiffusionPipeline +import torch + +pipeline = DiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16) +pipeline.to("cuda") +pipeline("An image of a squirrel in Picasso style").images[0] +``` + +You can also dig into the models and schedulers toolbox to build your own diffusion system: + +```python +from diffusers import DDPMScheduler, UNet2DModel +from PIL import Image +import torch + +scheduler = DDPMScheduler.from_pretrained("google/ddpm-cat-256") +model = UNet2DModel.from_pretrained("google/ddpm-cat-256").to("cuda") +scheduler.set_timesteps(50) + +sample_size = model.config.sample_size +noise = torch.randn((1, 3, sample_size, sample_size), device="cuda") +input = noise + +for t in scheduler.timesteps: + with torch.no_grad(): + noisy_residual = model(input, t).sample + prev_noisy_sample = scheduler.step(noisy_residual, t, input).prev_sample + input = prev_noisy_sample + +image = (input / 2 + 0.5).clamp(0, 1) +image = image.cpu().permute(0, 2, 3, 1).numpy()[0] +image = Image.fromarray((image * 255).round().astype("uint8")) +image +``` + +Check out the [Quickstart](https://huggingface.co/docs/diffusers/quicktour) to launch your diffusion journey today! 
+ +## How to navigate the documentation + +| **Documentation** | **What can I learn?** | +|---------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| [Tutorial](https://huggingface.co/docs/diffusers/tutorials/tutorial_overview) | A basic crash course for learning how to use the library's most important features like using models and schedulers to build your own diffusion system, and training your own diffusion model. | +| [Loading](https://huggingface.co/docs/diffusers/using-diffusers/loading) | Guides for how to load and configure all the components (pipelines, models, and schedulers) of the library, as well as how to use different schedulers. | +| [Pipelines for inference](https://huggingface.co/docs/diffusers/using-diffusers/overview_techniques) | Guides for how to use pipelines for different inference tasks, batched generation, controlling generated outputs and randomness, and how to contribute a pipeline to the library. | +| [Optimization](https://huggingface.co/docs/diffusers/optimization/fp16) | Guides for how to optimize your diffusion model to run faster and consume less memory. | +| [Training](https://huggingface.co/docs/diffusers/training/overview) | Guides for how to train a diffusion model for different tasks with different training techniques. | +## Contribution + +We ❤️ contributions from the open-source community! +If you want to contribute to this library, please check out our [Contribution guide](https://github.com/huggingface/diffusers/blob/main/CONTRIBUTING.md). +You can look out for [issues](https://github.com/huggingface/diffusers/issues) you'd like to tackle to contribute to the library. 
+- See [Good first issues](https://github.com/huggingface/diffusers/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22) for general opportunities to contribute +- See [New model/pipeline](https://github.com/huggingface/diffusers/issues?q=is%3Aopen+is%3Aissue+label%3A%22New+pipeline%2Fmodel%22) to contribute exciting new diffusion models / diffusion pipelines +- See [New scheduler](https://github.com/huggingface/diffusers/issues?q=is%3Aopen+is%3Aissue+label%3A%22New+scheduler%22) + +Also, say 👋 in our public Discord channel Join us on Discord. We discuss the hottest trends about diffusion models, help each other with contributions, personal projects or just hang out ☕. + + +## Popular Tasks & Pipelines + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
TaskPipeline🤗 Hub
Unconditional Image Generation DDPM google/ddpm-ema-church-256
Text-to-ImageStable Diffusion Text-to-Image stable-diffusion-v1-5/stable-diffusion-v1-5
Text-to-ImageunCLIP kakaobrain/karlo-v1-alpha
Text-to-ImageDeepFloyd IF DeepFloyd/IF-I-XL-v1.0
Text-to-ImageKandinsky kandinsky-community/kandinsky-2-2-decoder
Text-guided Image-to-ImageControlNet lllyasviel/sd-controlnet-canny
Text-guided Image-to-ImageInstructPix2Pix timbrooks/instruct-pix2pix
Text-guided Image-to-ImageStable Diffusion Image-to-Image stable-diffusion-v1-5/stable-diffusion-v1-5
Text-guided Image InpaintingStable Diffusion Inpainting stable-diffusion-v1-5/stable-diffusion-inpainting
Image VariationStable Diffusion Image Variation lambdalabs/sd-image-variations-diffusers
Super ResolutionStable Diffusion Upscale stabilityai/stable-diffusion-x4-upscaler
Super ResolutionStable Diffusion Latent Upscale stabilityai/sd-x2-latent-upscaler
+ +## Popular libraries using 🧨 Diffusers + +- https://github.com/microsoft/TaskMatrix +- https://github.com/invoke-ai/InvokeAI +- https://github.com/InstantID/InstantID +- https://github.com/apple/ml-stable-diffusion +- https://github.com/Sanster/lama-cleaner +- https://github.com/IDEA-Research/Grounded-Segment-Anything +- https://github.com/ashawkey/stable-dreamfusion +- https://github.com/deep-floyd/IF +- https://github.com/bentoml/BentoML +- https://github.com/bmaltais/kohya_ss +- +14,000 other amazing GitHub repositories 💪 + +Thank you for using us ❤️. + +## Credits + +This library concretizes previous work by many different authors and would not have been possible without their great research and implementations. We'd like to thank, in particular, the following implementations which have helped us in our development and without which the API could not have been as polished today: + +- @CompVis' latent diffusion models library, available [here](https://github.com/CompVis/latent-diffusion) +- @hojonathanho original DDPM implementation, available [here](https://github.com/hojonathanho/diffusion) as well as the extremely useful translation into PyTorch by @pesser, available [here](https://github.com/pesser/pytorch_diffusion) +- @ermongroup's DDIM implementation, available [here](https://github.com/ermongroup/ddim) +- @yang-song's Score-VE and Score-VP implementations, available [here](https://github.com/yang-song/score_sde_pytorch) + +We also want to thank @heejkoo for the very helpful overview of papers, code and resources on diffusion models, available [here](https://github.com/heejkoo/Awesome-Diffusion-Models) as well as @crowsonkb and @rromb for useful discussions and insights. 
+ +## Citation + +```bibtex +@misc{von-platen-etal-2022-diffusers, + author = {Patrick von Platen and Suraj Patil and Anton Lozhkov and Pedro Cuenca and Nathan Lambert and Kashif Rasul and Mishig Davaadorj and Dhruv Nair and Sayak Paul and William Berman and Yiyi Xu and Steven Liu and Thomas Wolf}, + title = {Diffusers: State-of-the-art diffusion models}, + year = {2022}, + publisher = {GitHub}, + journal = {GitHub repository}, + howpublished = {\url{https://github.com/huggingface/diffusers}} +} +``` diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers-0.37.0.dist-info/RECORD b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers-0.37.0.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..0452e4c36598c2d8c93756ef03685d58065b1a5c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers-0.37.0.dist-info/RECORD @@ -0,0 +1,1703 @@ +../../../bin/diffusers-cli,sha256=JxWbm8RPB4w-Lddn7TgcguuA90MMf8cmHuJQIipAhro,279 +diffusers-0.37.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +diffusers-0.37.0.dist-info/METADATA,sha256=cuQ0IurI0o0_hrf_NamH0DR2jOVX-fK89ujPQHYPi_c,20378 +diffusers-0.37.0.dist-info/RECORD,, +diffusers-0.37.0.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +diffusers-0.37.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91 +diffusers-0.37.0.dist-info/entry_points.txt,sha256=_1bvshKV_6_b63_FAkcUs9W6tUKGeIoQ3SHEZsovEWs,72 +diffusers-0.37.0.dist-info/licenses/LICENSE,sha256=-eIHDCR1F7Hd9l97EbOTSEoY2pGpWP0YqXvQ8kHDElw,11357 +diffusers-0.37.0.dist-info/top_level.txt,sha256=axJl2884vMSvhzrFrSoht36QXA_6gZN9cKtg4xOO72o,10 +diffusers/__init__.py,sha256=sCfcWQ-XkIOL9gBptQEFCQTVAmFtpLVWYJjBsRhc3RI,58322 +diffusers/__pycache__/__init__.cpython-312.pyc,, +diffusers/__pycache__/callbacks.cpython-312.pyc,, +diffusers/__pycache__/configuration_utils.cpython-312.pyc,, +diffusers/__pycache__/dependency_versions_check.cpython-312.pyc,, 
+diffusers/__pycache__/dependency_versions_table.cpython-312.pyc,, +diffusers/__pycache__/image_processor.cpython-312.pyc,, +diffusers/__pycache__/optimization.cpython-312.pyc,, +diffusers/__pycache__/training_utils.cpython-312.pyc,, +diffusers/__pycache__/video_processor.cpython-312.pyc,, +diffusers/callbacks.py,sha256=TpZ0y-RUnaYDczxviSUccV5oYO8lzv3f2kr0-22_ziw,10270 +diffusers/commands/__init__.py,sha256=KQXlWjcjH6qmwD5B6be8kpniSOPbyWZPTghdXrNquXU,920 +diffusers/commands/__pycache__/__init__.cpython-312.pyc,, +diffusers/commands/__pycache__/custom_blocks.cpython-312.pyc,, +diffusers/commands/__pycache__/diffusers_cli.cpython-312.pyc,, +diffusers/commands/__pycache__/env.cpython-312.pyc,, +diffusers/commands/__pycache__/fp16_safetensors.cpython-312.pyc,, +diffusers/commands/custom_blocks.py,sha256=o_wIPXnCSbdUHipWP1ItEHhIViNd15RpoNU0kEen0C4,5169 +diffusers/commands/diffusers_cli.py,sha256=NPeI0w_qB50bKhsITxGENNdQA7bXp5mck7IXMSagxXw,1425 +diffusers/commands/env.py,sha256=IZ2e2id8ZjNQ-BNEG3qt58TyEsM6Qs337kL5KdZmqZ0,6224 +diffusers/commands/fp16_safetensors.py,sha256=flWe2XsmMZuT375rLZcI1Fc6lhHQlNfpfxUtDECa0z4,5419 +diffusers/configuration_utils.py,sha256=Snr5vkiRPt-nfz0yw3XJl_oVtTw_BO_Vw3GzbFxsGWA,36239 +diffusers/dependency_versions_check.py,sha256=PcT_deWuvIKrNkjkCnQKi0ZTWCl77tHC02lhttbqQHM,1271 +diffusers/dependency_versions_table.py,sha256=zfu6krZ4s8eE406FWuNq5jZYlEnv2zpPJJQJoYRCKck,1856 +diffusers/experimental/__init__.py,sha256=0C9ExG0XYiGZuzFJkZuJ53K6Ix5ylF2kWe4PGASchtY,38 +diffusers/experimental/__pycache__/__init__.cpython-312.pyc,, +diffusers/experimental/rl/__init__.py,sha256=Gcoznw9rYjfMvswH0seXekKYDAAN1YXXxZ-RWMdzvrE,57 +diffusers/experimental/rl/__pycache__/__init__.cpython-312.pyc,, +diffusers/experimental/rl/__pycache__/value_guided_sampling.cpython-312.pyc,, +diffusers/experimental/rl/value_guided_sampling.py,sha256=a1F7YC--u1OgoGC8WGgi2pDUhYBI6XYnF2-RUzPD3dI,6033 
+diffusers/guiders/__init__.py,sha256=5lBpwnpMFGj0D2iQ9iuBjA07bPOH8lnPkGhzBqhlkAE,1484 +diffusers/guiders/__pycache__/__init__.cpython-312.pyc,, +diffusers/guiders/__pycache__/adaptive_projected_guidance.cpython-312.pyc,, +diffusers/guiders/__pycache__/adaptive_projected_guidance_mix.cpython-312.pyc,, +diffusers/guiders/__pycache__/auto_guidance.cpython-312.pyc,, +diffusers/guiders/__pycache__/classifier_free_guidance.cpython-312.pyc,, +diffusers/guiders/__pycache__/classifier_free_zero_star_guidance.cpython-312.pyc,, +diffusers/guiders/__pycache__/frequency_decoupled_guidance.cpython-312.pyc,, +diffusers/guiders/__pycache__/guider_utils.cpython-312.pyc,, +diffusers/guiders/__pycache__/magnitude_aware_guidance.cpython-312.pyc,, +diffusers/guiders/__pycache__/perturbed_attention_guidance.cpython-312.pyc,, +diffusers/guiders/__pycache__/skip_layer_guidance.cpython-312.pyc,, +diffusers/guiders/__pycache__/smoothed_energy_guidance.cpython-312.pyc,, +diffusers/guiders/__pycache__/tangential_classifier_free_guidance.cpython-312.pyc,, +diffusers/guiders/adaptive_projected_guidance.py,sha256=e-aGva9ZCd8JSwwnAPjIQUeUFbDF0Zt6wSmoAr19_qo,9755 +diffusers/guiders/adaptive_projected_guidance_mix.py,sha256=FwROXwa8NRVdfiXnC14HSKRNEnL5MSOH7_SD9diFvgw,12467 +diffusers/guiders/auto_guidance.py,sha256=oSbmrjOzpC2gnyJ_Zm1uXfFHHnW5hLnU8Tma98AkASk,9363 +diffusers/guiders/classifier_free_guidance.py,sha256=PzfEZGFXOs1XIgPZYZnG04JFp456WEM0smdH4X7pEH8,6348 +diffusers/guiders/classifier_free_zero_star_guidance.py,sha256=x-6aR1mV3lC7t1lXJbkHOCkzBg58LU2q2mkvV6kC4Ro,7097 +diffusers/guiders/frequency_decoupled_guidance.py,sha256=AB_UAKkqwjK1CmhJ37jq1m8iohp11hCBq1s-3dOZqLY,17353 +diffusers/guiders/guider_utils.py,sha256=7jff1fyIjlCU1CPCi9Zmcik8Jka_p3NzW0BKQQHJvqM,18045 +diffusers/guiders/magnitude_aware_guidance.py,sha256=j9llX8gqoMwUsiWW5mpNZOMpbJbRnWOXNwecntnjGy0,6413 +diffusers/guiders/perturbed_attention_guidance.py,sha256=5JKg-DgiLqJZzFDkVUscJvL-VhV1h22N0UBK4oG_NW4,14195 
+diffusers/guiders/skip_layer_guidance.py,sha256=88kKP6vrWq--Xar9-0uMOMWXtxl1CmhrxenIldUktRA,13747 +diffusers/guiders/smoothed_energy_guidance.py,sha256=tHzxASjLP8tZmzuQDJ2OQFuc8WZgCuKtv0EK1zEYMl0,13280 +diffusers/guiders/tangential_classifier_free_guidance.py,sha256=046tDOT6bKcOq0-3GAVaj2IQS5ApMLzhoCavSjizTmA,6172 +diffusers/hooks/__init__.py,sha256=yJ1A_buTdZAzAfbRDQJ94JXISrkJ0mrmRsxemmG0t6Q,1463 +diffusers/hooks/__pycache__/__init__.cpython-312.pyc,, +diffusers/hooks/__pycache__/_common.cpython-312.pyc,, +diffusers/hooks/__pycache__/_helpers.cpython-312.pyc,, +diffusers/hooks/__pycache__/context_parallel.cpython-312.pyc,, +diffusers/hooks/__pycache__/faster_cache.cpython-312.pyc,, +diffusers/hooks/__pycache__/first_block_cache.cpython-312.pyc,, +diffusers/hooks/__pycache__/group_offloading.cpython-312.pyc,, +diffusers/hooks/__pycache__/hooks.cpython-312.pyc,, +diffusers/hooks/__pycache__/layer_skip.cpython-312.pyc,, +diffusers/hooks/__pycache__/layerwise_casting.cpython-312.pyc,, +diffusers/hooks/__pycache__/mag_cache.cpython-312.pyc,, +diffusers/hooks/__pycache__/pyramid_attention_broadcast.cpython-312.pyc,, +diffusers/hooks/__pycache__/smoothed_energy_guidance_utils.cpython-312.pyc,, +diffusers/hooks/__pycache__/taylorseer_cache.cpython-312.pyc,, +diffusers/hooks/__pycache__/utils.cpython-312.pyc,, +diffusers/hooks/_common.py,sha256=rvP68tVJElkamqpwH2_ZSMpHSU0cnMmeRFQ-OZgFJMA,2162 +diffusers/hooks/_helpers.py,sha256=lZ0tpl4dk4Rn8W2sESn7nQswz5mun_TXiXC7m06B-_0,14133 +diffusers/hooks/context_parallel.py,sha256=THt4RKq0neWGdk1Ynset2JD064sJCuGX_USqosZv_uw,16413 +diffusers/hooks/faster_cache.py,sha256=NRYs5F61z_dtLj_EL_Cm3iX42yE6tvEmY_h0CkeDfzw,35030 +diffusers/hooks/first_block_cache.py,sha256=mDmJGh4aJAI2Dt8SQd7Ozh39sAAm4HmL8zZOEs1SjjU,11270 +diffusers/hooks/group_offloading.py,sha256=m-qpoJOA6vwW9gnd-s5ZzRrif65QfDbYN6RpppKBsJ8,46810 +diffusers/hooks/hooks.py,sha256=KMltUcsRkJD_53VfTKzNQHHa3Et72f_zBrL5Zx31j2M,11091 
+diffusers/hooks/layer_skip.py,sha256=DtK8z-Y-a1svbrF0pjGa1CjrWphsmF0LzDKGG_VUioI,11919 +diffusers/hooks/layerwise_casting.py,sha256=IsOcL6tJRUsKIvl_fjPyUV9qGopHvCHbKYT4L2ynMPY,10420 +diffusers/hooks/mag_cache.py,sha256=RvcCBgeRO-s-3dx7H9ulIkCRVG9L8sK0IPQ7nCVyJcQ,18521 +diffusers/hooks/pyramid_attention_broadcast.py,sha256=VMlzL_bLBgcPPTJ6XT1TsWV11dBiuWZdVbdbEj30eT8,15540 +diffusers/hooks/smoothed_energy_guidance_utils.py,sha256=puvqJjte3bPL3is_miYVD0YSHpfJy5I31-u3ZEaF3fM,7391 +diffusers/hooks/taylorseer_cache.py,sha256=cBTu1tt2Udgv6dj4XVeBZRuNH9pDnZYxZhLOktUcDnQ,14155 +diffusers/hooks/utils.py,sha256=JU_bbr5S_XxPqE_4kdX5vfJdqJup95mHrYIqDpC9TMM,1848 +diffusers/image_processor.py,sha256=xpyUIHNB8zmwj0dx4HHuBSovySOy_UsMiqu_dfpZaR8,58397 +diffusers/loaders/__init__.py,sha256=ssmO0LcOpXWPuMPf2KnFfqlwxSy41y2NqeULwGR8yQc,5902 +diffusers/loaders/__pycache__/__init__.cpython-312.pyc,, +diffusers/loaders/__pycache__/ip_adapter.cpython-312.pyc,, +diffusers/loaders/__pycache__/lora_base.cpython-312.pyc,, +diffusers/loaders/__pycache__/lora_conversion_utils.cpython-312.pyc,, +diffusers/loaders/__pycache__/lora_pipeline.cpython-312.pyc,, +diffusers/loaders/__pycache__/peft.cpython-312.pyc,, +diffusers/loaders/__pycache__/single_file.cpython-312.pyc,, +diffusers/loaders/__pycache__/single_file_model.cpython-312.pyc,, +diffusers/loaders/__pycache__/single_file_utils.cpython-312.pyc,, +diffusers/loaders/__pycache__/textual_inversion.cpython-312.pyc,, +diffusers/loaders/__pycache__/transformer_flux.cpython-312.pyc,, +diffusers/loaders/__pycache__/transformer_sd3.cpython-312.pyc,, +diffusers/loaders/__pycache__/unet.cpython-312.pyc,, +diffusers/loaders/__pycache__/unet_loader_utils.cpython-312.pyc,, +diffusers/loaders/__pycache__/utils.cpython-312.pyc,, +diffusers/loaders/ip_adapter.py,sha256=SALWDDiEevbC5f-AXCy1X-WcomoByZdVh9JNmSo2HBE,57113 +diffusers/loaders/lora_base.py,sha256=bDslTDvtoovyglU66Ae9YMCNtAX4h7WzuIDcdrCdDCQ,46227 
+diffusers/loaders/lora_conversion_utils.py,sha256=uD6ChXmiCcEcb_E8hxLsFuhYmxLcYAIk0Mgnd7m9dBw,128147 +diffusers/loaders/lora_pipeline.py,sha256=dWfnFsU5y-J2pMFPAnFGO9amRd7O8xzQ7C40-uNuJFE,250779 +diffusers/loaders/peft.py,sha256=Ti1W7KbV_-Y6mVjsokwLqbeBWtmJdLbBxv_B-yt-G4U,40236 +diffusers/loaders/single_file.py,sha256=lk4MOIAv_yXdDn0g2DmHTK8b9sEaajMJ2eTduYIE1Ec,24997 +diffusers/loaders/single_file_model.py,sha256=KmLWeG6b51zAr65fz2S7M2Yy21GCVDwK8Oa8bIyGV1Y,24002 +diffusers/loaders/single_file_utils.py,sha256=N97LHrdvoTx7LraNm_islmxdLdyFxnhtMlJwX0x2lXI,186838 +diffusers/loaders/textual_inversion.py,sha256=nMiftLCsl2LkQ0cj-ER9iDPSawOESATuEUk85LX3USo,27765 +diffusers/loaders/transformer_flux.py,sha256=XakxzS3znQhzp-l-PjOtmQ-DCKw6cElMXvMRdedREAc,8021 +diffusers/loaders/transformer_sd3.py,sha256=Ux2cdYUpNa-UeuoyulTG63J2jj7miMRNsltAP7BFUv8,8612 +diffusers/loaders/unet.py,sha256=9zAMuDOLL4S3cLhXesEMxtm9H5qnfAONg5YTN_zLfns,45779 +diffusers/loaders/unet_loader_utils.py,sha256=VIlD_l_5iJdcKAmsET4EVvsL6qLRyBo6UNQy0uQ3UKE,6228 +diffusers/loaders/utils.py,sha256=E67b8d9S8K5020MuDTMcbyCKrU-_9Lj-fIhl7ibQP_I,2399 +diffusers/models/__init__.py,sha256=On4cewJSYsVdXoa5mHdquSO2H-7N1IAr_hbKHKr-Rvo,14851 +diffusers/models/__pycache__/__init__.cpython-312.pyc,, +diffusers/models/__pycache__/_modeling_parallel.cpython-312.pyc,, +diffusers/models/__pycache__/activations.cpython-312.pyc,, +diffusers/models/__pycache__/adapter.cpython-312.pyc,, +diffusers/models/__pycache__/attention.cpython-312.pyc,, +diffusers/models/__pycache__/attention_dispatch.cpython-312.pyc,, +diffusers/models/__pycache__/attention_flax.cpython-312.pyc,, +diffusers/models/__pycache__/attention_processor.cpython-312.pyc,, +diffusers/models/__pycache__/auto_model.cpython-312.pyc,, +diffusers/models/__pycache__/cache_utils.cpython-312.pyc,, +diffusers/models/__pycache__/downsampling.cpython-312.pyc,, +diffusers/models/__pycache__/embeddings.cpython-312.pyc,, 
+diffusers/models/__pycache__/embeddings_flax.cpython-312.pyc,, +diffusers/models/__pycache__/lora.cpython-312.pyc,, +diffusers/models/__pycache__/model_loading_utils.cpython-312.pyc,, +diffusers/models/__pycache__/modeling_flax_pytorch_utils.cpython-312.pyc,, +diffusers/models/__pycache__/modeling_flax_utils.cpython-312.pyc,, +diffusers/models/__pycache__/modeling_outputs.cpython-312.pyc,, +diffusers/models/__pycache__/modeling_pytorch_flax_utils.cpython-312.pyc,, +diffusers/models/__pycache__/modeling_utils.cpython-312.pyc,, +diffusers/models/__pycache__/normalization.cpython-312.pyc,, +diffusers/models/__pycache__/resnet.cpython-312.pyc,, +diffusers/models/__pycache__/resnet_flax.cpython-312.pyc,, +diffusers/models/__pycache__/upsampling.cpython-312.pyc,, +diffusers/models/__pycache__/vae_flax.cpython-312.pyc,, +diffusers/models/__pycache__/vq_model.cpython-312.pyc,, +diffusers/models/_modeling_parallel.py,sha256=MP6xwQIl8lBrtA3JTBiosD2WZ_jOhWse-DXcBMcrTe4,13072 +diffusers/models/activations.py,sha256=qxdn6OROfUvxyxgpm6M2VDKeJxH6mDsUI_xP4S3iw6s,6511 +diffusers/models/adapter.py,sha256=Pg_dORe2mDUB_9rKPfRVaL9StNvSfClwVj7mPPo1TpQ,24458 +diffusers/models/attention.py,sha256=AZKZjTl5UzzbyO75yPeD0LSkpUfeyeQ4ihfJrjscjqg,73696 +diffusers/models/attention_dispatch.py,sha256=Ox7_gvhgcd7kBxT9zlDK5Cb_jvClPMbbpJoVbpJdFlo,122737 +diffusers/models/attention_flax.py,sha256=jVUS_FuQZNDnuxk6d1hXCEAouiguzvbXu2HQjLYUXYw,21526 +diffusers/models/attention_processor.py,sha256=kGFy0DBMC6XcsJzK2uE2VEmoPQggQOtZeo8mq0XZcqg,239322 +diffusers/models/auto_model.py,sha256=81RayUxSPql96CJ5TZuNshrYIOoumsVAWOVU-8A-Tk8,17773 +diffusers/models/autoencoders/__init__.py,sha256=XVch9hjyfcKMTU5d5gEdD2lF8GoxaXAdfv1-2LJhjgw,1350 +diffusers/models/autoencoders/__pycache__/__init__.cpython-312.pyc,, +diffusers/models/autoencoders/__pycache__/autoencoder_asym_kl.cpython-312.pyc,, +diffusers/models/autoencoders/__pycache__/autoencoder_dc.cpython-312.pyc,, 
+diffusers/models/autoencoders/__pycache__/autoencoder_kl.cpython-312.pyc,, +diffusers/models/autoencoders/__pycache__/autoencoder_kl_allegro.cpython-312.pyc,, +diffusers/models/autoencoders/__pycache__/autoencoder_kl_cogvideox.cpython-312.pyc,, +diffusers/models/autoencoders/__pycache__/autoencoder_kl_cosmos.cpython-312.pyc,, +diffusers/models/autoencoders/__pycache__/autoencoder_kl_flux2.cpython-312.pyc,, +diffusers/models/autoencoders/__pycache__/autoencoder_kl_hunyuan_video.cpython-312.pyc,, +diffusers/models/autoencoders/__pycache__/autoencoder_kl_hunyuanimage.cpython-312.pyc,, +diffusers/models/autoencoders/__pycache__/autoencoder_kl_hunyuanimage_refiner.cpython-312.pyc,, +diffusers/models/autoencoders/__pycache__/autoencoder_kl_hunyuanvideo15.cpython-312.pyc,, +diffusers/models/autoencoders/__pycache__/autoencoder_kl_ltx.cpython-312.pyc,, +diffusers/models/autoencoders/__pycache__/autoencoder_kl_ltx2.cpython-312.pyc,, +diffusers/models/autoencoders/__pycache__/autoencoder_kl_ltx2_audio.cpython-312.pyc,, +diffusers/models/autoencoders/__pycache__/autoencoder_kl_magvit.cpython-312.pyc,, +diffusers/models/autoencoders/__pycache__/autoencoder_kl_mochi.cpython-312.pyc,, +diffusers/models/autoencoders/__pycache__/autoencoder_kl_qwenimage.cpython-312.pyc,, +diffusers/models/autoencoders/__pycache__/autoencoder_kl_temporal_decoder.cpython-312.pyc,, +diffusers/models/autoencoders/__pycache__/autoencoder_kl_wan.cpython-312.pyc,, +diffusers/models/autoencoders/__pycache__/autoencoder_oobleck.cpython-312.pyc,, +diffusers/models/autoencoders/__pycache__/autoencoder_rae.cpython-312.pyc,, +diffusers/models/autoencoders/__pycache__/autoencoder_tiny.cpython-312.pyc,, +diffusers/models/autoencoders/__pycache__/consistency_decoder_vae.cpython-312.pyc,, +diffusers/models/autoencoders/__pycache__/vae.cpython-312.pyc,, +diffusers/models/autoencoders/__pycache__/vq_model.cpython-312.pyc,, 
+diffusers/models/autoencoders/autoencoder_asym_kl.py,sha256=g32l1pGP5dijC3VwCC84Spx5N4kqahbxWZEpAPEbg0c,7670 +diffusers/models/autoencoders/autoencoder_dc.py,sha256=fjJ7qHIU9YhF1YZU_NZAMSIuW6p5sVLuNwPh3O1ZIC8,30316 +diffusers/models/autoencoders/autoencoder_kl.py,sha256=koS-FI4YFU8Qdb9ryFiM1isw8rBTydMbYEmmlpU5XdY,21156 +diffusers/models/autoencoders/autoencoder_kl_allegro.py,sha256=n40dKjTLapWC-ETfc_l6SzYBJ1h8vfxJXuuZfhiCRy8,43965 +diffusers/models/autoencoders/autoencoder_kl_cogvideox.py,sha256=-bjluYAW3gDT-V0O36Ap5fwWJ6QBWLieG0o55EWFI6E,59459 +diffusers/models/autoencoders/autoencoder_kl_cosmos.py,sha256=XWdZlq6qIsY3UFl8Ia6H1eHN5b2T6AfT2EPqPyty-1E,52874 +diffusers/models/autoencoders/autoencoder_kl_flux2.py,sha256=3yBru86S1sRAUHyM8lK4iGkVy9PgJTKu4jErWV0d0LM,20876 +diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py,sha256=1A5RRFGode1z5KhEuOK0OFQeUlpNl7JAgZUcukGwxYU,44546 +diffusers/models/autoencoders/autoencoder_kl_hunyuanimage.py,sha256=ZY97vLW3e8QlzdMLcG1yhbA6gIz9WyTm46vsAqCDGuA,25766 +diffusers/models/autoencoders/autoencoder_kl_hunyuanimage_refiner.py,sha256=PYghsWilgn6wK6xvTeHqgZ_EfPRD183TX8YImWIV6PM,36506 +diffusers/models/autoencoders/autoencoder_kl_hunyuanvideo15.py,sha256=PIxDHPk46dGGlqERlzuMy7tG5tcyEa6S0qvvYzza4ZE,37252 +diffusers/models/autoencoders/autoencoder_kl_ltx.py,sha256=2jNB2eNEVzEpQA3xD0FFduN-XAHOPH8aBXQD_fz1VwU,63471 +diffusers/models/autoencoders/autoencoder_kl_ltx2.py,sha256=p1vKDEwusblbkVp_j6Z7-gIbvgmNvBNyee7SAowTqIo,63971 +diffusers/models/autoencoders/autoencoder_kl_ltx2_audio.py,sha256=8E4Er60zipEVzwUjnHKsBS5ClmsWzS3bpIDJe31cd28,30783 +diffusers/models/autoencoders/autoencoder_kl_magvit.py,sha256=H6Fu2uYydOZJNjjB1rz5hpZovjdRn4DuNG_z1SWtyfM,44203 +diffusers/models/autoencoders/autoencoder_kl_mochi.py,sha256=dUQ2xMpmDAcktkUAx9d1l_fEkkkCRJLV-Z5ixCfw5eo,45897 +diffusers/models/autoencoders/autoencoder_kl_qwenimage.py,sha256=zrPjELRRqd_0MLIMYPSh3Jw_6u_7lEpMPxCKObmpVcQ,41300 
+diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py,sha256=k5bsQDeq-7bG0bKL0X1Mj0VtRBTwOfHJOW6Xy_qoFTU,12018 +diffusers/models/autoencoders/autoencoder_kl_wan.py,sha256=Vm8u7jTaKrxzhqG2wrhwukluTEamUNkLLtEd3OnXmcg,53456 +diffusers/models/autoencoders/autoencoder_oobleck.py,sha256=qMGv7vzyPY0-vX6290qIIvMPqJSv-yHZvA4jF2sbZLQ,16516 +diffusers/models/autoencoders/autoencoder_rae.py,sha256=b_9VP1KAu-yLuqeQUe8R6EvfP2rIJMrn2nTVURtmXNc,28792 +diffusers/models/autoencoders/autoencoder_tiny.py,sha256=Nf9fEJGwsO0fd8exvBgGUEMW8oq9PC0pw_dUH0uO3YU,14623 +diffusers/models/autoencoders/consistency_decoder_vae.py,sha256=ocsYsTaaP0eC185BzWIf18CdCZYEL4NxCjyQFykVe8c,15440 +diffusers/models/autoencoders/vae.py,sha256=kPbbbtBbOmvWGrGr78BBTrrPcw-JE15sSyFVtSwAHXI,33990 +diffusers/models/autoencoders/vq_model.py,sha256=ctDK39iYOP8v0Vwk3Z70PEDfHN0S2_x7eT9_DfwcWQw,7893 +diffusers/models/cache_utils.py,sha256=mTid6shlJm4oXiMwObzciVSa8jmObw2uwf_FnE8-uqM,6292 +diffusers/models/controlnets/__init__.py,sha256=-M8_FIEy7exiUNGxZRDDxK021_S2aGomds5ZElM-4FE,1267 +diffusers/models/controlnets/__pycache__/__init__.cpython-312.pyc,, +diffusers/models/controlnets/__pycache__/controlnet.cpython-312.pyc,, +diffusers/models/controlnets/__pycache__/controlnet_cosmos.cpython-312.pyc,, +diffusers/models/controlnets/__pycache__/controlnet_flax.cpython-312.pyc,, +diffusers/models/controlnets/__pycache__/controlnet_flux.cpython-312.pyc,, +diffusers/models/controlnets/__pycache__/controlnet_hunyuan.cpython-312.pyc,, +diffusers/models/controlnets/__pycache__/controlnet_qwenimage.cpython-312.pyc,, +diffusers/models/controlnets/__pycache__/controlnet_sana.cpython-312.pyc,, +diffusers/models/controlnets/__pycache__/controlnet_sd3.cpython-312.pyc,, +diffusers/models/controlnets/__pycache__/controlnet_sparsectrl.cpython-312.pyc,, +diffusers/models/controlnets/__pycache__/controlnet_union.cpython-312.pyc,, +diffusers/models/controlnets/__pycache__/controlnet_xs.cpython-312.pyc,, 
+diffusers/models/controlnets/__pycache__/controlnet_z_image.cpython-312.pyc,, +diffusers/models/controlnets/__pycache__/multicontrolnet.cpython-312.pyc,, +diffusers/models/controlnets/__pycache__/multicontrolnet_union.cpython-312.pyc,, +diffusers/models/controlnets/controlnet.py,sha256=lUXp7XSW3vXFm3kxcfCM6bSKeNi7cp3KAMvNhchE-oU,40398 +diffusers/models/controlnets/controlnet_cosmos.py,sha256=CI1Pr3Uft9K0mqKiz12czqoG91dYZ0M86veEanudJBU,13070 +diffusers/models/controlnets/controlnet_flax.py,sha256=ceh9WPdqt-JauGWFQw_aZb4gfbKjIszkUvJQRG8SClQ,17115 +diffusers/models/controlnets/controlnet_flux.py,sha256=ZX9C0QgR_IeXtOWp_saHB1hxL8ClEGAihXChybWsEDQ,19421 +diffusers/models/controlnets/controlnet_hunyuan.py,sha256=fURjSCV-TsPsnv0rgzlb1bl2fGRpk4P42OOjGu1jaCc,16868 +diffusers/models/controlnets/controlnet_qwenimage.py,sha256=Q70XF_3Hz8Ri0vDzaMneXpBJYmu_lswY0gYmPO7gxhs,14567 +diffusers/models/controlnets/controlnet_sana.py,sha256=RMqikgShfyQfZ1MmkfISgUwGioFO8UtJGLLP_1NGZow,8937 +diffusers/models/controlnets/controlnet_sd3.py,sha256=t7dX0rDj7Dcex8z4ZMRkj94RmsCFrtBxy5Ww_Ka9L3o,19554 +diffusers/models/controlnets/controlnet_sparsectrl.py,sha256=7PlqoFMmlaruomkVMg9dCgXi_c8jRoQcoJaOY5YeEos,35500 +diffusers/models/controlnets/controlnet_union.py,sha256=ZDQy71wDGZF8P-dNyAn8ZMcHlKF14gfvYu9MKBc5j-Y,38846 +diffusers/models/controlnets/controlnet_xs.py,sha256=oK5IIXw3OBqX2ooaomVp2xsfVGXaap1IV7RjgGOs2p8,82311 +diffusers/models/controlnets/controlnet_z_image.py,sha256=ZrrG5f8DgjBbbPqrsr44XRoVYD_-LsUwbDdeAljALjU,34356 +diffusers/models/controlnets/multicontrolnet.py,sha256=jtWnN2iA7KBqpCiiF-g_nGo-wRsLJrdsYF6s7cEHMJQ,9241 +diffusers/models/controlnets/multicontrolnet_union.py,sha256=3oUH-iTIZxafAuXbk0ZPYWsl4YfEX1pDpJNX2mlW-24,10073 +diffusers/models/downsampling.py,sha256=KFL3h6Ofb2eVeLmZLDexFMS3bLlEm0cyZ0kEKcIsOWw,15772 +diffusers/models/embeddings.py,sha256=16kO95lWnj8Pq0HK3eHsugI6vQU6-VbAIrv8CXZiowI,104176 
+diffusers/models/embeddings_flax.py,sha256=oI0n9mEQ2pWIA4edgUpLliL_hlyzD1h9Bm0rBq6KU94,4824 +diffusers/models/lora.py,sha256=QoIH_r8zY0MrDSTyAsTwAyi-dBZ0px_kCDPvexbSaMs,18761 +diffusers/models/model_loading_utils.py,sha256=Cy9Q75ZFmMTWp_tuXPvCDN1RZhpE_dhhQgT2ubGILN0,30355 +diffusers/models/modeling_flax_pytorch_utils.py,sha256=f-j9Y-AhcrRp9UvLldjZOHPYBluMC5BXwxmAi6gS1rA,5332 +diffusers/models/modeling_flax_utils.py,sha256=SlBbtuPZVDH9IH3wiKFaWmTcxIgEf5nzzH06CqFIim4,27085 +diffusers/models/modeling_outputs.py,sha256=XH3sJO34MRW6UuWqqKo05mVqxGSBFRazpap_-YLwO2I,1042 +diffusers/models/modeling_pytorch_flax_utils.py,sha256=Hz5IoBV0vygRekVw8OZc9Jji22gyuEZ84uAWzXpYLvs,6973 +diffusers/models/modeling_utils.py,sha256=76SaIVHBaMnVSapuxydg28FieyJtOdbqWViZ1shpEPU,95474 +diffusers/models/normalization.py,sha256=6S67sTAIJXjzz_M2GJGhraoyMxNH57sJ5McLIIoKYFk,24518 +diffusers/models/resnet.py,sha256=tgHkXXimQOZrBYItPQCOQKA5uSuxJqvRqb3vYSRmbYQ,32512 +diffusers/models/resnet_flax.py,sha256=M1FbYLg0xjNalE0h3jLlMjUwNIXbgbzBS36CHM_b1qo,4729 +diffusers/models/transformers/__init__.py,sha256=ZDY3Zs40e2FY0hQHSFMowRXMiKTV5HGAoETmREMPpsw,3270 +diffusers/models/transformers/__pycache__/__init__.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/auraflow_transformer_2d.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/cogvideox_transformer_3d.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/consisid_transformer_3d.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/dit_transformer_2d.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/dual_transformer_2d.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/hunyuan_transformer_2d.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/latte_transformer_3d.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/lumina_nextdit2d.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/pixart_transformer_2d.cpython-312.pyc,, 
+diffusers/models/transformers/__pycache__/prior_transformer.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/sana_transformer.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/stable_audio_transformer.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/t5_film_transformer.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/transformer_2d.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/transformer_allegro.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/transformer_bria.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/transformer_bria_fibo.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/transformer_chroma.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/transformer_chronoedit.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/transformer_cogview3plus.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/transformer_cogview4.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/transformer_cosmos.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/transformer_easyanimate.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/transformer_flux.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/transformer_flux2.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/transformer_glm_image.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/transformer_helios.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/transformer_hidream_image.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/transformer_hunyuan_video.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/transformer_hunyuan_video15.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/transformer_hunyuan_video_framepack.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/transformer_hunyuanimage.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/transformer_kandinsky.cpython-312.pyc,, 
+diffusers/models/transformers/__pycache__/transformer_longcat_image.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/transformer_ltx.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/transformer_ltx2.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/transformer_lumina2.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/transformer_mochi.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/transformer_omnigen.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/transformer_ovis_image.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/transformer_prx.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/transformer_qwenimage.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/transformer_sana_video.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/transformer_sd3.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/transformer_skyreels_v2.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/transformer_temporal.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/transformer_wan.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/transformer_wan_animate.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/transformer_wan_vace.cpython-312.pyc,, +diffusers/models/transformers/__pycache__/transformer_z_image.cpython-312.pyc,, +diffusers/models/transformers/auraflow_transformer_2d.py,sha256=bjO8iQI8iqXwHhvKPk29KLCE84y46zGzFa-CJ5oDw3s,19917 +diffusers/models/transformers/cogvideox_transformer_3d.py,sha256=_ysTXhdUPHgwqF09m2GPVAYOHuJSKHa54Y9QJaoZd68,19254 +diffusers/models/transformers/consisid_transformer_3d.py,sha256=eQy2r8zeNHEapznR8YwH-vOBnOUqaKBLPfxug46JNv4,32514 +diffusers/models/transformers/dit_transformer_2d.py,sha256=xvD_g0TDFgDh08B-gh97Ucbv04DBJest4_XsjN4Z10E,10516 +diffusers/models/transformers/dual_transformer_2d.py,sha256=wwgK3t1TdmBl3wwi99KAc07882WL3OMRHIqRBVzYYM4,7667 
+diffusers/models/transformers/hunyuan_transformer_2d.py,sha256=_eCdmAJzx1L46ZIoXwmoWFmmmDfDPY3xNJQV7QOSNr0,21436 +diffusers/models/transformers/latte_transformer_3d.py,sha256=uNexGGObKbRkRdFmnWWwqLV4WyC79FoxbEAjQ8VYmos,15633 +diffusers/models/transformers/lumina_nextdit2d.py,sha256=D7BUSyDQg_B-GM7BRMGuf6aCtAqs09s-IXND7q7dIcU,14478 +diffusers/models/transformers/pixart_transformer_2d.py,sha256=Bsbm_73Udnrg3ZTGFx92JqpL77X7BfBY3-4PfZkGz-E,18072 +diffusers/models/transformers/prior_transformer.py,sha256=TJVXVVWvp0_qrpCT5doXJmuhJMVQyNcUl-cyEUoMtSg,14640 +diffusers/models/transformers/sana_transformer.py,sha256=TNz16tx5zGsfQ8Tj7D-J0Hhs8rxKD58YrgURtw7josM,21661 +diffusers/models/transformers/stable_audio_transformer.py,sha256=RZjyAmLXxBOiFh-SCfYG8Pi-x-ZUKzZN49O8afC10ZE,15798 +diffusers/models/transformers/t5_film_transformer.py,sha256=9eCpFRCdaXeh3hYEzPq-y4AgjzxsmoBgdaPODoBcFEM,15978 +diffusers/models/transformers/transformer_2d.py,sha256=dCgkCdPaJlu5maXPUnRrPcs92VW3QP7UiqIA4doULDY,28250 +diffusers/models/transformers/transformer_allegro.py,sha256=bIuwqTkVyVTijFxa6VnSw0PT8dhl8iFBdj1W-nVzBdQ,17126 +diffusers/models/transformers/transformer_bria.py,sha256=syEWwg00MRfXxcF-SiKMYct8SH36Iz3FKokamkRhbVg,29668 +diffusers/models/transformers/transformer_bria_fibo.py,sha256=FPPmWmp-JZpdl3Vhz9PCjD4_a-T3fGgLFryorew6zA0,26855 +diffusers/models/transformers/transformer_chroma.py,sha256=k58xPu9A7TZ8QFQyAEoVcLMyCfAfyd_77Gsvy1MCE5g,26180 +diffusers/models/transformers/transformer_chronoedit.py,sha256=tbrFMWqMgXVudiNYj7Zl7wEEpYSK97q_AywJz2tmYaY,30659 +diffusers/models/transformers/transformer_cogview3plus.py,sha256=bp6X2DlOPswGmRL5hBANltCarPU1askkfTnbwhllY2k,13051 +diffusers/models/transformers/transformer_cogview4.py,sha256=P_HiDURP-_ntNUwPaTtAlAmHoX85QcPtSoaWtol6c-Y,34035 +diffusers/models/transformers/transformer_cosmos.py,sha256=qZjWc7gbckRcQsr98TuYMxWFhg-gZuZHHqxsbfP06Fk,34362 
+diffusers/models/transformers/transformer_easyanimate.py,sha256=4mDzZJj-TiQW5oyusdgsmvgH1ytEigOlCyHptFjvzH4,21910 +diffusers/models/transformers/transformer_flux.py,sha256=xTfWkMYyGZedKtaGpIDzVT1yEukVyMVQuTqfOz78voM,32726 +diffusers/models/transformers/transformer_flux2.py,sha256=AQe41MEn2HU-LBuevzsnXIjQQqeWxIPVx5ZZJMFqnu8,37133 +diffusers/models/transformers/transformer_glm_image.py,sha256=9t3hMBUXSrE1jv4IQ0I8bLL4Jh-5whdls0Z0Uj4s908,27399 +diffusers/models/transformers/transformer_helios.py,sha256=Dvgee6VOc4V1ROiHAiI-MhGViKDxGMWojLxLgoeQKtw,33784 +diffusers/models/transformers/transformer_hidream_image.py,sha256=bHIL1Jxjh6JvWCXOhZ4LsgiDRQbp4BJcOVIxS5pJO0Y,38600 +diffusers/models/transformers/transformer_hunyuan_video.py,sha256=VZ9mIZGjPiexOtBTSM7pN4EVq1BTH3zVP6HCwWaKHE8,43991 +diffusers/models/transformers/transformer_hunyuan_video15.py,sha256=kPuaucGBv009iSEo9XP87MEWcZxLbAF60EaEj4kNTUI,30777 +diffusers/models/transformers/transformer_hunyuan_video_framepack.py,sha256=kOJT8DBqM5sfHub9DfZDCxDC775kk-Qhc_0q1XxkYMU,17908 +diffusers/models/transformers/transformer_hunyuanimage.py,sha256=yEGofkbQQIMS9ImgIv2q2EmhAvvG-VLHpi3bFRUrvjM,35973 +diffusers/models/transformers/transformer_kandinsky.py,sha256=Ue3x-rC_nqRfIsGJ4_vG1X5QDnT3lTGQ9Sv7XveCzvk,24676 +diffusers/models/transformers/transformer_longcat_image.py,sha256=10ZCOK7IJ55uD-4nfkppkQFuEcWjGIO4V9p6znBlle4,22290 +diffusers/models/transformers/transformer_ltx.py,sha256=o83fMAAuKmD9H2SXt18dnZkQWMYT8nbIySyAviczUw4,22141 +diffusers/models/transformers/transformer_ltx2.py,sha256=xZP0B622ULQE9dpoW-Tethrv4R5R53wOv2QLhiFHZHA,60831 +diffusers/models/transformers/transformer_lumina2.py,sha256=gv4_ZlRIGNoQVEtekG5qy0R87WjrQgAS276rY28Qb38,21250 +diffusers/models/transformers/transformer_mochi.py,sha256=mb1J2FPmfZy6YxnRlzPSEfZIOOhOFEw2IkZCJle2QV4,17715 +diffusers/models/transformers/transformer_omnigen.py,sha256=u1G2ozBN5FV_Z-4Sw4IdZkv1u1LIUd-a4KiTAapHvWo,19955 
+diffusers/models/transformers/transformer_ovis_image.py,sha256=RW2KA_wQ1yNUxvLOLkI8eVEC5yHSVERVu_8vSjjxBKE,23788 +diffusers/models/transformers/transformer_prx.py,sha256=nqbjAq7cqK8tjH6F5MKn2uStwbRg-sY8WaeWwk1wN48,30974 +diffusers/models/transformers/transformer_qwenimage.py,sha256=LxR6rFn39i63Bmi-OWH6Nn-7gVuN7yX1ii9HCffzfjU,45123 +diffusers/models/transformers/transformer_sana_video.py,sha256=8TIiwaMlD3dXCZQ97KoFrY1l-1wmFRReXkViuYqIk-M,28111 +diffusers/models/transformers/transformer_sd3.py,sha256=2vfCCzokVcHP6Bco7gVWnNxyoFjnWzxR_U2F4cnRALA,15565 +diffusers/models/transformers/transformer_skyreels_v2.py,sha256=5PrI2f0EsJ62zmfba6X6MfemQUGTM7Fp6pLRbjae04I,32007 +diffusers/models/transformers/transformer_temporal.py,sha256=bDZeSSlZjtJ-MCVTn5aLpwOi1XLUo6Q4M3uXeROnYCQ,16757 +diffusers/models/transformers/transformer_wan.py,sha256=JUDXcE5bvS9lAC8yomyvKlIVdBB187qYxlJ-WLFgjBU,29464 +diffusers/models/transformers/transformer_wan_animate.py,sha256=1DTgHO9zBPcLE_NluKULt2Pm8zzgEwMUaZ0Xoq-EuXI,53009 +diffusers/models/transformers/transformer_wan_vace.py,sha256=zJvPT5LI2cfsQ0xeU0Qk6WV0Tx9jDzBaWp5UAF560DI,15838 +diffusers/models/transformers/transformer_z_image.py,sha256=1RpISWP80DXBmfZQDVPMXOEKQgdkYYIIOr9UvTUoIAg,41985 +diffusers/models/unets/__init__.py,sha256=srYFA7zEcDY7LxyUB2jz3TdRgsLz8elrWCpT6Y4YXuU,695 +diffusers/models/unets/__pycache__/__init__.cpython-312.pyc,, +diffusers/models/unets/__pycache__/unet_1d.cpython-312.pyc,, +diffusers/models/unets/__pycache__/unet_1d_blocks.cpython-312.pyc,, +diffusers/models/unets/__pycache__/unet_2d.cpython-312.pyc,, +diffusers/models/unets/__pycache__/unet_2d_blocks.cpython-312.pyc,, +diffusers/models/unets/__pycache__/unet_2d_blocks_flax.cpython-312.pyc,, +diffusers/models/unets/__pycache__/unet_2d_condition.cpython-312.pyc,, +diffusers/models/unets/__pycache__/unet_2d_condition_flax.cpython-312.pyc,, +diffusers/models/unets/__pycache__/unet_3d_blocks.cpython-312.pyc,, 
+diffusers/models/unets/__pycache__/unet_3d_condition.cpython-312.pyc,, +diffusers/models/unets/__pycache__/unet_i2vgen_xl.cpython-312.pyc,, +diffusers/models/unets/__pycache__/unet_kandinsky3.cpython-312.pyc,, +diffusers/models/unets/__pycache__/unet_motion_model.cpython-312.pyc,, +diffusers/models/unets/__pycache__/unet_spatio_temporal_condition.cpython-312.pyc,, +diffusers/models/unets/__pycache__/unet_stable_cascade.cpython-312.pyc,, +diffusers/models/unets/__pycache__/uvit_2d.cpython-312.pyc,, +diffusers/models/unets/unet_1d.py,sha256=2Fz_ICN15OtWqv3bHx3QWzZG2iols4fuhAMQf2qxBB4,11331 +diffusers/models/unets/unet_1d_blocks.py,sha256=WJOQjshU4b6pZTbUthAxAZRgy6-dHtPFCLT2BAsiPPY,26684 +diffusers/models/unets/unet_2d.py,sha256=7L68lrAflWBsSzNVhjk9Mt8XdFMYVuetdLoW221w9zM,16819 +diffusers/models/unets/unet_2d_blocks.py,sha256=ijYdGTOT5G0ZM2RPM9Nym-zmNhVFJtFTLXDnt-3nJQI,141380 +diffusers/models/unets/unet_2d_blocks_flax.py,sha256=MhU6lugf0Nq09__ESAE9uIp5jEBxabrEReUgNhLpjHg,16758 +diffusers/models/unets/unet_2d_condition.py,sha256=BSUGygUDoGZXyxgWwnj3sjUgyiKzp0pqxjJQpsxxGiY,63582 +diffusers/models/unets/unet_2d_condition_flax.py,sha256=daPGP6FmpsroSld1yNtPH0Dm-r9HUzeX7cX7uVh5WyI,22438 +diffusers/models/unets/unet_3d_blocks.py,sha256=ArPbjA8zq14AqtBoVRdQztewe6eTDnu5noeRvQkoBJI,51462 +diffusers/models/unets/unet_3d_condition.py,sha256=fpnApxr0b2CkBpH_xvWeZccwYLtDd4aKu1fumYkLTCk,31459 +diffusers/models/unets/unet_i2vgen_xl.py,sha256=M8mZdOxlY8cXLO1TrYdRhSn5MP0x3IpNygQa9bf21JY,29579 +diffusers/models/unets/unet_kandinsky3.py,sha256=9iHaMGWy1FJ_BrQNsFoNrYgd8wmDTOxrUqCZ3zTo0ro,17983 +diffusers/models/unets/unet_motion_model.py,sha256=to1cGwxxwnFuJ3y9LB6xEXVH3nKJLfOAp0bgDzSqpP0,96219 +diffusers/models/unets/unet_spatio_temporal_condition.py,sha256=V-5Xn5_kOfbZGDgOWXaAAKw-iYExGfggBagIy-Zj1YQ,20611 +diffusers/models/unets/unet_stable_cascade.py,sha256=kb36UklqEmMzR9aUzghhFEBVlZQuE7SbBZ85f4kAClI,27170 
+diffusers/models/unets/uvit_2d.py,sha256=qbh0IFJBKlkIVJURxgBMp6rmrX7-21546O202HT0d08,14528 +diffusers/models/upsampling.py,sha256=VRcyN3gPJSGf4USY1d2xyCqF-1bdYgATPwK4W98F7O4,19510 +diffusers/models/vae_flax.py,sha256=ZvOP7XzygvLYwpSTvdnfA_QFWC0cxJte3-XOGD9v1Mk,34150 +diffusers/models/vq_model.py,sha256=J6dVRtVAQNUtHbl5LY5KjHYYsab55AbuoATHQttDGX4,1524 +diffusers/modular_pipelines/__init__.py,sha256=Gm3Y7xZRf_cQxPDJ09QsyCSdxiUqSdPYELJVDdaPVb4,4868 +diffusers/modular_pipelines/__pycache__/__init__.cpython-312.pyc,, +diffusers/modular_pipelines/__pycache__/components_manager.cpython-312.pyc,, +diffusers/modular_pipelines/__pycache__/mellon_node_utils.cpython-312.pyc,, +diffusers/modular_pipelines/__pycache__/modular_pipeline.cpython-312.pyc,, +diffusers/modular_pipelines/__pycache__/modular_pipeline_utils.cpython-312.pyc,, +diffusers/modular_pipelines/components_manager.py,sha256=lEXLxpfapupoe_nHwF6gMmbtoURDj6D_apitSE1jHg8,47343 +diffusers/modular_pipelines/flux/__init__.py,sha256=dcfP-PrgaiOaZth__ACzxTG8NknsGALHOmDREqq2cv8,1640 +diffusers/modular_pipelines/flux/__pycache__/__init__.cpython-312.pyc,, +diffusers/modular_pipelines/flux/__pycache__/before_denoise.cpython-312.pyc,, +diffusers/modular_pipelines/flux/__pycache__/decoders.cpython-312.pyc,, +diffusers/modular_pipelines/flux/__pycache__/denoise.cpython-312.pyc,, +diffusers/modular_pipelines/flux/__pycache__/encoders.cpython-312.pyc,, +diffusers/modular_pipelines/flux/__pycache__/inputs.cpython-312.pyc,, +diffusers/modular_pipelines/flux/__pycache__/modular_blocks_flux.cpython-312.pyc,, +diffusers/modular_pipelines/flux/__pycache__/modular_blocks_flux_kontext.cpython-312.pyc,, +diffusers/modular_pipelines/flux/__pycache__/modular_pipeline.cpython-312.pyc,, +diffusers/modular_pipelines/flux/before_denoise.py,sha256=tlhA0IgZPV7KVxtDOGzTw-LZRsgdaLkrlFA5h994D44,24732 +diffusers/modular_pipelines/flux/decoders.py,sha256=Uzi1MR4gOmnnU5CwlIvlKxOQVQJDnBTEhTqsUo1b0-k,3886 
+diffusers/modular_pipelines/flux/denoise.py,sha256=OlBZLEx962hESn2ZvjDP7GzjdwawEMuRRdxJ3iHjBZY,12036 +diffusers/modular_pipelines/flux/encoders.py,sha256=OVjtS1DJ15Au7dyE7UPDBq1X-jDYIns-B50_Uvf-kg4,19360 +diffusers/modular_pipelines/flux/inputs.py,sha256=tdyh3eVLzZqiHlYgIexiHcvYiRKSRxtLtDJNlQok8bg,15165 +diffusers/modular_pipelines/flux/modular_blocks_flux.py,sha256=XzjdMCM0_FWaOMbxkkDr298IGXO0YGcvWUe8cxOHMLs,22544 +diffusers/modular_pipelines/flux/modular_blocks_flux_kontext.py,sha256=TdLEd6OGFpAvpiqhtLUrjKxXnaLJnr88fXyKy8f2_Hs,23153 +diffusers/modular_pipelines/flux/modular_pipeline.py,sha256=9o0fiRAQgyWRTHqtH_I4O4Klx1uzjpjI-hfw-l2J-ls,2133 +diffusers/modular_pipelines/flux2/__init__.py,sha256=X4eftOdHN7wBRx7DEihQ6gm44hVkjQS4YXd-n4u2-tk,2025 +diffusers/modular_pipelines/flux2/__pycache__/__init__.cpython-312.pyc,, +diffusers/modular_pipelines/flux2/__pycache__/before_denoise.cpython-312.pyc,, +diffusers/modular_pipelines/flux2/__pycache__/decoders.cpython-312.pyc,, +diffusers/modular_pipelines/flux2/__pycache__/denoise.cpython-312.pyc,, +diffusers/modular_pipelines/flux2/__pycache__/encoders.cpython-312.pyc,, +diffusers/modular_pipelines/flux2/__pycache__/inputs.cpython-312.pyc,, +diffusers/modular_pipelines/flux2/__pycache__/modular_blocks_flux2.cpython-312.pyc,, +diffusers/modular_pipelines/flux2/__pycache__/modular_blocks_flux2_klein.cpython-312.pyc,, +diffusers/modular_pipelines/flux2/__pycache__/modular_blocks_flux2_klein_base.cpython-312.pyc,, +diffusers/modular_pipelines/flux2/__pycache__/modular_pipeline.cpython-312.pyc,, +diffusers/modular_pipelines/flux2/before_denoise.py,sha256=1AVTAAgpHe2bO2hsPcghdwFsuTBCUmmLZR_AvSU_A_w,22238 +diffusers/modular_pipelines/flux2/decoders.py,sha256=Zo5zmUTBnDLLtTtcbyZjxosPPd59IR6lKx5Aj3SD3Qo,6324 +diffusers/modular_pipelines/flux2/denoise.py,sha256=yd3-QVp4w8KyQcnMRlPMTSN0eOOo8pgDECR1-6CwJq4,18739 +diffusers/modular_pipelines/flux2/encoders.py,sha256=bVgXxQ3zt1woGJoj3eyFO1yaAcu5R2GCuGw8WDhSDsM,22362 
+diffusers/modular_pipelines/flux2/inputs.py,sha256=SPJJvcrxTrmqNr-RqIsAVwXY18swvj4v0Bm0-0Xr2XQ,9132 +diffusers/modular_pipelines/flux2/modular_blocks_flux2.py,sha256=OuYriRK1e0r77ftWAkOtT5N5PC8D8UgeV3Q5XvMerzI,12356 +diffusers/modular_pipelines/flux2/modular_blocks_flux2_klein.py,sha256=Qf0Bu6PtyXDw0qzkE1aR4bGK5orvukrEj-5z0n2OhlE,13975 +diffusers/modular_pipelines/flux2/modular_blocks_flux2_klein_base.py,sha256=g6fIDiedjGDbu-VHW_RqgsO8DJi_fJemXH45yDoGlfE,14920 +diffusers/modular_pipelines/flux2/modular_pipeline.py,sha256=RiYzuWl14j1yRFJAjs37CkkMQGd8SkePXWoHJieT-e8,3210 +diffusers/modular_pipelines/mellon_node_utils.py,sha256=g-MYJQoypr5jR6U0l2qAHTEL_kvo2zswFiTopWLX6VM,41060 +diffusers/modular_pipelines/modular_pipeline.py,sha256=_A7S9HXqshPDLVPPRKs7HmmOM6j_WYFAAHCfOhDrpJ0,126203 +diffusers/modular_pipelines/modular_pipeline_utils.py,sha256=t5vPFfZPU_X9sJ75SCIATyTFHkYVARkm6NbEB_0XnQk,53856 +diffusers/modular_pipelines/qwenimage/__init__.py,sha256=3e81BXodv6ATpn-P7nfonLkWu2UpSRogcR99hoU-Fak,2270 +diffusers/modular_pipelines/qwenimage/__pycache__/__init__.cpython-312.pyc,, +diffusers/modular_pipelines/qwenimage/__pycache__/before_denoise.cpython-312.pyc,, +diffusers/modular_pipelines/qwenimage/__pycache__/decoders.cpython-312.pyc,, +diffusers/modular_pipelines/qwenimage/__pycache__/denoise.cpython-312.pyc,, +diffusers/modular_pipelines/qwenimage/__pycache__/encoders.cpython-312.pyc,, +diffusers/modular_pipelines/qwenimage/__pycache__/inputs.cpython-312.pyc,, +diffusers/modular_pipelines/qwenimage/__pycache__/modular_blocks_qwenimage.cpython-312.pyc,, +diffusers/modular_pipelines/qwenimage/__pycache__/modular_blocks_qwenimage_edit.cpython-312.pyc,, +diffusers/modular_pipelines/qwenimage/__pycache__/modular_blocks_qwenimage_edit_plus.cpython-312.pyc,, +diffusers/modular_pipelines/qwenimage/__pycache__/modular_blocks_qwenimage_layered.cpython-312.pyc,, +diffusers/modular_pipelines/qwenimage/__pycache__/modular_pipeline.cpython-312.pyc,, 
+diffusers/modular_pipelines/qwenimage/__pycache__/prompt_templates.cpython-312.pyc,, +diffusers/modular_pipelines/qwenimage/before_denoise.py,sha256=6xJMn7un_DDC3MfTzdKcuoXLQth4soEU4UITHcDPkng,52605 +diffusers/modular_pipelines/qwenimage/decoders.py,sha256=clFF06xThdWVW4c4Uw8MgYHkUvivhdjW0nCXtH-4QjM,17046 +diffusers/modular_pipelines/qwenimage/denoise.py,sha256=NhIdPx22Sse0qYLIjupswM7LpF4rIgbC2t-UDL4eUws,39402 +diffusers/modular_pipelines/qwenimage/encoders.py,sha256=5y1ExYkHrlp_XoYJ84SbmSVmN5ARpzuV8LG60ohHvvM,64371 +diffusers/modular_pipelines/qwenimage/inputs.py,sha256=8artoDbssvwuCelEzXEqHVwjeALi7u5IAVfqnEICHrA,43904 +diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage.py,sha256=tRePoTvTUkJchsJmhdm4e0URWlsSa-isM8lBvbJqspk,51624 +diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_edit.py,sha256=a041peYbF_dtoH5JggdVDS_GsY-ldHlu0ci-bO6BlJg,31413 +diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_edit_plus.py,sha256=95iylmYvU3ZO7yyIuGBtSkyD-voDYsiADhAoKsrcVbY,15775 +diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_layered.py,sha256=s29nSv1ML07lrS8Z2PN10kxRL-AxhZXn-LtyJnz_tKA,14518 +diffusers/modular_pipelines/qwenimage/modular_pipeline.py,sha256=51pXdlbBgUNm_X4I-q6Sh3-wutahRPDagYYQPYxizHA,10065 +diffusers/modular_pipelines/qwenimage/prompt_templates.py,sha256=-iXfVXl16IWf6jwgPsMPue2dn4urGYe0qNlTViiMBaw,6059 +diffusers/modular_pipelines/stable_diffusion_xl/__init__.py,sha256=KPyWt0itiTJQtx3kwql6Y1Ynsgcbp4PTTrO2ber-raM,1512 +diffusers/modular_pipelines/stable_diffusion_xl/__pycache__/__init__.cpython-312.pyc,, +diffusers/modular_pipelines/stable_diffusion_xl/__pycache__/before_denoise.cpython-312.pyc,, +diffusers/modular_pipelines/stable_diffusion_xl/__pycache__/decoders.cpython-312.pyc,, +diffusers/modular_pipelines/stable_diffusion_xl/__pycache__/denoise.cpython-312.pyc,, +diffusers/modular_pipelines/stable_diffusion_xl/__pycache__/encoders.cpython-312.pyc,, 
+diffusers/modular_pipelines/stable_diffusion_xl/__pycache__/modular_blocks_stable_diffusion_xl.cpython-312.pyc,, +diffusers/modular_pipelines/stable_diffusion_xl/__pycache__/modular_pipeline.cpython-312.pyc,, +diffusers/modular_pipelines/stable_diffusion_xl/before_denoise.py,sha256=GVRlet3Haj0TnHZo2kYHGds2CmckDtznA8yKzsAz4fA,85972 +diffusers/modular_pipelines/stable_diffusion_xl/decoders.py,sha256=Ob6YezeLDDANRymfU6XTvd_ZmzBw5v41L65-e6aiRFM,7763 +diffusers/modular_pipelines/stable_diffusion_xl/denoise.py,sha256=mV7957g6DuPp6dUUaa24TIwR7gs_EF1dxCkRzxiLuZQ,35621 +diffusers/modular_pipelines/stable_diffusion_xl/encoders.py,sha256=e9DDLqzTlRVq6jqElu3ur0fWPFQNpmOo40OqaHCkGbk,41208 +diffusers/modular_pipelines/stable_diffusion_xl/modular_blocks_stable_diffusion_xl.py,sha256=Em487BZ58aW4kL5zN78E8MmKWtA8Lj8XFxzh52q0he8,25136 +diffusers/modular_pipelines/stable_diffusion_xl/modular_pipeline.py,sha256=PnOsayduYXLIqPA__ut99UPrKJhKL0XEeKeYDxFdfdg,15512 +diffusers/modular_pipelines/wan/__init__.py,sha256=uHMJ5LWhqz-VxN-veiaPDLvbRnpbbUUU4uARyxOD1CY,2108 +diffusers/modular_pipelines/wan/__pycache__/__init__.cpython-312.pyc,, +diffusers/modular_pipelines/wan/__pycache__/before_denoise.cpython-312.pyc,, +diffusers/modular_pipelines/wan/__pycache__/decoders.cpython-312.pyc,, +diffusers/modular_pipelines/wan/__pycache__/denoise.cpython-312.pyc,, +diffusers/modular_pipelines/wan/__pycache__/encoders.cpython-312.pyc,, +diffusers/modular_pipelines/wan/__pycache__/modular_blocks_wan.cpython-312.pyc,, +diffusers/modular_pipelines/wan/__pycache__/modular_blocks_wan22.cpython-312.pyc,, +diffusers/modular_pipelines/wan/__pycache__/modular_blocks_wan22_i2v.cpython-312.pyc,, +diffusers/modular_pipelines/wan/__pycache__/modular_blocks_wan_i2v.cpython-312.pyc,, +diffusers/modular_pipelines/wan/__pycache__/modular_pipeline.cpython-312.pyc,, +diffusers/modular_pipelines/wan/before_denoise.py,sha256=ruVheXLcuGJ51T58eftjNnU1AjkYh-4JSZYb-Oe7LUo,24177 
+diffusers/modular_pipelines/wan/decoders.py,sha256=TKSrzwY6yFtotEhhfNjVmIWZ0MxyzVDkGBM8FR1twx0,3482 +diffusers/modular_pipelines/wan/denoise.py,sha256=5LHzVD3_o00F3LSpVpAxPULk_Y90TnKa_1Rsyinwmjo,22227 +diffusers/modular_pipelines/wan/encoders.py,sha256=trprgj5raiSmF9A6cafD10tsSREVxvnKVwr81sw2BDs,28411 +diffusers/modular_pipelines/wan/modular_blocks_wan.py,sha256=RoG35LJhEzFCPnQuToEz6wrqNEVfm2YFWl0OwVn7-J4,5330 +diffusers/modular_pipelines/wan/modular_blocks_wan22.py,sha256=h4lf2P5gv9FgpUidDn5HcSSf58JUpWbKUdDfUCV1qVc,5870 +diffusers/modular_pipelines/wan/modular_blocks_wan22_i2v.py,sha256=yDQn8mj3yjTbx1fFYhYJYKUUDLQnuAGv6hm_y81h_O4,8047 +diffusers/modular_pipelines/wan/modular_blocks_wan_i2v.py,sha256=tGsxrcrWdKqhI45t9cNN0R1zH8vawrIwifgSXpEtFVo,17379 +diffusers/modular_pipelines/wan/modular_pipeline.py,sha256=kN6qn9izRJpmt-MxPJY7LBWPLE-iat9sJ0ErPEmlTxc,4387 +diffusers/modular_pipelines/z_image/__init__.py,sha256=NJli-n1f0xix2kbD-dj4R51OuNLEZ5NPr5V_elnHRvQ,1444 +diffusers/modular_pipelines/z_image/__pycache__/__init__.cpython-312.pyc,, +diffusers/modular_pipelines/z_image/__pycache__/before_denoise.cpython-312.pyc,, +diffusers/modular_pipelines/z_image/__pycache__/decoders.cpython-312.pyc,, +diffusers/modular_pipelines/z_image/__pycache__/denoise.cpython-312.pyc,, +diffusers/modular_pipelines/z_image/__pycache__/encoders.cpython-312.pyc,, +diffusers/modular_pipelines/z_image/__pycache__/modular_blocks_z_image.cpython-312.pyc,, +diffusers/modular_pipelines/z_image/__pycache__/modular_pipeline.cpython-312.pyc,, +diffusers/modular_pipelines/z_image/before_denoise.py,sha256=3Ro512C-twuYFa_6oivfIKLWYBSFl_FANRu_ya2xWBE,26240 +diffusers/modular_pipelines/z_image/decoders.py,sha256=AJdaF_K-vEsMaCS5tSXHSOsBJy0N5fq0VbUETsc2Olc,3072 +diffusers/modular_pipelines/z_image/denoise.py,sha256=tQWeemZHtM6PXupY4f2D9gUQM1hxRMO3TNA_sZT6LLg,12634 +diffusers/modular_pipelines/z_image/encoders.py,sha256=w_xjvbvQXDmqnMjrj5pfmdw4v5HAqcnZgYWEbEQae-o,12479 
+diffusers/modular_pipelines/z_image/modular_blocks_z_image.py,sha256=lfcwz_NyYFPZG3Bq4FrUsl2rrgKBtx74f8yxUKHm5wQ,12487 +diffusers/modular_pipelines/z_image/modular_pipeline.py,sha256=NWNOi_pK73V0dk64I-KN2Mzk4TXWWEE1SjckAC640CY,2355 +diffusers/optimization.py,sha256=vseFZIHe6rvw2irqr-Bb_uiX3a90zwWH2KKhVoqhlrQ,14691 +diffusers/pipelines/__init__.py,sha256=zFY1SqHzf3DxOgLrgtCRExxgy-bnF-7_12NyjKOhZfU,37206 +diffusers/pipelines/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/__pycache__/auto_pipeline.cpython-312.pyc,, +diffusers/pipelines/__pycache__/free_init_utils.cpython-312.pyc,, +diffusers/pipelines/__pycache__/free_noise_utils.cpython-312.pyc,, +diffusers/pipelines/__pycache__/onnx_utils.cpython-312.pyc,, +diffusers/pipelines/__pycache__/pipeline_flax_utils.cpython-312.pyc,, +diffusers/pipelines/__pycache__/pipeline_loading_utils.cpython-312.pyc,, +diffusers/pipelines/__pycache__/pipeline_utils.cpython-312.pyc,, +diffusers/pipelines/__pycache__/transformers_loading_utils.cpython-312.pyc,, +diffusers/pipelines/allegro/__init__.py,sha256=T1MLZgDf8Fhh6YunF8a4Ta6NNIqneWsJIvmBhiy1ABM,1290 +diffusers/pipelines/allegro/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/allegro/__pycache__/pipeline_allegro.cpython-312.pyc,, +diffusers/pipelines/allegro/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/allegro/pipeline_allegro.py,sha256=5ytOoKiPzTAnpxGBypYrOwaUF1Vp5CFSiPkn7_JpaJo,45284 +diffusers/pipelines/allegro/pipeline_output.py,sha256=0512KSKEFYrMrE8xnQsdjLElVIyyq8ECLFOV1CSiyKE,686 +diffusers/pipelines/amused/__init__.py,sha256=pzqLeLosNQ29prMLhTxvPpmoIDPB3OFMQMlErOIRkmI,1793 +diffusers/pipelines/amused/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/amused/__pycache__/pipeline_amused.cpython-312.pyc,, +diffusers/pipelines/amused/__pycache__/pipeline_amused_img2img.cpython-312.pyc,, +diffusers/pipelines/amused/__pycache__/pipeline_amused_inpaint.cpython-312.pyc,, 
+diffusers/pipelines/amused/pipeline_amused.py,sha256=-KRK-VX7VucsB7dGo2J34paYFHfpWYiEbmktQgdcmUw,15920 +diffusers/pipelines/amused/pipeline_amused_img2img.py,sha256=1fL6tfJNmGjvlztJ1Ez6lZGU1ucA8dsNehlwtS2ENfw,17406 +diffusers/pipelines/amused/pipeline_amused_inpaint.py,sha256=ZZgraZQhKfJ6xNNYxN8oaCqHmCbVUiPImWlkkDBWyUA,19092 +diffusers/pipelines/animatediff/__init__.py,sha256=8e7xkGr1MrQerNXsfFBaDT8f7ELe5aoPX9v2qRN1hvg,2324 +diffusers/pipelines/animatediff/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/animatediff/__pycache__/pipeline_animatediff.cpython-312.pyc,, +diffusers/pipelines/animatediff/__pycache__/pipeline_animatediff_controlnet.cpython-312.pyc,, +diffusers/pipelines/animatediff/__pycache__/pipeline_animatediff_sdxl.cpython-312.pyc,, +diffusers/pipelines/animatediff/__pycache__/pipeline_animatediff_sparsectrl.cpython-312.pyc,, +diffusers/pipelines/animatediff/__pycache__/pipeline_animatediff_video2video.cpython-312.pyc,, +diffusers/pipelines/animatediff/__pycache__/pipeline_animatediff_video2video_controlnet.cpython-312.pyc,, +diffusers/pipelines/animatediff/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/animatediff/pipeline_animatediff.py,sha256=lSYDKRKkV14_c7dNRpAzz0nIHjUIouclUSAU1H-RZDs,42270 +diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py,sha256=ky8lh2oOa6kWx8FfgoMU80NTT45ykn_s4UZYulXW06w,55786 +diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py,sha256=hMY0AInHBA6Wau9NFUmVxitLJB1g8smXnSpgg-UOMaQ,66326 +diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py,sha256=KFcmcFEKrvQ1b_cGwZE9O8UAp94DSSMjjDTr68GQ2N0,51385 +diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py,sha256=AsywXAcyKIOD38GkkjnK-LwXFRYe16MRuwu6srTewQQ,52177 +diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py,sha256=M4Fqp7ZE6fmqOXDVAWHxL7kskAS9qWVe8sT8g31Sb-g,67497 +diffusers/pipelines/animatediff/pipeline_output.py,sha256=vCs0uMVjmWRZsyRFc5SmWcclf8i2uXemtNJmocZrkBw,693 
+diffusers/pipelines/audioldm/__init__.py,sha256=HMUjKqEf7OAtgIeV2CQoGIoDE6oY7b26N55yn4qCIpU,1419 +diffusers/pipelines/audioldm/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/audioldm/__pycache__/pipeline_audioldm.cpython-312.pyc,, +diffusers/pipelines/audioldm/pipeline_audioldm.py,sha256=FxJj9fj-yd8hlV8gbEtaxxof6gGvx0TRnPdpfeXHo7U,26283 +diffusers/pipelines/audioldm2/__init__.py,sha256=gR7gTyh-YGI4uxTCPnz_LnCGbErpFGtNMEzM_CQdqgE,1605 +diffusers/pipelines/audioldm2/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/audioldm2/__pycache__/modeling_audioldm2.cpython-312.pyc,, +diffusers/pipelines/audioldm2/__pycache__/pipeline_audioldm2.cpython-312.pyc,, +diffusers/pipelines/audioldm2/modeling_audioldm2.py,sha256=MTj4xj5_kK-1xthc-VZ_3x1PNDHwqrLe2gTTG4ip06g,67492 +diffusers/pipelines/audioldm2/pipeline_audioldm2.py,sha256=zX-ckxULNspiO1btDFwxbpniuwR8mtOwSWJYArxQEZg,55899 +diffusers/pipelines/aura_flow/__init__.py,sha256=TOGRbwqwr7j1XIVGAxIBwAp4lM2zt21C_hYm5dFb76o,1296 +diffusers/pipelines/aura_flow/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/aura_flow/__pycache__/pipeline_aura_flow.cpython-312.pyc,, +diffusers/pipelines/aura_flow/pipeline_aura_flow.py,sha256=OR4pOzqmMmH6mSvAytswuI69aD8qWh2FCuTcuEQ5juI,32754 +diffusers/pipelines/auto_pipeline.py,sha256=BagihgCV0Z95UE2T0NXLQPqG9J8y0KVjnPrT3cR7Lsk,60504 +diffusers/pipelines/blip_diffusion/__init__.py,sha256=wdrT-vdGnM-Qzv0jo2YCTrj_hjI93BYDO6HEh7Mbqms,656 +diffusers/pipelines/blip_diffusion/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/blip_diffusion/__pycache__/blip_image_processing.cpython-312.pyc,, +diffusers/pipelines/blip_diffusion/__pycache__/modeling_blip2.cpython-312.pyc,, +diffusers/pipelines/blip_diffusion/__pycache__/modeling_ctx_clip.cpython-312.pyc,, +diffusers/pipelines/blip_diffusion/__pycache__/pipeline_blip_diffusion.cpython-312.pyc,, +diffusers/pipelines/blip_diffusion/blip_image_processing.py,sha256=Jliaw06g33-bbzdusmb0kNEiXsNQUDhjkha2jsNS2LU,16598 
+diffusers/pipelines/blip_diffusion/modeling_blip2.py,sha256=EVd2MQ7ERmTQPJs63NC4dFoZzA-nbT89C72Aoihrq3I,27048 +diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py,sha256=32U1BP3yI_KwoCXU0C2hnKfjQiDzBOu5hYSBZA_C5xU,8899 +diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py,sha256=GAflj1AnC9ZvEMRJNtGZDxEbkqeDPN2ONJji_BQhbT8,15245 +diffusers/pipelines/bria/__init__.py,sha256=8RG0C8O1kfqyszPxF5O15mww6b3PWTskpT8VaQIOavM,1278 +diffusers/pipelines/bria/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/bria/__pycache__/pipeline_bria.cpython-312.pyc,, +diffusers/pipelines/bria/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/bria/pipeline_bria.py,sha256=4HpSrxTT1ezqcOXt8x_X4lCaSxGGCTyFHa0YvZO3c1Q,34781 +diffusers/pipelines/bria/pipeline_output.py,sha256=bGR_U3U4IhprPhv0HphMLsH2IbJRIpbD21i2Gv8Gs7U,549 +diffusers/pipelines/bria_fibo/__init__.py,sha256=_doQYTy-HEUyRWenTyLod6eYjGWss5api_AjhF0ptnc,1439 +diffusers/pipelines/bria_fibo/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/bria_fibo/__pycache__/pipeline_bria_fibo.cpython-312.pyc,, +diffusers/pipelines/bria_fibo/__pycache__/pipeline_bria_fibo_edit.cpython-312.pyc,, +diffusers/pipelines/bria_fibo/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/bria_fibo/pipeline_bria_fibo.py,sha256=Ajt9uKM1GibwbqYfsISb4H5r3E1fowkvKXSqqaZHsyI,38431 +diffusers/pipelines/bria_fibo/pipeline_bria_fibo_edit.py,sha256=1IeroZDVOzf7vc2slP-fETTwgei8ydTm0SEXMmWwUkk,49644 +diffusers/pipelines/bria_fibo/pipeline_output.py,sha256=1C2N6xec7CndBca8NYSynhilR7C26h4gyvVA1I01Qm8,556 +diffusers/pipelines/chroma/__init__.py,sha256=DeAfblpT8853bjNYeYJIX0yEW1DDrmhSjmNXp4sPrks,1761 +diffusers/pipelines/chroma/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/chroma/__pycache__/pipeline_chroma.cpython-312.pyc,, +diffusers/pipelines/chroma/__pycache__/pipeline_chroma_img2img.cpython-312.pyc,, +diffusers/pipelines/chroma/__pycache__/pipeline_chroma_inpainting.cpython-312.pyc,, 
+diffusers/pipelines/chroma/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/chroma/pipeline_chroma.py,sha256=d9IDwp8EhNwWkygcM1MhFcEzuJUzWM9cpC6JaXXe7M8,46026 +diffusers/pipelines/chroma/pipeline_chroma_img2img.py,sha256=gJGPAzXcRf0JhEG1Pd6P0pRaY2mpa0-0s5DGTQzSF3k,50279 +diffusers/pipelines/chroma/pipeline_chroma_inpainting.py,sha256=FikVkZLwyug5ZeFM3zVR5xf-pratsSAx4AClEziYVCk,55987 +diffusers/pipelines/chroma/pipeline_output.py,sha256=6b2iVKkB5vUwN6ons_yTF2-S8mFZzYKkfX9f06nH2jk,563 +diffusers/pipelines/chronoedit/__init__.py,sha256=NJr-fHIU3IfE8z9Vk1BZs2AHEfjWibVZ0UWvSO4rdHg,1301 +diffusers/pipelines/chronoedit/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/chronoedit/__pycache__/pipeline_chronoedit.cpython-312.pyc,, +diffusers/pipelines/chronoedit/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/chronoedit/pipeline_chronoedit.py,sha256=5eqk0OEy-s-k9OEWtmQGf-lwnCBcNAk6ZQz7Gsevmsk,36001 +diffusers/pipelines/chronoedit/pipeline_output.py,sha256=L9iV9C2IFpTsOPmRTaXHRPWsDhQyK4UnFLPNhY-_bC4,619 +diffusers/pipelines/cogvideo/__init__.py,sha256=84bmJbrCvjUtEXFgyCKvX5N4HNtAWorMjQTNvWPh8ZU,1816 +diffusers/pipelines/cogvideo/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/cogvideo/__pycache__/pipeline_cogvideox.cpython-312.pyc,, +diffusers/pipelines/cogvideo/__pycache__/pipeline_cogvideox_fun_control.cpython-312.pyc,, +diffusers/pipelines/cogvideo/__pycache__/pipeline_cogvideox_image2video.cpython-312.pyc,, +diffusers/pipelines/cogvideo/__pycache__/pipeline_cogvideox_video2video.cpython-312.pyc,, +diffusers/pipelines/cogvideo/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/cogvideo/pipeline_cogvideox.py,sha256=vVHtdjgDoIYFWOOJMXjrkzXlh6UU5onexh927393KE8,37738 +diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py,sha256=Y1PtSEiXVLisXBA1nIipoQKCKL0tzHYa0BF3xymbpJ8,40296 
+diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py,sha256=Wtl5RCVERrcFgxmed6w0I80tHtW4uhDYNxOqPm4ysYo,42717 +diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py,sha256=fPHbEIyT2EzlMzDxACvASqT1TsV-Ehiid-bofacQrSM,41335 +diffusers/pipelines/cogvideo/pipeline_output.py,sha256=hEgkXUsI7l6D6dkeVZanL9ODkGAHsHGkDCq3zt_Ima0,616 +diffusers/pipelines/cogview3/__init__.py,sha256=ophRMlB8W7AocUEWUJLbmK1o4yJpHEfKvwyDceyMu00,1497 +diffusers/pipelines/cogview3/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/cogview3/__pycache__/pipeline_cogview3plus.cpython-312.pyc,, +diffusers/pipelines/cogview3/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/cogview3/pipeline_cogview3plus.py,sha256=9y143Ro3VK6W5RhfclmN77JUe6flaEKXXrx3KRwR6gk,33836 +diffusers/pipelines/cogview3/pipeline_output.py,sha256=X7W4zr79wxM2NV-wOS4h0ih5i-FBSzGmPw52LsiPMjI,557 +diffusers/pipelines/cogview4/__init__.py,sha256=DSW0f5XIu2bcirGYm6FE9lhyxADbV5nbiayq1x2ttJg,1633 +diffusers/pipelines/cogview4/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/cogview4/__pycache__/pipeline_cogview4.cpython-312.pyc,, +diffusers/pipelines/cogview4/__pycache__/pipeline_cogview4_control.cpython-312.pyc,, +diffusers/pipelines/cogview4/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/cogview4/pipeline_cogview4.py,sha256=zJvbG0KR7erc24wkP1fJNavBZ71BhJQXPXCkuHCYTPE,33802 +diffusers/pipelines/cogview4/pipeline_cogview4_control.py,sha256=NDQFkcPxZoK5mkBOQUOqkJMiFVeO3gfDYu-o68L42rY,35278 +diffusers/pipelines/cogview4/pipeline_output.py,sha256=0jlTqxjcB3l2jpVe0pOQ2ati9VmLBqWFcrNJYlpkeEs,557 +diffusers/pipelines/consisid/__init__.py,sha256=wi4mmbsztby5LLgmrtDhz857JWT_4Jbc2sRzRyL0EpY,1367 +diffusers/pipelines/consisid/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/consisid/__pycache__/consisid_utils.cpython-312.pyc,, +diffusers/pipelines/consisid/__pycache__/pipeline_consisid.cpython-312.pyc,, 
+diffusers/pipelines/consisid/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/consisid/consisid_utils.py,sha256=Qmsoq4CYzOTOaH3PWB5iG6ZpI4JuF6rVe4AEOHvdTbk,14522 +diffusers/pipelines/consisid/pipeline_consisid.py,sha256=XTSMFyrUTILsjdywo4MhEdroVwwJ7jQAjGNXISBZrrM,46505 +diffusers/pipelines/consisid/pipeline_output.py,sha256=EckPZ5hE4nMfg8dYQ9cZnB6h9cOfkaHGMW2DVDTUI5E,615 +diffusers/pipelines/consistency_models/__init__.py,sha256=q_nrLK9DH0_kLcLmRIvgvLP-vDVwloC3lBus776596c,484 +diffusers/pipelines/consistency_models/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/consistency_models/__pycache__/pipeline_consistency_models.cpython-312.pyc,, +diffusers/pipelines/consistency_models/pipeline_consistency_models.py,sha256=MnOpg9KPMWgx40JTG6EbGIKxCqA52rTTlIW_mNmijZA,12530 +diffusers/pipelines/controlnet/__init__.py,sha256=pqndp8HbyQ2D45STcpMp37nO5M4SagpfwADCCOC_2CU,4057 +diffusers/pipelines/controlnet/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/controlnet/__pycache__/multicontrolnet.cpython-312.pyc,, +diffusers/pipelines/controlnet/__pycache__/pipeline_controlnet.cpython-312.pyc,, +diffusers/pipelines/controlnet/__pycache__/pipeline_controlnet_blip_diffusion.cpython-312.pyc,, +diffusers/pipelines/controlnet/__pycache__/pipeline_controlnet_img2img.cpython-312.pyc,, +diffusers/pipelines/controlnet/__pycache__/pipeline_controlnet_inpaint.cpython-312.pyc,, +diffusers/pipelines/controlnet/__pycache__/pipeline_controlnet_inpaint_sd_xl.cpython-312.pyc,, +diffusers/pipelines/controlnet/__pycache__/pipeline_controlnet_sd_xl.cpython-312.pyc,, +diffusers/pipelines/controlnet/__pycache__/pipeline_controlnet_sd_xl_img2img.cpython-312.pyc,, +diffusers/pipelines/controlnet/__pycache__/pipeline_controlnet_union_inpaint_sd_xl.cpython-312.pyc,, +diffusers/pipelines/controlnet/__pycache__/pipeline_controlnet_union_sd_xl.cpython-312.pyc,, +diffusers/pipelines/controlnet/__pycache__/pipeline_controlnet_union_sd_xl_img2img.cpython-312.pyc,, 
+diffusers/pipelines/controlnet/__pycache__/pipeline_flax_controlnet.cpython-312.pyc,, +diffusers/pipelines/controlnet/multicontrolnet.py,sha256=-sluVPEM3oDV_dscvtnklZf3S_KsbRK98-V1fwGKUtg,684 +diffusers/pipelines/controlnet/pipeline_controlnet.py,sha256=nFjx7mqoO_yIagELJmBZtyDF58W6Eg5cwnvq0oZvwDY,69252 +diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py,sha256=iKHQlwaloO7CVaCTE9uhexews8hZfNkRnS57hugQN-I,17608 +diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py,sha256=YCpHswGttgFvYsxj3EufmkBgwBgoU8lYdWdQSRFc7cA,67435 +diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py,sha256=1PNlhUhVTZ6q4P8sb15nmHt_r8V68U1k5oATXou5QTk,76484 +diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py,sha256=ckYhiDY-FtYhRe3txc2jM4KF6ivRH5ccGAa1sq8YKlk,94650 +diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py,sha256=s2BfuoXUNDObfcEW3pBX__GVOCxMroSzdx6JL6kO2NM,82256 +diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py,sha256=ObgVWTYwwsnKizCrtQJkXb38vlZ3hEymHG7CefHiI1Y,86665 +diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py,sha256=qs7CyzcxYLlIiLqWPoyxhnam_GR9ZliXxmBGX2N4QD4,97065 +diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py,sha256=2hu4H0n_Pn4SR4AWsavKtVoOEjzOAjuaeT0FuwHOY88,82904 +diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py,sha256=WY3uC3YYneuvm8wUN7UEFW1Zm40qSCejyEnckMyA7_8,88197 +diffusers/pipelines/controlnet/pipeline_flax_controlnet.py,sha256=wabt3Rngu7ti6VbkVWn7LSN0NymQr-qk6ie-5mIEMqw,22603 +diffusers/pipelines/controlnet_hunyuandit/__init__.py,sha256=LvB-TNhPTnUIdinVZfxzUX40RFWvNWxrjAzsDDiLBfM,1344 +diffusers/pipelines/controlnet_hunyuandit/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/controlnet_hunyuandit/__pycache__/pipeline_hunyuandit_controlnet.cpython-312.pyc,, +diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py,sha256=ZzS9Fsnk9OzYVihfNvvLr07WwtGIMRni9CLp9PA7Hkk,50754 
+diffusers/pipelines/controlnet_sd3/__init__.py,sha256=_-t5_Jac1hvUKbjACSwVDVWx1lFIBQDCeCE9CVMbsW0,1903 +diffusers/pipelines/controlnet_sd3/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/controlnet_sd3/__pycache__/pipeline_stable_diffusion_3_controlnet.cpython-312.pyc,, +diffusers/pipelines/controlnet_sd3/__pycache__/pipeline_stable_diffusion_3_controlnet_inpainting.cpython-312.pyc,, +diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py,sha256=Peus7D2kj_mLbjxMRiBwErVNf1euoneHMPp3OXwgbvg,62475 +diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py,sha256=x9YvT1sllQYak86vzbCwZ7aLB06apin0QYH6Yxy7MAg,70363 +diffusers/pipelines/controlnet_xs/__init__.py,sha256=TuIgTKgY4MVB6zaoNTduQAEVRsNptBZQZhnxxQ3hpyg,2403 +diffusers/pipelines/controlnet_xs/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/controlnet_xs/__pycache__/pipeline_controlnet_xs.cpython-312.pyc,, +diffusers/pipelines/controlnet_xs/__pycache__/pipeline_controlnet_xs_sd_xl.cpython-312.pyc,, +diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py,sha256=jBYVQQuVA3vgzQf5VSJPGvcZvkHiaCpfXq5UV8G2yQQ,45920 +diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py,sha256=1hTzlkpSEfnHMQ3pTB4bO2irb9RGGhwkV07z-h4mqOI,56763 +diffusers/pipelines/cosmos/__init__.py,sha256=W21LehITKypbEDaJSvdObf7hBtOvhLtJOMa12pfdCag,2203 +diffusers/pipelines/cosmos/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/cosmos/__pycache__/pipeline_cosmos2_5_predict.cpython-312.pyc,, +diffusers/pipelines/cosmos/__pycache__/pipeline_cosmos2_5_transfer.cpython-312.pyc,, +diffusers/pipelines/cosmos/__pycache__/pipeline_cosmos2_text2image.cpython-312.pyc,, +diffusers/pipelines/cosmos/__pycache__/pipeline_cosmos2_video2world.cpython-312.pyc,, +diffusers/pipelines/cosmos/__pycache__/pipeline_cosmos_text2world.cpython-312.pyc,, +diffusers/pipelines/cosmos/__pycache__/pipeline_cosmos_video2world.cpython-312.pyc,, 
+diffusers/pipelines/cosmos/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/cosmos/pipeline_cosmos2_5_predict.py,sha256=V3Caaou8RtYZUvzP1hAg2kgFfA5psMtCoV5YnuqcLno,43314 +diffusers/pipelines/cosmos/pipeline_cosmos2_5_transfer.py,sha256=LcfSPFhpNbIr3r35sZLmqbR0YXyR-fVICurv41qD8tA,50293 +diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py,sha256=repYdgme_23vK-NT9saHZ5qEN5Djxo2F3zl234kboG0,33447 +diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py,sha256=5AQ40g70xPd5rVNx4sv_f_DGTkYf-Qj_U09zFHhLFkM,39766 +diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py,sha256=WlScX5eNRwJggqJC06LG2VlbiB9oUAxR4s3YwKKiYY8,32124 +diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py,sha256=wkbV-wodmTqG0ECP3DCkPHxx1pYfUVVsdNy9cYOBVZw,40465 +diffusers/pipelines/cosmos/pipeline_output.py,sha256=QCRm2-0K2vgIs0o-oD8H2PHqPTk6hUfZm5qPu5lavwc,1177 +diffusers/pipelines/dance_diffusion/__init__.py,sha256=SOwr8mpuw34oKEUuy4uVLlhjfHuLRCP0kpMjoSPXADU,453 +diffusers/pipelines/dance_diffusion/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/dance_diffusion/__pycache__/pipeline_dance_diffusion.cpython-312.pyc,, +diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py,sha256=2nKGaK9nwCBZjFUrdlaG-jvjpFelyQAWjowui5ATW4o,6665 +diffusers/pipelines/ddim/__init__.py,sha256=-zCVlqBSKWZdwY5HSsoiRT4nUEuT6dckiD_KIFen3bs,411 +diffusers/pipelines/ddim/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/ddim/__pycache__/pipeline_ddim.cpython-312.pyc,, +diffusers/pipelines/ddim/pipeline_ddim.py,sha256=XwPZTJtaGIveNWOGzr9T7Fv4rgJkXNy6D_yzNxIrrUw,6840 +diffusers/pipelines/ddpm/__init__.py,sha256=DAj0i0-iba7KACShx0bzGa9gqAV7yxGgf9sy_Hf095Q,425 +diffusers/pipelines/ddpm/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/ddpm/__pycache__/pipeline_ddpm.cpython-312.pyc,, +diffusers/pipelines/ddpm/pipeline_ddpm.py,sha256=a28rTwK2iPHp0-YXied7cFWdypqh6r80qkQdHWTMBA4,5277 
+diffusers/pipelines/deepfloyd_if/__init__.py,sha256=gh1fQ5u6q0d-o3XGExCGD0jPaUK-gWCturfHU-TYIi8,2975 +diffusers/pipelines/deepfloyd_if/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/deepfloyd_if/__pycache__/pipeline_if.cpython-312.pyc,, +diffusers/pipelines/deepfloyd_if/__pycache__/pipeline_if_img2img.cpython-312.pyc,, +diffusers/pipelines/deepfloyd_if/__pycache__/pipeline_if_img2img_superresolution.cpython-312.pyc,, +diffusers/pipelines/deepfloyd_if/__pycache__/pipeline_if_inpainting.cpython-312.pyc,, +diffusers/pipelines/deepfloyd_if/__pycache__/pipeline_if_inpainting_superresolution.cpython-312.pyc,, +diffusers/pipelines/deepfloyd_if/__pycache__/pipeline_if_superresolution.cpython-312.pyc,, +diffusers/pipelines/deepfloyd_if/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/deepfloyd_if/__pycache__/safety_checker.cpython-312.pyc,, +diffusers/pipelines/deepfloyd_if/__pycache__/timesteps.cpython-312.pyc,, +diffusers/pipelines/deepfloyd_if/__pycache__/watermark.cpython-312.pyc,, +diffusers/pipelines/deepfloyd_if/pipeline_if.py,sha256=febvPrctHID7DxqkmHc0QyClKphXcUV4iBLhrwQHHUU,35505 +diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py,sha256=qVpg2irPZr509fqB2US0xL0EwTrachev8K2cEVHF128,39936 +diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py,sha256=J27_mUorbSphVzQan5d3Es3Zn3Rlkz-ZDRetn9CaF70,44971 +diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py,sha256=eL63-x9oCQPutgzeAEOlhRf9h-MmFxJ2q4rrTqN38Oo,45150 +diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py,sha256=-05tWhzEa9B7tG8-PAaayZ91Bu2op05Nvnwr_ZhWoks,49984 +diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py,sha256=WO7vX7jHDlV0P6WudhT-2yXhti3iwoLQh6XAz4BgR44,39797 +diffusers/pipelines/deepfloyd_if/pipeline_output.py,sha256=F_t9ieFpQyvvZOYXT61HqJ_cWhPfZZ-mDHTxkQ8BPnA,1092 +diffusers/pipelines/deepfloyd_if/safety_checker.py,sha256=zqN0z4Mvf7AtrxlUb6qAoiw_QuxGdDk-6js5YuarxTo,2117 
+diffusers/pipelines/deepfloyd_if/timesteps.py,sha256=JO8b-8zlcvk_Tb6s6GGY7MgRPRADs35y0KBcSkqmNDM,5164 +diffusers/pipelines/deepfloyd_if/watermark.py,sha256=bKDkcbG5SE5XZIaQFyguXSrfABGm7MmTtlxGpIrFGz4,1576 +diffusers/pipelines/deprecated/__init__.py,sha256=mXBnea22TkkUdiGxUpZDXTSb1RlURczuRcGeIzn9DcQ,5470 +diffusers/pipelines/deprecated/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/deprecated/alt_diffusion/__init__.py,sha256=1SiGoNJytgnMwGmR48q8erVnU9JP5uz5E6XgHvlFDTc,1783 +diffusers/pipelines/deprecated/alt_diffusion/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/deprecated/alt_diffusion/__pycache__/modeling_roberta_series.cpython-312.pyc,, +diffusers/pipelines/deprecated/alt_diffusion/__pycache__/pipeline_alt_diffusion.cpython-312.pyc,, +diffusers/pipelines/deprecated/alt_diffusion/__pycache__/pipeline_alt_diffusion_img2img.cpython-312.pyc,, +diffusers/pipelines/deprecated/alt_diffusion/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/deprecated/alt_diffusion/modeling_roberta_series.py,sha256=_S0O-OuWm_qfp2IepXlTETRYMoxL2fYSt4BSUVVwuFA,5453 +diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py,sha256=ozKoTDHS-JoKCh1iLIVyiH-CX2F9r-qdIyJoyd2Fg7o,50164 +diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py,sha256=uMt8SJRN36sqAK1iM_Ht5F8u5Wd-u3wJIPgXm1h7cy4,52884 +diffusers/pipelines/deprecated/alt_diffusion/pipeline_output.py,sha256=WI8smxV9jplOL1M7eh4qKHBtnnkEISVpgGySD_R3JB4,878 +diffusers/pipelines/deprecated/audio_diffusion/__init__.py,sha256=SiFqPmeNbqOYTwuTx2WUaMIpMzgSnJ2SZ_97tIDryOE,507 +diffusers/pipelines/deprecated/audio_diffusion/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/deprecated/audio_diffusion/__pycache__/mel.cpython-312.pyc,, +diffusers/pipelines/deprecated/audio_diffusion/__pycache__/pipeline_audio_diffusion.cpython-312.pyc,, +diffusers/pipelines/deprecated/audio_diffusion/mel.py,sha256=BjN1eYahKBBWn3FTSlHpn5VqbC1pym253k7u8WfucXw,5764 
+diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py,sha256=sEkz9jCIz0kDbk7a9v9l66isEQQGm_MIln6j1vWko8I,13160 +diffusers/pipelines/deprecated/latent_diffusion_uncond/__init__.py,sha256=ZWWt671s-zbWawgtJNoIstZsvOE5ucP2M_vp7OMUMeM,448 +diffusers/pipelines/deprecated/latent_diffusion_uncond/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/deprecated/latent_diffusion_uncond/__pycache__/pipeline_latent_diffusion_uncond.cpython-312.pyc,, +diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py,sha256=2MR0vgtpvpC1voqnKugoVRuOTkiD4X-RYOtTnoOTutU,5315 +diffusers/pipelines/deprecated/pndm/__init__.py,sha256=R8RavcZ5QXU-fR4o4HT_xvypifWUcqRKF3bduCgieEI,412 +diffusers/pipelines/deprecated/pndm/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/deprecated/pndm/__pycache__/pipeline_pndm.cpython-312.pyc,, +diffusers/pipelines/deprecated/pndm/pipeline_pndm.py,sha256=rt477EvRfzxamdDHcylvykLdCEN9tl_1mBfMjJvUqp0,4595 +diffusers/pipelines/deprecated/repaint/__init__.py,sha256=mlHI_qG20VS7yuags8W0HXpbHkZgObu-jUBuYnOfffo,425 +diffusers/pipelines/deprecated/repaint/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/deprecated/repaint/__pycache__/pipeline_repaint.cpython-312.pyc,, +diffusers/pipelines/deprecated/repaint/pipeline_repaint.py,sha256=4Kt9YFLug3vfgXyb-EF7JpSWKTi_wrk-GXfat6Qp8VU,10005 +diffusers/pipelines/deprecated/score_sde_ve/__init__.py,sha256=7CLXxU1JqmMFbdm0bLwCHxGUjGJFvS64xueOQdD2X7s,441 +diffusers/pipelines/deprecated/score_sde_ve/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/deprecated/score_sde_ve/__pycache__/pipeline_score_sde_ve.cpython-312.pyc,, +diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py,sha256=WkTAj9WYVhwthggTScnCMDacfATTSElhjahh0smiXIA,4323 +diffusers/pipelines/deprecated/spectrogram_diffusion/__init__.py,sha256=lOJEU-CHJhv0N2BCEM9-dzKmm1Y-HPt1FuF9lGBgIpg,2588 
+diffusers/pipelines/deprecated/spectrogram_diffusion/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/deprecated/spectrogram_diffusion/__pycache__/continuous_encoder.cpython-312.pyc,, +diffusers/pipelines/deprecated/spectrogram_diffusion/__pycache__/midi_utils.cpython-312.pyc,, +diffusers/pipelines/deprecated/spectrogram_diffusion/__pycache__/notes_encoder.cpython-312.pyc,, +diffusers/pipelines/deprecated/spectrogram_diffusion/__pycache__/pipeline_spectrogram_diffusion.cpython-312.pyc,, +diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py,sha256=7hqiAFb5gYob4MPvNMDaDJPX1IlJNlHzHRfnC52yhGQ,3100 +diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py,sha256=f-n8HWh2e4JvAGVgnUKgYW5GwJTSCZbTEOyvjh-hpWg,25046 +diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py,sha256=1dphTvTIpodYsic9Rn9DpX9MOchZW_PjyS626MIWw1A,2923 +diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py,sha256=FBLynLfcNwSsmzpxU7jlq21D4tpI9ngVPmpRe6oX7Gg,11486 +diffusers/pipelines/deprecated/stable_diffusion_variants/__init__.py,sha256=mnIQupN59oc3JmKGaQZia7MO92E08wswJrP9QITzWQs,2111 +diffusers/pipelines/deprecated/stable_diffusion_variants/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/deprecated/stable_diffusion_variants/__pycache__/pipeline_cycle_diffusion.cpython-312.pyc,, +diffusers/pipelines/deprecated/stable_diffusion_variants/__pycache__/pipeline_onnx_stable_diffusion_inpaint_legacy.cpython-312.pyc,, +diffusers/pipelines/deprecated/stable_diffusion_variants/__pycache__/pipeline_stable_diffusion_inpaint_legacy.cpython-312.pyc,, +diffusers/pipelines/deprecated/stable_diffusion_variants/__pycache__/pipeline_stable_diffusion_model_editing.cpython-312.pyc,, +diffusers/pipelines/deprecated/stable_diffusion_variants/__pycache__/pipeline_stable_diffusion_paradigms.cpython-312.pyc,, 
+diffusers/pipelines/deprecated/stable_diffusion_variants/__pycache__/pipeline_stable_diffusion_pix2pix_zero.cpython-312.pyc,, +diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py,sha256=hkYHu6GZRoxrazf_1ZeKUOd-eyoVqSRvRR7XBdXTRlo,47970 +diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py,sha256=AfcKja4LidF22nDdW8hVI1kWVS9Y3dGdEZbxth-5Wic,27761 +diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py,sha256=9wvl2QXDzRJ9kbmH3rKzq9_E9JfY6jem-3Jt21xr0G8,42488 +diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py,sha256=33R8hwSFECMaKrDAN-QR6ZnCCVBgMtTkowDxgppYMcQ,41439 +diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py,sha256=FDJaWTa-wnNaYp7_aU690zJkzI7bK8dsXYt_wPTPxFU,41184 +diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py,sha256=pk8q70gxUoU2ljDjIhtr-5ZR86Ew1lwPesyL4I7Xx1I,63438 +diffusers/pipelines/deprecated/stochastic_karras_ve/__init__.py,sha256=WOKqWaBgVgNkDUUf4ZL1--TauXKeaPqtGf3P2fTFYMw,453 +diffusers/pipelines/deprecated/stochastic_karras_ve/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/deprecated/stochastic_karras_ve/__pycache__/pipeline_stochastic_karras_ve.cpython-312.pyc,, +diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py,sha256=wSyt6Uf_2b2RCBb2yyZW1gVn0boTL-NttQhAVYDASLk,5210 +diffusers/pipelines/deprecated/versatile_diffusion/__init__.py,sha256=_CRp2PIJD6loFlES3hMcPigZNOUMf2OgTaRFgoit7hc,2838 +diffusers/pipelines/deprecated/versatile_diffusion/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/deprecated/versatile_diffusion/__pycache__/modeling_text_unet.cpython-312.pyc,, +diffusers/pipelines/deprecated/versatile_diffusion/__pycache__/pipeline_versatile_diffusion.cpython-312.pyc,, 
+diffusers/pipelines/deprecated/versatile_diffusion/__pycache__/pipeline_versatile_diffusion_dual_guided.cpython-312.pyc,, +diffusers/pipelines/deprecated/versatile_diffusion/__pycache__/pipeline_versatile_diffusion_image_variation.cpython-312.pyc,, +diffusers/pipelines/deprecated/versatile_diffusion/__pycache__/pipeline_versatile_diffusion_text_to_image.cpython-312.pyc,, +diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py,sha256=jHcjb8HF5EVThqokWJriaresHItuo5ePLoeWWZKVFyI,112669 +diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py,sha256=YXX1fikPViW3LFbzAI3Ylkvf283Pk1DPTMvWh_t1DUg,21779 +diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py,sha256=SWemYFFkMFuaOLekxgAEfvWGDNoeAQAtlFTdMa6XIOY,27159 +diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py,sha256=WtOtloOh-dVhSyYC0ZVsVFpRhKfo70JkBo9HxfcbkLY,19648 +diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py,sha256=703gWYvolAieow2bm5F4bFjRYQsiz07DSxsAGSxtNQg,22851 +diffusers/pipelines/deprecated/vq_diffusion/__init__.py,sha256=CD0X20a3_61pBaOzDxgU_33PLjxN1W8V46TCAwykUgE,1650 +diffusers/pipelines/deprecated/vq_diffusion/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/deprecated/vq_diffusion/__pycache__/pipeline_vq_diffusion.cpython-312.pyc,, +diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py,sha256=8Q69le4NHYPdwyQyqBF6dS7IuRIgRxq-b-8KIQ83mIA,15378 +diffusers/pipelines/dit/__init__.py,sha256=w6yUFMbGzaUGPKpLfEfvHlYmrKD0UErczwsHDaDtLuQ,408 +diffusers/pipelines/dit/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/dit/__pycache__/pipeline_dit.cpython-312.pyc,, +diffusers/pipelines/dit/pipeline_dit.py,sha256=VbfpM3_jVRRg_FHSWCRGH4LcAciMWFHPLmqakyVgdGQ,10394 +diffusers/pipelines/easyanimate/__init__.py,sha256=CeJHlQus6mhlN2rmFk1LA44ygm4jYJVEyMYWMckcxZI,1634 
+diffusers/pipelines/easyanimate/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/easyanimate/__pycache__/pipeline_easyanimate.cpython-312.pyc,, +diffusers/pipelines/easyanimate/__pycache__/pipeline_easyanimate_control.cpython-312.pyc,, +diffusers/pipelines/easyanimate/__pycache__/pipeline_easyanimate_inpaint.cpython-312.pyc,, +diffusers/pipelines/easyanimate/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/easyanimate/pipeline_easyanimate.py,sha256=NOdLhHCFXzg7JTn4C8k-T54hw3k18yXjHSzjj-4xg2I,35864 +diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py,sha256=kqIMzPOeFmsnUbCETkjevO5I7KS2Wjl0Nzs_WJh-jow,46071 +diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py,sha256=e8WKl-P8fpVFitNTlIkJxF68JAkZ6UYFBem-awxXPs4,58460 +diffusers/pipelines/easyanimate/pipeline_output.py,sha256=45l9nWtgZDe_NhLy4mArgy5nPEY5lt8VegrpY2wGl-U,621 +diffusers/pipelines/flux/__init__.py,sha256=heCaE43tK4Ss6WrvDTWtglW9rLHf2AGzPqVpxGcQTH4,3445 +diffusers/pipelines/flux/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/flux/__pycache__/modeling_flux.cpython-312.pyc,, +diffusers/pipelines/flux/__pycache__/pipeline_flux.cpython-312.pyc,, +diffusers/pipelines/flux/__pycache__/pipeline_flux_control.cpython-312.pyc,, +diffusers/pipelines/flux/__pycache__/pipeline_flux_control_img2img.cpython-312.pyc,, +diffusers/pipelines/flux/__pycache__/pipeline_flux_control_inpaint.cpython-312.pyc,, +diffusers/pipelines/flux/__pycache__/pipeline_flux_controlnet.cpython-312.pyc,, +diffusers/pipelines/flux/__pycache__/pipeline_flux_controlnet_image_to_image.cpython-312.pyc,, +diffusers/pipelines/flux/__pycache__/pipeline_flux_controlnet_inpainting.cpython-312.pyc,, +diffusers/pipelines/flux/__pycache__/pipeline_flux_fill.cpython-312.pyc,, +diffusers/pipelines/flux/__pycache__/pipeline_flux_img2img.cpython-312.pyc,, +diffusers/pipelines/flux/__pycache__/pipeline_flux_inpaint.cpython-312.pyc,, 
+diffusers/pipelines/flux/__pycache__/pipeline_flux_kontext.cpython-312.pyc,, +diffusers/pipelines/flux/__pycache__/pipeline_flux_kontext_inpaint.cpython-312.pyc,, +diffusers/pipelines/flux/__pycache__/pipeline_flux_prior_redux.cpython-312.pyc,, +diffusers/pipelines/flux/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/flux/modeling_flux.py,sha256=Aux3Jrjd0yj5Qk414ofAViBgVGA_VFLsS0saCSQRf6Q,1518 +diffusers/pipelines/flux/pipeline_flux.py,sha256=Nue7-rhcvSpImeQ4XzcwGTUJZ7igjXkeS4YQVrQMFLs,48755 +diffusers/pipelines/flux/pipeline_flux_control.py,sha256=fSrat2UcRSHVH16Sp8ckaYd64AznkHCHaRjJyTxzC-E,41823 +diffusers/pipelines/flux/pipeline_flux_control_img2img.py,sha256=HDWzU1hIi5OmMlzdI_5rg_6yyRL56SrvoyHiK8cAlVU,44542 +diffusers/pipelines/flux/pipeline_flux_control_inpaint.py,sha256=z1orbNiP8YLGlfQQAPYr6Cyp1rg5cNTi1X99yN5Wq24,54883 +diffusers/pipelines/flux/pipeline_flux_controlnet.py,sha256=ZavXBqpNHvJFeS_wAUZgkBPYtZtxQmt50be4j4w8YDA,57827 +diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py,sha256=RqokoqMxpQh-Djq46FtZkmWryXVYqq5mG87CDLd99jQ,46442 +diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py,sha256=1_PLsO3uih1q5LzDf5tCTheqyyS9bAORYmGZTcVQTqU,55467 +diffusers/pipelines/flux/pipeline_flux_fill.py,sha256=FprE9ll2Mmo1LLwVORRBqbHG-TBldm0cNaORG6Bxd1M,50616 +diffusers/pipelines/flux/pipeline_flux_img2img.py,sha256=DKofRI2rQjXpxIOcUcMcJolTepYKVzYXT5x1ktJAd_A,53451 +diffusers/pipelines/flux/pipeline_flux_inpaint.py,sha256=U7fTjO8fFuFh24qyXSV1E-AutYQP_5Iz51P9JMoiuw4,59186 +diffusers/pipelines/flux/pipeline_flux_kontext.py,sha256=v36gU_JQRwVC5fAmB0aC-P3zNXDtLRENBx8_4joZxWw,56121 +diffusers/pipelines/flux/pipeline_flux_kontext_inpaint.py,sha256=9e9Awtbp2634ato-aShbXfUq52qdXR6kg5oCsiWn12Q,72860 +diffusers/pipelines/flux/pipeline_flux_prior_redux.py,sha256=WO93S6ju0bwGODpE-yBPGnY9UlJoOdUmSdJ-0Gh3ZWQ,21696 +diffusers/pipelines/flux/pipeline_output.py,sha256=XffniwI1Qh7X6_Blgs4JpUNlt7fT8BKUQlniCLgtrgk,1246 
+diffusers/pipelines/flux2/__init__.py,sha256=1CCHYHNJUGRqPdbln_PPbr8Mz6ZWQdfaFEy0wqngKCo,1594 +diffusers/pipelines/flux2/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/flux2/__pycache__/image_processor.cpython-312.pyc,, +diffusers/pipelines/flux2/__pycache__/pipeline_flux2.cpython-312.pyc,, +diffusers/pipelines/flux2/__pycache__/pipeline_flux2_klein.cpython-312.pyc,, +diffusers/pipelines/flux2/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/flux2/__pycache__/system_messages.cpython-312.pyc,, +diffusers/pipelines/flux2/image_processor.py,sha256=FV5RlPhWteT6z5ucbLJp53UcrfsJHbMsyHgWuQrOYlU,6552 +diffusers/pipelines/flux2/pipeline_flux2.py,sha256=MpYNANl4edHmqFLNaXdVBRtidh_rPKqITU1PhMPA8Xw,44103 +diffusers/pipelines/flux2/pipeline_flux2_klein.py,sha256=5iVGBEnaUYGbqUDbKWcwalVHKLBXJ1cZ-k5tZ1kvRMA,40755 +diffusers/pipelines/flux2/pipeline_output.py,sha256=POB74wmVrae_eIwV60YmC3WOuganWDXsrBEv6uqxe94,743 +diffusers/pipelines/flux2/system_messages.py,sha256=igB95rZpZPhPTRXgowiOTnqoadYqNZg-q8iwFqyX53A,1904 +diffusers/pipelines/free_init_utils.py,sha256=2BMSzPZXFMvw2uKp5dOOGcW0PJDvpizYPn_tdQTh3AY,7661 +diffusers/pipelines/free_noise_utils.py,sha256=WzKjuwHoBVQxIOiRLHQ_VI5bvzDJsLIdcv-cqHhbako,29596 +diffusers/pipelines/glm_image/__init__.py,sha256=FpjRQim2Cj3sIp_Y2HEneLDzB_qX5jhMuRDt9-a0N5Q,1967 +diffusers/pipelines/glm_image/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/glm_image/__pycache__/pipeline_glm_image.cpython-312.pyc,, +diffusers/pipelines/glm_image/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/glm_image/pipeline_glm_image.py,sha256=nRelXlxnWHRdtvIoKBQwEPgER7012P5qMyMAcS7Blyw,47796 +diffusers/pipelines/glm_image/pipeline_output.py,sha256=5i1thFPEzaom6XzPQhtmkzUdTjYgJvGAmQtelSauGqQ,557 +diffusers/pipelines/helios/__init__.py,sha256=WcBeSHOMrzjDkPOu7-OYZmrKWXxZ9WNqVPcLqgJ84-E,1428 +diffusers/pipelines/helios/__pycache__/__init__.cpython-312.pyc,, 
+diffusers/pipelines/helios/__pycache__/pipeline_helios.cpython-312.pyc,, +diffusers/pipelines/helios/__pycache__/pipeline_helios_pyramid.cpython-312.pyc,, +diffusers/pipelines/helios/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/helios/pipeline_helios.py,sha256=XTwo3xVw1Wzov1p1wiXMF4xyHSTo68_ztuFKZcxZhMs,42871 +diffusers/pipelines/helios/pipeline_helios_pyramid.py,sha256=UCE1k8YjkP7AI4h025rTsB3DFakwLZeOfotigsP0pgM,50440 +diffusers/pipelines/helios/pipeline_output.py,sha256=mFtnDLsCSYa-GaDTAyROjTFzeNJQGmVtkgXWO-7OLos,611 +diffusers/pipelines/hidream_image/__init__.py,sha256=SeMI0Ae_K8guNBe8doZIX_vJndXz6jWFWhCqtH1s5I0,1499 +diffusers/pipelines/hidream_image/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/hidream_image/__pycache__/pipeline_hidream_image.cpython-312.pyc,, +diffusers/pipelines/hidream_image/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/hidream_image/pipeline_hidream_image.py,sha256=ZGsxDipdT187BpPe88jMW8L45kbjYjcagcfDdq49Jno,52375 +diffusers/pipelines/hidream_image/pipeline_output.py,sha256=rOKNTvqXu_Oe_vBhZpJ6RRQL3CuHKO9fX71jd6MnEpU,1192 +diffusers/pipelines/hunyuan_image/__init__.py,sha256=B-oIcGS8g8UsudBZ_NXICZyDJS9qFsQB9igL0KKHk74,1478 +diffusers/pipelines/hunyuan_image/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/hunyuan_image/__pycache__/pipeline_hunyuanimage.cpython-312.pyc,, +diffusers/pipelines/hunyuan_image/__pycache__/pipeline_hunyuanimage_refiner.cpython-312.pyc,, +diffusers/pipelines/hunyuan_image/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/hunyuan_image/pipeline_hunyuanimage.py,sha256=4lezLoHqK1TF-ORJ7_Dc129N7ic_WB_9a_3XryjnWEo,42885 +diffusers/pipelines/hunyuan_image/pipeline_hunyuanimage_refiner.py,sha256=ed6Xc4ipapcnzAV58Hl9-EGbp7muyWsqgGBkqxTGxm8,37157 +diffusers/pipelines/hunyuan_image/pipeline_output.py,sha256=TbLrNttRZQKkIUSqRJzAf_NNFt_bFL-s2FkhBhOX0Ig,564 
+diffusers/pipelines/hunyuan_video/__init__.py,sha256=2IhKqUmvwUZpkBWZI1HBbsq15U-_YEC3aSwiNDMogDE,1878 +diffusers/pipelines/hunyuan_video/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/hunyuan_video/__pycache__/pipeline_hunyuan_skyreels_image2video.cpython-312.pyc,, +diffusers/pipelines/hunyuan_video/__pycache__/pipeline_hunyuan_video.cpython-312.pyc,, +diffusers/pipelines/hunyuan_video/__pycache__/pipeline_hunyuan_video_framepack.cpython-312.pyc,, +diffusers/pipelines/hunyuan_video/__pycache__/pipeline_hunyuan_video_image2video.cpython-312.pyc,, +diffusers/pipelines/hunyuan_video/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py,sha256=Eo_nI8UIad8kby4hnO7mfBW4ePD27WtgU0zXQ5nwU0w,39988 +diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py,sha256=BKKX9SWFYH3lg80oWtnqZm-AVSTYyiDAP1yIEgCplVE,36748 +diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py,sha256=wO-nz3t4G8fz8cIjII46OlEIoBajZJ_C3fxjrSKLXj0,54943 +diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py,sha256=wijxH2A7O-4Aj6H6WADMVZ4AnrW_RXQCvOTZA0hr5FQ,47336 +diffusers/pipelines/hunyuan_video/pipeline_output.py,sha256=AV73vuIvrgAHkU-oEZKkygJ_W3kEYESLDndrRZftWmU,1382 +diffusers/pipelines/hunyuan_video1_5/__init__.py,sha256=G0f5dX_WkxVQqLDArcDEX_HJ1B_IDdPScTqnyGZKQp0,1520 +diffusers/pipelines/hunyuan_video1_5/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/hunyuan_video1_5/__pycache__/image_processor.cpython-312.pyc,, +diffusers/pipelines/hunyuan_video1_5/__pycache__/pipeline_hunyuan_video1_5.cpython-312.pyc,, +diffusers/pipelines/hunyuan_video1_5/__pycache__/pipeline_hunyuan_video1_5_image2video.cpython-312.pyc,, +diffusers/pipelines/hunyuan_video1_5/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/hunyuan_video1_5/image_processor.py,sha256=l8qoHagA7VQYfX9SWcxng0bRtJ1BECflZ1XimbW_ld8,4085 
+diffusers/pipelines/hunyuan_video1_5/pipeline_hunyuan_video1_5.py,sha256=k4Hs9nyCaUF5r_81JrPPJXlofKYmOhZBuWs_DH0mSSE,39205 +diffusers/pipelines/hunyuan_video1_5/pipeline_hunyuan_video1_5_image2video.py,sha256=Lb6krFIBrmctiOxOyZuKETcEdNZAkWtI0YzKHeU5WjI,44610 +diffusers/pipelines/hunyuan_video1_5/pipeline_output.py,sha256=ihD3jVwm146aO79MFo96repRdjm0sWkQWYYEjpw-ppA,628 +diffusers/pipelines/hunyuandit/__init__.py,sha256=Zby0yEsLNAoa4cf6W92QXIzyGoijI54xXRVhmrHGHsc,1302 +diffusers/pipelines/hunyuandit/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/hunyuandit/__pycache__/pipeline_hunyuandit.cpython-312.pyc,, +diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py,sha256=WtuxGRMlCWjmSXAxCA00x_WQaicpT47l2yeK1x2gWtk,43236 +diffusers/pipelines/i2vgen_xl/__init__.py,sha256=5Stj50A-AIJ1pPhilpDRx1PARMs_n8OKTDl64cq0LAY,1307 +diffusers/pipelines/i2vgen_xl/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/i2vgen_xl/__pycache__/pipeline_i2vgen_xl.cpython-312.pyc,, +diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py,sha256=YjKS5TP7WjbyCIh3z8nlQaMpDQdw5gsxbhrNUVHIUi0,37306 +diffusers/pipelines/kandinsky/__init__.py,sha256=wrxuhSw_CunNhm7TdzA_fm__092mibGxp5_ep1boZmQ,2312 +diffusers/pipelines/kandinsky/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/kandinsky/__pycache__/pipeline_kandinsky.cpython-312.pyc,, +diffusers/pipelines/kandinsky/__pycache__/pipeline_kandinsky_combined.cpython-312.pyc,, +diffusers/pipelines/kandinsky/__pycache__/pipeline_kandinsky_img2img.cpython-312.pyc,, +diffusers/pipelines/kandinsky/__pycache__/pipeline_kandinsky_inpaint.cpython-312.pyc,, +diffusers/pipelines/kandinsky/__pycache__/pipeline_kandinsky_prior.cpython-312.pyc,, +diffusers/pipelines/kandinsky/__pycache__/text_encoder.cpython-312.pyc,, +diffusers/pipelines/kandinsky/pipeline_kandinsky.py,sha256=1RIjmE1O4i8n6RA_MhsFWDJV8YdJb7v2b-F8RgzpS2E,17817 +diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py,sha256=eOF98cOvaT--qy6pP92i2xBejDzEIfC5J-yoDHRk-wo,39407 
+diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py,sha256=4iHiEs_sLXVbKFGOPY5O-sGQxWCEhZp8x_W1mkQ-jos,21846 +diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py,sha256=qvoul4MVwvTzUZaRtXYDngbYNHfp2yBJrTrq4_fyzSg,28651 +diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py,sha256=WgdYoahScxfTdNNS1KaCT_0l8leyMSM6fFc8a-P4DeE,23936 +diffusers/pipelines/kandinsky/text_encoder.py,sha256=T4RgjvWg2FIEWweF1QM3P3mQHgAfs7wFuQup5VTTq-4,1090 +diffusers/pipelines/kandinsky2_2/__init__.py,sha256=WeV8KWoCLj6KTvJ-f3Do87IoX_dR_AZNylBz7_Iu87s,2796 +diffusers/pipelines/kandinsky2_2/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/kandinsky2_2/__pycache__/pipeline_kandinsky2_2.cpython-312.pyc,, +diffusers/pipelines/kandinsky2_2/__pycache__/pipeline_kandinsky2_2_combined.cpython-312.pyc,, +diffusers/pipelines/kandinsky2_2/__pycache__/pipeline_kandinsky2_2_controlnet.cpython-312.pyc,, +diffusers/pipelines/kandinsky2_2/__pycache__/pipeline_kandinsky2_2_controlnet_img2img.cpython-312.pyc,, +diffusers/pipelines/kandinsky2_2/__pycache__/pipeline_kandinsky2_2_img2img.cpython-312.pyc,, +diffusers/pipelines/kandinsky2_2/__pycache__/pipeline_kandinsky2_2_inpainting.cpython-312.pyc,, +diffusers/pipelines/kandinsky2_2/__pycache__/pipeline_kandinsky2_2_prior.cpython-312.pyc,, +diffusers/pipelines/kandinsky2_2/__pycache__/pipeline_kandinsky2_2_prior_emb2emb.cpython-312.pyc,, +diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py,sha256=9FCkoyFo-OBP9p66qCuhFYs1mU6_g5vZMjX1XlcYr2U,14307 +diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py,sha256=qgx8EX-P636YbYsFxkJAKLV7MbJYTgn-VnjAI1mC-Bo,43992 +diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py,sha256=PZPQ_pZ-he7qRKRJkrIMgRQexZ6NqoDZWkefLb1cOWY,14289 +diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py,sha256=YbH9FTt-tl0aNx8L5at2xRiA6_s8ulE09_K-fsJHpWc,16795 
+diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py,sha256=q2_U8IAIuNTLPFT7Fos_A5H74RYMFwRRHkLeDPmZ2KI,17101 +diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py,sha256=jt5jEOe7UOUUc-d7qbnBHctH2vPj0YXGhBjgivxaJK0,24938 +diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py,sha256=X5BDyiMX0eX09YLd4gSKTNOhMh-JpIJqCrzJg5EN-Pk,25538 +diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py,sha256=CxwFANSdfvOeQYgOQ_CGKlb19kOHTxtzMFqLDFR5QXY,25079 +diffusers/pipelines/kandinsky3/__init__.py,sha256=7Mv8Ov-XstHMLmRQU7psdheFn_e_qXJWWTYV7z7uj4U,1461 +diffusers/pipelines/kandinsky3/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/kandinsky3/__pycache__/convert_kandinsky3_unet.cpython-312.pyc,, +diffusers/pipelines/kandinsky3/__pycache__/pipeline_kandinsky3.cpython-312.pyc,, +diffusers/pipelines/kandinsky3/__pycache__/pipeline_kandinsky3_img2img.cpython-312.pyc,, +diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py,sha256=FJ8psagvZtQHJupm0hgMUI2mto3IHEXjaoLDXip1LMA,3273 +diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py,sha256=kzLK21B4xZcE8GooqtSICGsL3YBoMp2IobzV7nN5Wz8,27691 +diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py,sha256=P1ZkAXNPyNWI07zOmu6c6agE3HmTztg5HS4rlvVjkfc,30409 +diffusers/pipelines/kandinsky5/__init__.py,sha256=9jKyIiVlaYw1e2BbE5oNl5S-UKgAUGslSi2g3YPWFus,1732 +diffusers/pipelines/kandinsky5/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/kandinsky5/__pycache__/pipeline_kandinsky.cpython-312.pyc,, +diffusers/pipelines/kandinsky5/__pycache__/pipeline_kandinsky_i2i.cpython-312.pyc,, +diffusers/pipelines/kandinsky5/__pycache__/pipeline_kandinsky_i2v.cpython-312.pyc,, +diffusers/pipelines/kandinsky5/__pycache__/pipeline_kandinsky_t2i.cpython-312.pyc,, +diffusers/pipelines/kandinsky5/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/kandinsky5/pipeline_kandinsky.py,sha256=cl85_iJNHnKcBZ5_K2-Siw9SY9-RHLbBOtWB78iYx0o,40791 
+diffusers/pipelines/kandinsky5/pipeline_kandinsky_i2i.py,sha256=GHvmqE1imHqsIu0LlHyv6oR8Xwx2gULNt_q3_jieSDw,36576 +diffusers/pipelines/kandinsky5/pipeline_kandinsky_i2v.py,sha256=efaRIoUZMuBdTV0ytlMrwqABv-l4_cCFtAEhJLYTgIo,44364 +diffusers/pipelines/kandinsky5/pipeline_kandinsky_t2i.py,sha256=B6oHs0XqPuPP1TZaMLRE5IMUS2O5BFQy4VuRpNn9kcA,34063 +diffusers/pipelines/kandinsky5/pipeline_output.py,sha256=1RgB2K81TP8-l7tv1rRClAVNTm3CK7eu8Jy4Quh04i4,1110 +diffusers/pipelines/kolors/__init__.py,sha256=6Xp5M_K6PfByqqnK1HuMD9RKLkOZYekeNNqrGk4HToM,1791 +diffusers/pipelines/kolors/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/kolors/__pycache__/pipeline_kolors.cpython-312.pyc,, +diffusers/pipelines/kolors/__pycache__/pipeline_kolors_img2img.cpython-312.pyc,, +diffusers/pipelines/kolors/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/kolors/__pycache__/text_encoder.cpython-312.pyc,, +diffusers/pipelines/kolors/__pycache__/tokenizer.cpython-312.pyc,, +diffusers/pipelines/kolors/pipeline_kolors.py,sha256=ePDqW3MYbR7Hn-Kt0mQY4CFwuB-j1r9GwP43MZN2Wcc,55509 +diffusers/pipelines/kolors/pipeline_kolors_img2img.py,sha256=fbY4PFa_r54buKC0ULR8x8cp8WcjdFjt4m9Y5yrDcB0,65381 +diffusers/pipelines/kolors/pipeline_output.py,sha256=4tDZE3D8Sa9Mkrpa8e9QW6VvWOBqhwK47fdF0ZJ2wyQ,553 +diffusers/pipelines/kolors/text_encoder.py,sha256=Zaj23nUNaX5PmWfim8DcxH6QbMLr6dS-LTjHra2CrT8,35075 +diffusers/pipelines/kolors/tokenizer.py,sha256=CdfLzVX9nX7vgGxTnTo2r5OlmZlGOWQOYd9Z8VzTJGo,13360 +diffusers/pipelines/latent_consistency_models/__init__.py,sha256=SfUylLTTBCs_wlGOPpW899lgE1E0GOLGu4GhDPFx-Ls,1560 +diffusers/pipelines/latent_consistency_models/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/latent_consistency_models/__pycache__/pipeline_latent_consistency_img2img.cpython-312.pyc,, +diffusers/pipelines/latent_consistency_models/__pycache__/pipeline_latent_consistency_text2img.cpython-312.pyc,, 
+diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py,sha256=hD2MM_mBwSH-UqM14ji4Elp3VRnYmPvL6IWkExDlADM,49561 +diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py,sha256=sgsbgtS73g6wdd31LZO7eBC9EyId9wcqRpT-IVlS5IM,46115 +diffusers/pipelines/latent_diffusion/__init__.py,sha256=iUkMRZY-pteRsvsROOz2Pacm7t02Q6QvbsgQedJt6-E,1542 +diffusers/pipelines/latent_diffusion/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/latent_diffusion/__pycache__/pipeline_latent_diffusion.cpython-312.pyc,, +diffusers/pipelines/latent_diffusion/__pycache__/pipeline_latent_diffusion_superresolution.cpython-312.pyc,, +diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py,sha256=RoOErAAj6EXbIl8wRK8hkcE0FI_b3M5zYeAPOUjaN4A,32416 +diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py,sha256=tga_1kWpcp82Il7KjnYJGo54ztQ68gcforniES5628g,8138 +diffusers/pipelines/latte/__init__.py,sha256=1XMhkoAvpw2akbDmMTsKJbTU4PsR9H6boq4FEhCGbwo,1282 +diffusers/pipelines/latte/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/latte/__pycache__/pipeline_latte.cpython-312.pyc,, +diffusers/pipelines/latte/pipeline_latte.py,sha256=Y3r1tyfEwkYpAdwvNqsF8tN4BjwQ-OwVZ5d0-FQeVdM,42687 +diffusers/pipelines/ledits_pp/__init__.py,sha256=3VaqGS1d39iC5flUifb4vAD_bDJ-sIUFaLIYhBuHbwE,1783 +diffusers/pipelines/ledits_pp/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/ledits_pp/__pycache__/pipeline_leditspp_stable_diffusion.cpython-312.pyc,, +diffusers/pipelines/ledits_pp/__pycache__/pipeline_leditspp_stable_diffusion_xl.cpython-312.pyc,, +diffusers/pipelines/ledits_pp/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py,sha256=CUx1hf0bhML-S_GALTuIKyOSLSPIoQDHQ4bq1LBYz5w,78547 +diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py,sha256=BG6ifUDhlT8H_S0G8GYf0-mkunSnZwMMD-UKpynTHJM,90139 
+diffusers/pipelines/ledits_pp/pipeline_output.py,sha256=rIswvZkRhOzlbkii07r7ZnQO-DCaFkybGUIZ8a7vw8M,1517 +diffusers/pipelines/longcat_image/__init__.py,sha256=hR2F8edpx7tcBSyZMfsF2cUFsUP289BkbbD0dc5rS8s,1608 +diffusers/pipelines/longcat_image/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/longcat_image/__pycache__/pipeline_longcat_image.cpython-312.pyc,, +diffusers/pipelines/longcat_image/__pycache__/pipeline_longcat_image_edit.cpython-312.pyc,, +diffusers/pipelines/longcat_image/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/longcat_image/__pycache__/system_messages.cpython-312.pyc,, +diffusers/pipelines/longcat_image/pipeline_longcat_image.py,sha256=f31g0R46qx9_WqPDb1N94UuVCareKVvQO0D-0IqHeA4,28814 +diffusers/pipelines/longcat_image/pipeline_longcat_image_edit.py,sha256=ABpTWSre_GcJyKbiEcz2IsRPE_-Y2akl9YtsC1DIJcQ,31018 +diffusers/pipelines/longcat_image/pipeline_output.py,sha256=QXMz6fDAyAS29ces5GP_5nYl6xp0yZAIxjYRuZA7KXE,575 +diffusers/pipelines/longcat_image/system_messages.py,sha256=rDAHjAG3sLBhpZOZfsg_Dh4DR6ymqxWP-QdoJzyDZwc,19389 +diffusers/pipelines/ltx/__init__.py,sha256=vvYs1bpjOvEatZi399IVF9qBLkpIaZ_0sIZtng-EYuc,2060 +diffusers/pipelines/ltx/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/ltx/__pycache__/modeling_latent_upsampler.cpython-312.pyc,, +diffusers/pipelines/ltx/__pycache__/pipeline_ltx.cpython-312.pyc,, +diffusers/pipelines/ltx/__pycache__/pipeline_ltx_condition.cpython-312.pyc,, +diffusers/pipelines/ltx/__pycache__/pipeline_ltx_i2v_long_multi_prompt.cpython-312.pyc,, +diffusers/pipelines/ltx/__pycache__/pipeline_ltx_image2video.cpython-312.pyc,, +diffusers/pipelines/ltx/__pycache__/pipeline_ltx_latent_upsample.cpython-312.pyc,, +diffusers/pipelines/ltx/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/ltx/modeling_latent_upsampler.py,sha256=2K7uR3zjDo_67MxEQcmL5VNL7XuDxMfIkEG_8x2rbGs,7450 
+diffusers/pipelines/ltx/pipeline_ltx.py,sha256=Kk1QDnDEQL9jPi_7TTn4h1jzlv4_YldnoBL9Q2MnmQQ,40939 +diffusers/pipelines/ltx/pipeline_ltx_condition.py,sha256=AM6M1wv9fF2pJTYQko6vJhmuLUqlkGpukx97seXEV2I,62418 +diffusers/pipelines/ltx/pipeline_ltx_i2v_long_multi_prompt.py,sha256=eO5Y8JEYZ5oNdDxFIuk-8lvYgL2g8jba5jfgXUwWoV0,67886 +diffusers/pipelines/ltx/pipeline_ltx_image2video.py,sha256=MeKd7DWt7V_gVhFVOAKrGGfEXGAztKu2qQUZbXHMFZI,45775 +diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py,sha256=0e4-Pg4gKnTk6VYff-qLugnUXL0eGaSUfLdKSp7PKvo,14871 +diffusers/pipelines/ltx/pipeline_output.py,sha256=Xeeld80KqF9OQFvo0LK7UVZBRU4SeVSFRew_Btmav-E,605 +diffusers/pipelines/ltx2/__init__.py,sha256=jB8PZifUHLXE_intL6GV0apehwtDCNKGn-WV3S3lefg,2082 +diffusers/pipelines/ltx2/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/ltx2/__pycache__/connectors.cpython-312.pyc,, +diffusers/pipelines/ltx2/__pycache__/export_utils.cpython-312.pyc,, +diffusers/pipelines/ltx2/__pycache__/latent_upsampler.cpython-312.pyc,, +diffusers/pipelines/ltx2/__pycache__/pipeline_ltx2.cpython-312.pyc,, +diffusers/pipelines/ltx2/__pycache__/pipeline_ltx2_condition.cpython-312.pyc,, +diffusers/pipelines/ltx2/__pycache__/pipeline_ltx2_image2video.cpython-312.pyc,, +diffusers/pipelines/ltx2/__pycache__/pipeline_ltx2_latent_upsample.cpython-312.pyc,, +diffusers/pipelines/ltx2/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/ltx2/__pycache__/utils.cpython-312.pyc,, +diffusers/pipelines/ltx2/__pycache__/vocoder.cpython-312.pyc,, +diffusers/pipelines/ltx2/connectors.py,sha256=gEu1pdPGFVrbY5KtlaHR_htNYwp3NdFulWSUWVpDUk0,13659 +diffusers/pipelines/ltx2/export_utils.py,sha256=9rq9g8fhmeLypBpErT2xiImbWrHKd2f_AhL4JnSYlvU,7110 +diffusers/pipelines/ltx2/latent_upsampler.py,sha256=7bmQ2QKwoeYbxwnNW0pY6__zu3IrwMhmTTQNO48vz1Q,11841 +diffusers/pipelines/ltx2/pipeline_ltx2.py,sha256=wctMLA1ziyedz2nMIXgu77cvSHTvT5Fj_ZW8EBvwHtU,61297 
+diffusers/pipelines/ltx2/pipeline_ltx2_condition.py,sha256=zcKUJhT5rQVXXIVRghBGFeNY2epVMMrdScycejn0vmM,74457 +diffusers/pipelines/ltx2/pipeline_ltx2_image2video.py,sha256=xW49MXmZy1exyOHAwGBDceyC96fiJ7szNfC2Ucb4q3Y,65480 +diffusers/pipelines/ltx2/pipeline_ltx2_latent_upsample.py,sha256=LtaXc_wY8AieGK5aTSeb8-TVtbBIBbkooHmjzD4rRkw,20012 +diffusers/pipelines/ltx2/pipeline_output.py,sha256=ectGVg1MC7pBvlpUDOEvTCgT8OV6U9EvG-6vjT97WK4,693 +diffusers/pipelines/ltx2/utils.py,sha256=Qs9xsWUmgx0pg0qgRHRXPvNgvjkQNNRaXBxPFdgVD1E,400 +diffusers/pipelines/ltx2/vocoder.py,sha256=L04cyRZvXvw2lEBarjT4VUxz1mhBdoh_ROAb8OWmaG8,6322 +diffusers/pipelines/lucy/__init__.py,sha256=jEs1Bimkh0kzhNhbRzErDk8v2HyEwPV03H5SUune8C8,1295 +diffusers/pipelines/lucy/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/lucy/__pycache__/pipeline_lucy_edit.cpython-312.pyc,, +diffusers/pipelines/lucy/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/lucy/pipeline_lucy_edit.py,sha256=rggMBIrI9Fh3gPVToesDKQVVJaHjUCi5d9xusn_6Jc0,33435 +diffusers/pipelines/lucy/pipeline_output.py,sha256=s3IIUOz2gKkor9jVDnRzvNBThH9A4SI3O5WcYtENKmg,607 +diffusers/pipelines/lumina/__init__.py,sha256=AzWsnxikODkQnCxliBN7eDi83TxcSihxfehfLYxRPD4,1336 +diffusers/pipelines/lumina/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/lumina/__pycache__/pipeline_lumina.cpython-312.pyc,, +diffusers/pipelines/lumina/pipeline_lumina.py,sha256=yG2DRL3jQZN6V1VGnKlh3zOFd8zUAd7K-9Zo1pXEjZ8,44904 +diffusers/pipelines/lumina2/__init__.py,sha256=ZnlJglaTqtwptFJ0uelQ4MKg_p-Lxwzu7eQCd1CFxtc,1342 +diffusers/pipelines/lumina2/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/lumina2/__pycache__/pipeline_lumina2.cpython-312.pyc,, +diffusers/pipelines/lumina2/pipeline_lumina2.py,sha256=oYbtGwqz57hlBhcXEkBzlfBYN-hQD-ghSL3Flmf__i4,39701 +diffusers/pipelines/marigold/__init__.py,sha256=kAs3DZB4oxiYHqLOq9kgAvBCYw3ptpiQKGr01hM2BDQ,1926 +diffusers/pipelines/marigold/__pycache__/__init__.cpython-312.pyc,, 
+diffusers/pipelines/marigold/__pycache__/marigold_image_processing.cpython-312.pyc,, +diffusers/pipelines/marigold/__pycache__/pipeline_marigold_depth.cpython-312.pyc,, +diffusers/pipelines/marigold/__pycache__/pipeline_marigold_intrinsics.cpython-312.pyc,, +diffusers/pipelines/marigold/__pycache__/pipeline_marigold_normals.cpython-312.pyc,, +diffusers/pipelines/marigold/marigold_image_processing.py,sha256=K2bNW97MewBl0UoyDRJoAA69BBN3705w3hWxyCQIrDI,31060 +diffusers/pipelines/marigold/pipeline_marigold_depth.py,sha256=m3nvPEmubbO-hMMiqPUzyqtRWQSRqy-I4O1iiWWfIro,41001 +diffusers/pipelines/marigold/pipeline_marigold_intrinsics.py,sha256=U8jfuSmuKfDI4vcWIFiHorUckE7hqr50K9GE_8GJqUY,35679 +diffusers/pipelines/marigold/pipeline_marigold_normals.py,sha256=KjPCz6QiIW5kAlEa9wOSUkKvUXYoIgHN4wGGF-CzTdU,34597 +diffusers/pipelines/mochi/__init__.py,sha256=8yDkp3YgOvbC4VhO4Tfin2myNxRlWiX1Mi8rY_UvAh4,1282 +diffusers/pipelines/mochi/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/mochi/__pycache__/pipeline_mochi.cpython-312.pyc,, +diffusers/pipelines/mochi/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/mochi/pipeline_mochi.py,sha256=eBrUEiNYqdOrMWPjtNgfmnPoCnkgyug87wpKZu9_xGQ,36884 +diffusers/pipelines/mochi/pipeline_output.py,sha256=ObNRbpladMy6Zm8GB6JzwjpSq9062jZb2TBr0kIR_yE,609 +diffusers/pipelines/musicldm/__init__.py,sha256=l1I5QzvTwMOOltJkcwpTb6nNcr93bWiP_ErHbDdwz6Y,1411 +diffusers/pipelines/musicldm/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/musicldm/__pycache__/pipeline_musicldm.cpython-312.pyc,, +diffusers/pipelines/musicldm/pipeline_musicldm.py,sha256=4if0as5sI8R3xgFa-I4ppkyg4O17zGY1lsplG6-OIes,30569 +diffusers/pipelines/omnigen/__init__.py,sha256=9596QBScCQfCbrybjkWJ7p0N4CnYB7W3hQJsNlGn3dU,1292 +diffusers/pipelines/omnigen/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/omnigen/__pycache__/pipeline_omnigen.cpython-312.pyc,, +diffusers/pipelines/omnigen/__pycache__/processor_omnigen.cpython-312.pyc,, 
+diffusers/pipelines/omnigen/pipeline_omnigen.py,sha256=sTDinHdY37gammprF-cl4Bb2QyYmH7XXjniHxuXlCYI,25019 +diffusers/pipelines/omnigen/processor_omnigen.py,sha256=f8tWJOyChDkdONn757LFFXyaQS4KwLleCKaKZA8wy8k,14161 +diffusers/pipelines/onnx_utils.py,sha256=UwJdj259lLAVHUbULpz-_obdystN4j0sLIbM3-uCHtQ,8736 +diffusers/pipelines/ovis_image/__init__.py,sha256=CRPNsrfPlS3fgCS03AfNDhX5pdPmL8ntoeqcHh-pfbI,1433 +diffusers/pipelines/ovis_image/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/ovis_image/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/ovis_image/__pycache__/pipeline_ovis_image.cpython-312.pyc,, +diffusers/pipelines/ovis_image/pipeline_output.py,sha256=Ua3hpyZw1QCdNnQmUbVxndUiXHkqQ5M-jt3XHjkzss0,1201 +diffusers/pipelines/ovis_image/pipeline_ovis_image.py,sha256=FZaYGVuUElxlDepBM8kDWbSeeF6QoU5XhtOeH4v7sYQ,30463 +diffusers/pipelines/pag/__init__.py,sha256=pyv70bIvWZpMgXJgf8I8JH4OWzvdqJqfvxlFax9SzTg,3986 +diffusers/pipelines/pag/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/pag/__pycache__/pag_utils.cpython-312.pyc,, +diffusers/pipelines/pag/__pycache__/pipeline_pag_controlnet_sd.cpython-312.pyc,, +diffusers/pipelines/pag/__pycache__/pipeline_pag_controlnet_sd_inpaint.cpython-312.pyc,, +diffusers/pipelines/pag/__pycache__/pipeline_pag_controlnet_sd_xl.cpython-312.pyc,, +diffusers/pipelines/pag/__pycache__/pipeline_pag_controlnet_sd_xl_img2img.cpython-312.pyc,, +diffusers/pipelines/pag/__pycache__/pipeline_pag_hunyuandit.cpython-312.pyc,, +diffusers/pipelines/pag/__pycache__/pipeline_pag_kolors.cpython-312.pyc,, +diffusers/pipelines/pag/__pycache__/pipeline_pag_pixart_sigma.cpython-312.pyc,, +diffusers/pipelines/pag/__pycache__/pipeline_pag_sana.cpython-312.pyc,, +diffusers/pipelines/pag/__pycache__/pipeline_pag_sd.cpython-312.pyc,, +diffusers/pipelines/pag/__pycache__/pipeline_pag_sd_3.cpython-312.pyc,, +diffusers/pipelines/pag/__pycache__/pipeline_pag_sd_3_img2img.cpython-312.pyc,, 
+diffusers/pipelines/pag/__pycache__/pipeline_pag_sd_animatediff.cpython-312.pyc,, +diffusers/pipelines/pag/__pycache__/pipeline_pag_sd_img2img.cpython-312.pyc,, +diffusers/pipelines/pag/__pycache__/pipeline_pag_sd_inpaint.cpython-312.pyc,, +diffusers/pipelines/pag/__pycache__/pipeline_pag_sd_xl.cpython-312.pyc,, +diffusers/pipelines/pag/__pycache__/pipeline_pag_sd_xl_img2img.cpython-312.pyc,, +diffusers/pipelines/pag/__pycache__/pipeline_pag_sd_xl_inpaint.cpython-312.pyc,, +diffusers/pipelines/pag/pag_utils.py,sha256=LD9zLr0Y5o0kEKa7Kw52G50gFpEC1TXFYD0qrUhjjR4,10165 +diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py,sha256=ZQcXvRad6xkeAAmMtwbIKvats6RlDfo-g54DvvrAcdE,68347 +diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py,sha256=t5OOMiMzyqB29oC1NnejzREw7lrP7Zdh-hoByxkL8VA,78997 +diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py,sha256=6deSkT8d52mxCcEMmb_muIyE9rramO1jkOF9ff3xbxs,83131 +diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py,sha256=ybTaSIuuSYy0JLgtfkJCtBgju7PyT45SJRCs4POoCWQ,87868 +diffusers/pipelines/pag/pipeline_pag_hunyuandit.py,sha256=NLbgO-mFDQuqUzX3KPXT7gfuL0hM65u9KMygTVrayKo,46412 +diffusers/pipelines/pag/pipeline_pag_kolors.py,sha256=rP08DPN57HtI3VKmmDLCMYord1eywpnjgPbBn6SgY2U,59196 +diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py,sha256=HLvf7MFAcOQsNKt9418uWwk7UPb3qSPAnMaiE_oF6vE,42099 +diffusers/pipelines/pag/pipeline_pag_sana.py,sha256=Ap6S0yPCR6UR-3Y05tGWC3u8YE_DX7EOOXye6oJ9qVc,46956 +diffusers/pipelines/pag/pipeline_pag_sd.py,sha256=-fXwD61d1MgQ1PBMwTp-PPiCg1GgrCRDZPdcJprR710,55599 +diffusers/pipelines/pag/pipeline_pag_sd_3.py,sha256=F1v34e1uUo3tQyLlSWgYRZOHosdwL7pbXmw9ZM4AeJo,49366 +diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py,sha256=9zUHTg-SUhf2vvUpSIvkwzggWMC8XLz7sZpQ9rnK49M,53836 +diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py,sha256=Wj-j1suRUhaB6X1nz-RbuIUywFuKTn99YcNKpG2gYXg,43442 
+diffusers/pipelines/pag/pipeline_pag_sd_img2img.py,sha256=TMu4IVHVJ3NzdlwC-epsbaCCzs8vWdcQT1EezlU7Dsw,57907 +diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py,sha256=V0-MDD4KSFxZQgPGxTAih9OcbVfLKIfXLnxRcYmmyFU,70028 +diffusers/pipelines/pag/pipeline_pag_sd_xl.py,sha256=yDKNrbDI2i7Xymu9qKSJFZQao4yaReXRSsrUGbBAcTw,70008 +diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py,sha256=jOVgpXJh9rZJfyb168mAuwnFumEk3ACzjkeH742KdmA,81489 +diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py,sha256=DozTBcsfG_OpvHzv2_mRSRftsFX9YH7dZSvE3CW_xo8,93246 +diffusers/pipelines/paint_by_example/__init__.py,sha256=AE_H0kNfZRCwMEs-5bgFHuuML5ds1SmXzUOiTlNDQZc,1543 +diffusers/pipelines/paint_by_example/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/paint_by_example/__pycache__/image_encoder.cpython-312.pyc,, +diffusers/pipelines/paint_by_example/__pycache__/pipeline_paint_by_example.cpython-312.pyc,, +diffusers/pipelines/paint_by_example/image_encoder.py,sha256=NT3RadNaIVztZWGP628BDHpvaMRPZ-w3J3j096y3L9s,2484 +diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py,sha256=vUMY5RGJb8gW7xSEXrXinf-1DEgXZEa_GwiElcvMqIc,31244 +diffusers/pipelines/pia/__init__.py,sha256=md5F8G279iZg4WGSmLP7N8apWkuHkfssjLQFzv6c2zI,1299 +diffusers/pipelines/pia/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/pia/__pycache__/pipeline_pia.cpython-312.pyc,, +diffusers/pipelines/pia/pipeline_pia.py,sha256=dzL3LL7mskkrYwBATEn2EqmwvYAKx-pQDcwzlySWrcs,46356 +diffusers/pipelines/pipeline_flax_utils.py,sha256=PiyZ15oQKtGkTM04Lq-_aDCJZdh_BzvhaKCYbuLzP6A,27165 +diffusers/pipelines/pipeline_loading_utils.py,sha256=Ye7-lxLqVSsSSEQQMYnCjya65GHfetB_3R_kCz9zhB0,49628 +diffusers/pipelines/pipeline_utils.py,sha256=_b8LlE-Tnkidg8ZopVCgVLfk_3Z0dFgpAOuXEIl2Hn8,119964 +diffusers/pipelines/pixart_alpha/__init__.py,sha256=QxcTJF9ryOIejEHQVw3bZAYHn2dah-WPT5pZudE8XxU,1595 +diffusers/pipelines/pixart_alpha/__pycache__/__init__.cpython-312.pyc,, 
+diffusers/pipelines/pixart_alpha/__pycache__/pipeline_pixart_alpha.cpython-312.pyc,, +diffusers/pipelines/pixart_alpha/__pycache__/pipeline_pixart_sigma.cpython-312.pyc,, +diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py,sha256=bLlXv2tBH_b28jX1PYhz6FD7EtkNCt9Sz3mXLDiWoCQ,45037 +diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py,sha256=sTJBQ3HCKrCy1hkh82zrDs1A8FMyjyj9a8P3woI-leE,42612 +diffusers/pipelines/prx/__init__.py,sha256=b40g8QQYGIjrjUflOCHlwtrlbWanOB4GXS7NV8NZvPE,2651 +diffusers/pipelines/prx/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/prx/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/prx/__pycache__/pipeline_prx.cpython-312.pyc,, +diffusers/pipelines/prx/pipeline_output.py,sha256=4kNNUq91ORcWAffc2xQmUi-KlFg35b_QwvF-SKxk8I4,1173 +diffusers/pipelines/prx/pipeline_prx.py,sha256=p80luqRgbHbkEdoehD84sjNnjJ0mQXDzSig9LM5OzJw,34095 +diffusers/pipelines/qwenimage/__init__.py,sha256=mLzaR9llyj8B8lEhGcapqJDW5Pp754E7Zx7anwYM3JM,2902 +diffusers/pipelines/qwenimage/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/qwenimage/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/qwenimage/__pycache__/pipeline_qwenimage.cpython-312.pyc,, +diffusers/pipelines/qwenimage/__pycache__/pipeline_qwenimage_controlnet.cpython-312.pyc,, +diffusers/pipelines/qwenimage/__pycache__/pipeline_qwenimage_controlnet_inpaint.cpython-312.pyc,, +diffusers/pipelines/qwenimage/__pycache__/pipeline_qwenimage_edit.cpython-312.pyc,, +diffusers/pipelines/qwenimage/__pycache__/pipeline_qwenimage_edit_inpaint.cpython-312.pyc,, +diffusers/pipelines/qwenimage/__pycache__/pipeline_qwenimage_edit_plus.cpython-312.pyc,, +diffusers/pipelines/qwenimage/__pycache__/pipeline_qwenimage_img2img.cpython-312.pyc,, +diffusers/pipelines/qwenimage/__pycache__/pipeline_qwenimage_inpaint.cpython-312.pyc,, +diffusers/pipelines/qwenimage/__pycache__/pipeline_qwenimage_layered.cpython-312.pyc,, 
+diffusers/pipelines/qwenimage/pipeline_output.py,sha256=1NsUhf-y4LWuRWigkLe67xJFCH3_fC8g7PX52_ZMNAs,566 +diffusers/pipelines/qwenimage/pipeline_qwenimage.py,sha256=SX4z74es0xmsaonlphErMAEKg-TY2hFcKsrE8isc3SQ,36231 +diffusers/pipelines/qwenimage/pipeline_qwenimage_controlnet.py,sha256=QOGtTpXTln9MPdnsrVYJF90AzSERpqiBK-dqTW56TbI,47400 +diffusers/pipelines/qwenimage/pipeline_qwenimage_controlnet_inpaint.py,sha256=N_FxuAIFLJ9bBcWw34awQimQTM_jjaX0uf1DF7-0HqQ,43380 +diffusers/pipelines/qwenimage/pipeline_qwenimage_edit.py,sha256=lWPVR5er9RyJPglEq9Agu9RbdLFmUDD4RCVY1p2ZVQM,43525 +diffusers/pipelines/qwenimage/pipeline_qwenimage_edit_inpaint.py,sha256=04QbPMqhZzUMooJsDIIiU_hBS4ImYIMOUcQPbUFIem8,54443 +diffusers/pipelines/qwenimage/pipeline_qwenimage_edit_plus.py,sha256=n_UeNBHPDKKwiNpGImk8GxzA1pG1Ipcnxo7ybfUNVik,43341 +diffusers/pipelines/qwenimage/pipeline_qwenimage_img2img.py,sha256=Kb_nzQzk-ZVjKOCANXDLjTJOzS-jADXAjKREw6OiQvE,42494 +diffusers/pipelines/qwenimage/pipeline_qwenimage_inpaint.py,sha256=wmmt-CY6UEbkItJtOnl-KVWIuszesjN4RCMw6L_XitQ,51357 +diffusers/pipelines/qwenimage/pipeline_qwenimage_layered.py,sha256=31eq3ArwENRbqvSWi9pI5grb4f6Wbral0vy_frXWjhE,44674 +diffusers/pipelines/sana/__init__.py,sha256=qkgbJxOAEH4gmyQ4FX4USnOd-PPEDkZGjZ3QO0ID0pA,1719 +diffusers/pipelines/sana/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/sana/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/sana/__pycache__/pipeline_sana.cpython-312.pyc,, +diffusers/pipelines/sana/__pycache__/pipeline_sana_controlnet.cpython-312.pyc,, +diffusers/pipelines/sana/__pycache__/pipeline_sana_sprint.cpython-312.pyc,, +diffusers/pipelines/sana/__pycache__/pipeline_sana_sprint_img2img.cpython-312.pyc,, +diffusers/pipelines/sana/pipeline_output.py,sha256=SmAVFQyS_7b1ux7_b0wmg5_yIR1zVRIMwiIEpMZ80iM,549 +diffusers/pipelines/sana/pipeline_sana.py,sha256=Hy4W1zLbWOn6Op9AKm7666uIjJW0l4nQUHlVyX3eaek,48473 
+diffusers/pipelines/sana/pipeline_sana_controlnet.py,sha256=SclFdx6vbtC-a_CmEif-y7vMlcn8WqxbNSKcdZIPDls,52955 +diffusers/pipelines/sana/pipeline_sana_sprint.py,sha256=FckBPpwEheaLQ_fnzFt2_nQw6gTAuYs5vK-0twVazfg,42502 +diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py,sha256=3nEzzYc0TF2oAhc1B1eLnHP9JB7tEOJMb43DsnkJ2Es,46692 +diffusers/pipelines/sana_video/__init__.py,sha256=r_eIfI7pdcI54CJQ2cnOVnESzEzGlsyoAUaxEzwdKIk,1449 +diffusers/pipelines/sana_video/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/sana_video/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/sana_video/__pycache__/pipeline_sana_video.cpython-312.pyc,, +diffusers/pipelines/sana_video/__pycache__/pipeline_sana_video_i2v.cpython-312.pyc,, +diffusers/pipelines/sana_video/pipeline_output.py,sha256=57_2jEJ4pC5RzzvIjDrjrvl-wqNxHFql79CVXrbiPtc,611 +diffusers/pipelines/sana_video/pipeline_sana_video.py,sha256=I1qlfXZ4Dpoz1idm6YTwK_cF3pWi3wf1Nt-di4-8WNI,48446 +diffusers/pipelines/sana_video/pipeline_sana_video_i2v.py,sha256=gNbUeK8xSAutawg6HLbW0UZBy6NTeWyR1xAwt6dmpqA,51551 +diffusers/pipelines/semantic_stable_diffusion/__init__.py,sha256=4jDvmgpXRVXGeSAcfGN90iQoJJBBRgE7NXzBE_8AYxM,1443 +diffusers/pipelines/semantic_stable_diffusion/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/semantic_stable_diffusion/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/semantic_stable_diffusion/__pycache__/pipeline_semantic_stable_diffusion.cpython-312.pyc,, +diffusers/pipelines/semantic_stable_diffusion/pipeline_output.py,sha256=3PXlWjRCJhhdvL5LLazrRLYxjRgeT1F5pCgmq27xXAg,772 +diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py,sha256=YVMAf8_B2BKe7W27PBkZE-9oKF_IReorXOGtBpEPew8,38767 +diffusers/pipelines/shap_e/__init__.py,sha256=LGToZwsVeVBEsE5eveY0Hc2GgI6UgDz6H_6cB_Snn0Y,2093 +diffusers/pipelines/shap_e/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/shap_e/__pycache__/camera.cpython-312.pyc,, 
+diffusers/pipelines/shap_e/__pycache__/pipeline_shap_e.cpython-312.pyc,, +diffusers/pipelines/shap_e/__pycache__/pipeline_shap_e_img2img.cpython-312.pyc,, +diffusers/pipelines/shap_e/__pycache__/renderer.cpython-312.pyc,, +diffusers/pipelines/shap_e/camera.py,sha256=KUG94ZqXnlmhQeKNFi7JvbiJ0yJ3h732lu7WLCgg-Vg,4917 +diffusers/pipelines/shap_e/pipeline_shap_e.py,sha256=wm0IHqXVx0EBm08v_HSW6CGTSv5GCNAXhJRhclR2BBA,13336 +diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py,sha256=grEgLCiqBmcERQUdL3PFN_kF83TPoKLIGZKeKBLYdrI,13137 +diffusers/pipelines/shap_e/renderer.py,sha256=i3kkd4nPZ_6pygmOhI9IzL_3LhxqUsjeHX0Avts92Cw,39109 +diffusers/pipelines/skyreels_v2/__init__.py,sha256=kFPdII5Tf0XcyqwcbomlbZn7iHnoSFFM6GwLcBm_hQ4,2159 +diffusers/pipelines/skyreels_v2/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/skyreels_v2/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/skyreels_v2/__pycache__/pipeline_skyreels_v2.cpython-312.pyc,, +diffusers/pipelines/skyreels_v2/__pycache__/pipeline_skyreels_v2_diffusion_forcing.cpython-312.pyc,, +diffusers/pipelines/skyreels_v2/__pycache__/pipeline_skyreels_v2_diffusion_forcing_i2v.cpython-312.pyc,, +diffusers/pipelines/skyreels_v2/__pycache__/pipeline_skyreels_v2_diffusion_forcing_v2v.cpython-312.pyc,, +diffusers/pipelines/skyreels_v2/__pycache__/pipeline_skyreels_v2_i2v.cpython-312.pyc,, +diffusers/pipelines/skyreels_v2/pipeline_output.py,sha256=Zkcjc9NZ9UyFJoKDXhIayt2mR9XRZc0NBZQggBAVQPI,619 +diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2.py,sha256=a2k9nAI87UmH6VZyke3JXwjGVd3fl9TR4BwJVzpc0Kc,27163 +diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing.py,sha256=IUXHkT73KhodDnkmOrGqP_COKS96Y0i3loc13pQJp-w,48231 +diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_i2v.py,sha256=CxPkPtR2iHKdoJBW4poejIbp84vPayRksWbUDZOV7XM,52542 +diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_v2v.py,sha256=DreRGX1ST8HEdn5NLz5L52woOdO7ZTGHb6b2dMPM61M,52000 
+diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_i2v.py,sha256=i9e3_WqoEduAP5kNGWiLJcvIZZ006zJIOxTZjOXVLVY,34437 +diffusers/pipelines/stable_audio/__init__.py,sha256=R8Tuxx2LsaWWR0lncRJ0faKOmAdaQ0ilvftdBC_07Eo,1561 +diffusers/pipelines/stable_audio/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/stable_audio/__pycache__/modeling_stable_audio.cpython-312.pyc,, +diffusers/pipelines/stable_audio/__pycache__/pipeline_stable_audio.cpython-312.pyc,, +diffusers/pipelines/stable_audio/modeling_stable_audio.py,sha256=vLOp9fB7wBPHbP0HWFbinxbN-UxbU2opzDPqjpG74bI,6048 +diffusers/pipelines/stable_audio/pipeline_stable_audio.py,sha256=R_H2k2xhXEwwVoEdcat6vhOpgcxl7H2TJsi5JcoBnPE,36122 +diffusers/pipelines/stable_cascade/__init__.py,sha256=buKExLbA-qdePd19JSEF29AhOCIaDgqFfLajEmo-Kg4,1672 +diffusers/pipelines/stable_cascade/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/stable_cascade/__pycache__/pipeline_stable_cascade.cpython-312.pyc,, +diffusers/pipelines/stable_cascade/__pycache__/pipeline_stable_cascade_combined.cpython-312.pyc,, +diffusers/pipelines/stable_cascade/__pycache__/pipeline_stable_cascade_prior.cpython-312.pyc,, +diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py,sha256=EuAdP-z0cs1_OAGYD025qxl7YvD6Tp5ttIPo9pg_V1o,26116 +diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py,sha256=NSINq4wzOLQzADglKhMD8Cyh2M4Rfpe5JD7qDml_SNM,18091 +diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py,sha256=VDJGizntv8uW9PuakGVovsBBnFyppsn1jFa98M_7wjA,31393 +diffusers/pipelines/stable_diffusion/__init__.py,sha256=UVNnVKR4TwdfaJid7-RGWfcKImcp52MIZ8hVEWYBD2Q,8497 +diffusers/pipelines/stable_diffusion/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion/__pycache__/clip_image_project_model.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion/__pycache__/convert_from_ckpt.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion/__pycache__/pipeline_flax_stable_diffusion.cpython-312.pyc,, 
+diffusers/pipelines/stable_diffusion/__pycache__/pipeline_flax_stable_diffusion_img2img.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion/__pycache__/pipeline_flax_stable_diffusion_inpaint.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion/__pycache__/pipeline_onnx_stable_diffusion.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion/__pycache__/pipeline_onnx_stable_diffusion_img2img.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion/__pycache__/pipeline_onnx_stable_diffusion_inpaint.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion/__pycache__/pipeline_onnx_stable_diffusion_upscale.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_depth2img.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_image_variation.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_img2img.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_inpaint.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_instruct_pix2pix.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_latent_upscale.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_upscale.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_unclip.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_unclip_img2img.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion/__pycache__/safety_checker.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion/__pycache__/safety_checker_flax.cpython-312.pyc,, 
+diffusers/pipelines/stable_diffusion/__pycache__/stable_unclip_image_normalizer.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion/clip_image_project_model.py,sha256=9LPlAopNG-vpWhmh8i8pz7CHttqZ203-RnGlvy74xxY,1094 +diffusers/pipelines/stable_diffusion/convert_from_ckpt.py,sha256=gCu6XbUZqfx_7cIkr7aL8ehd7__7dbbnn-7f38zCrUI,81634 +diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py,sha256=E_1mECoR_yi6oDopo9DKnSoXei9xNT7ujppePl0bGbY,20594 +diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py,sha256=mpArtmcdjq1n4oo9LmGHqOeew6olLgKzuS2kwbZBBoI,22386 +diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py,sha256=ikRHLyFlvkLTJKj9Kt1MND3tsBNMc7LwNa6LDeczPWc,25933 +diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py,sha256=bLxxryzyMGZ8TeSmS-hw7-5QNizUIcayXOkufBWXfGA,24287 +diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py,sha256=O7Vc0mSoMTg8grv2HwJiCdokxMNIerJxX9-Va01IqAk,28432 +diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py,sha256=zebAjw8cfdlvJ7hcGHQ-YYXcZ5qgcJyTbYM5GfFkj0g,29044 +diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py,sha256=9FC6pOTRtt8UoYilyeisqVcjJV6Tr1BNmyum7hXVLoQ,27835 +diffusers/pipelines/stable_diffusion/pipeline_output.py,sha256=1KYkC2FckCu12r0iLaH2lcTpyy-vB5NC3biexnFzbJk,1446 +diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py,sha256=_t1I5S9ZESLDuLmA2rzsfnyUoyMDdi_fRJjdK0VVutA,55717 +diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py,sha256=NWOPrNylMuXAck-d7vecxh-r6agcwOhb42p2ePL2S5A,44393 +diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py,sha256=8gqGRBL6GqkFrHrO79H50lxwzfqL7Ykw7sm8e-a5LOA,22665 +diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py,sha256=CvE0mp33B-qgvD-nDK1XbIC5ZUfpVZCGVlDr9WNvVBw,59474 
+diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py,sha256=s90KRsoZzd-AGU7nSPMZP6jDjFQWM0txJ9ExDeKejjU,70110 +diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py,sha256=svl7Ba6KIG7utXJFBjFay7grREFvRkTzmkU1iWTL58c,45701 +diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py,sha256=jfIFtLHUfDAAD0FM4uO3T_ehKGFvveJ1cfIDJqmJTkc,30889 +diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py,sha256=32VgEXw99NSO9fa43feK7vkOPEFoCg7GBxxiMF4HK2c,39257 +diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py,sha256=EZ5H5q7_sQV2yLmqoSMMASngqlEzTbFIrTPN-Ma7CeU,45415 +diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py,sha256=w80tkC_t0WACbHeZYEmmCwwc6rtv8jo6QrH4r2MpgXY,40197 +diffusers/pipelines/stable_diffusion/safety_checker.py,sha256=m5kKpvbwAo0Xw4-FS5r9UutEjFARXMUBqME8KdjGsEs,5926 +diffusers/pipelines/stable_diffusion/safety_checker_flax.py,sha256=vmL15hKfTmqwnByTtf5s7GpNvTvQJv29pgz_sVn7ntk,4437 +diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py,sha256=AP8EcuTStyXZ6UcIKtXeFZu7f7o9_vNl9ngba9tJ85E,1842 +diffusers/pipelines/stable_diffusion_3/__init__.py,sha256=4JrcTgfij4mGbSSnCaHSqRRNhCUry8-HH3zQaUIq3DE,1922 +diffusers/pipelines/stable_diffusion_3/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion_3/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion_3/__pycache__/pipeline_stable_diffusion_3.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion_3/__pycache__/pipeline_stable_diffusion_3_img2img.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion_3/__pycache__/pipeline_stable_diffusion_3_inpaint.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion_3/pipeline_output.py,sha256=yGQUX6JcDrspABI4YXJqJt1zweI34z0MBh5TJDDnT-g,573 +diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py,sha256=OGsH60hI_YAlmQ8QiydsDR-0ICyhPpAOe7foJMp6xig,56715 
+diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py,sha256=mCNsBrqDRdLuJzhD9_z0kZQlz7v-tAhMjYO7e0crR1E,57609 +diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py,sha256=-loEWiiF0RILUqZnXUPfCshj4-NSmR5HzstXPT54iW4,69666 +diffusers/pipelines/stable_diffusion_attend_and_excite/__init__.py,sha256=VpZ5FPx9ACTOT4qiEqun2QYeUtx9Rp0YVDwqhYe28QM,1390 +diffusers/pipelines/stable_diffusion_attend_and_excite/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion_attend_and_excite/__pycache__/pipeline_stable_diffusion_attend_and_excite.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py,sha256=HGK_8XpxhsXnkTr9LvhTinH-DPxpMRgCxoWHLCx_z4Q,51459 +diffusers/pipelines/stable_diffusion_diffedit/__init__.py,sha256=JlcUNahRBm0uaPzappogqfjyLDsNW6IeyOfuLs4af5M,1358 +diffusers/pipelines/stable_diffusion_diffedit/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion_diffedit/__pycache__/pipeline_stable_diffusion_diffedit.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py,sha256=ydPLo7uY32imUx0766ZBXbI3T1N1YQazjcRTFqxTI2c,78278 +diffusers/pipelines/stable_diffusion_gligen/__init__.py,sha256=b4dZB5bUuZmEAcg7MmCyWZpyxNmMrlrByEQW_xwGGgI,1568 +diffusers/pipelines/stable_diffusion_gligen/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion_gligen/__pycache__/pipeline_stable_diffusion_gligen.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion_gligen/__pycache__/pipeline_stable_diffusion_gligen_text_image.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py,sha256=0_dZz-EXFJv_wAbPS80VWwv5AFw9KQVl6EN3AB5Al6A,43398 +diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py,sha256=RuaRMRVgBYaOviNtlmLESX6QGy6eI7XMovJo6NXR0NM,51985 
+diffusers/pipelines/stable_diffusion_ldm3d/__init__.py,sha256=8p2npGKPPJbPaTa4swOWRMd24x36E563Bhc_mM29va0,1346 +diffusers/pipelines/stable_diffusion_ldm3d/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion_ldm3d/__pycache__/pipeline_stable_diffusion_ldm3d.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py,sha256=tF42um9-GqaPm6ev4CYD68R5vovib8VFi3nbFiOLSX0,51649 +diffusers/pipelines/stable_diffusion_panorama/__init__.py,sha256=af52eZSYshuw1d6kqKwx0C5Teopkx8UpO9ph_A4WI0Q,1358 +diffusers/pipelines/stable_diffusion_panorama/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion_panorama/__pycache__/pipeline_stable_diffusion_panorama.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py,sha256=gEV7Ja57WsKqlcWgWhLLum-jSMIklhhmg-AXg9E4XWo,60262 +diffusers/pipelines/stable_diffusion_safe/__init__.py,sha256=vGOGnNTDxSimXy89qa2g80aluYQNVKJAZbJB7SdSlCw,2728 +diffusers/pipelines/stable_diffusion_safe/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion_safe/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion_safe/__pycache__/pipeline_stable_diffusion_safe.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion_safe/__pycache__/safety_checker.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion_safe/pipeline_output.py,sha256=GdAGb2e__lArRELj-W2-ixfQj9hPPAKaJYREWN-XF_k,1397 +diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py,sha256=zIHsyl24Ddt66vd1zAcs6cyXbH-h_OkyQ8-qmU15XfM,39393 +diffusers/pipelines/stable_diffusion_safe/safety_checker.py,sha256=Ewt9xIW9sKMGL0sbdjBgu4OhuzmjN_xqI87E6CwLBg0,5039 +diffusers/pipelines/stable_diffusion_sag/__init__.py,sha256=06vnWbASiG3o4sQ7CDlDrqEm6dSCerKdLODz1FS-EFE,1338 +diffusers/pipelines/stable_diffusion_sag/__pycache__/__init__.cpython-312.pyc,, 
+diffusers/pipelines/stable_diffusion_sag/__pycache__/pipeline_stable_diffusion_sag.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py,sha256=or7Ha2esrR0aUtMQjyDuy_CIRo84stsERHO_mM-I3cU,47819 +diffusers/pipelines/stable_diffusion_xl/__init__.py,sha256=6lTMI458kVDLzQDeZxEBacdFxpj4xAY9CSZ6Xr_FWoY,3022 +diffusers/pipelines/stable_diffusion_xl/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion_xl/__pycache__/pipeline_flax_stable_diffusion_xl.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion_xl/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion_xl/__pycache__/pipeline_stable_diffusion_xl.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion_xl/__pycache__/pipeline_stable_diffusion_xl_img2img.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion_xl/__pycache__/pipeline_stable_diffusion_xl_inpaint.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion_xl/__pycache__/pipeline_stable_diffusion_xl_instruct_pix2pix.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion_xl/__pycache__/watermark.cpython-312.pyc,, +diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py,sha256=TRTK1BDZPrNRO82D3QAVo1ECIn5kRdsAqsnQG6GVR1U,11207 +diffusers/pipelines/stable_diffusion_xl/pipeline_output.py,sha256=l_JTgKtp6dXebdioUy3uImnJkaglIRTT1ieNWavpkJc,1000 +diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py,sha256=SZvxi4AfNmk4UeO46qQjNQDOFsfdReIZ6snG4XoQSi4,67288 +diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py,sha256=BG4-mocfYR2DpKkThD859B85NUhtCXRNsNrgP5uRoVA,78363 +diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py,sha256=3gZHJIUzgUfQE1_iLH3cGwKmoXh-hJtxa87eO18uZDM,90091 +diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py,sha256=SRAUbg0GTlZISoZpiGH8Kf3mN0TQV4W9L-0PWbEFMPA,52298 
+diffusers/pipelines/stable_diffusion_xl/watermark.py,sha256=LDItvRnZKokIUchP0oIrO2Ew9AARhAP4MMrQY8maQ6Q,1458 +diffusers/pipelines/stable_video_diffusion/__init__.py,sha256=QtcDxzfLJ7loCDspiulKyKU6kd-l3twJyWBDPraD_94,1551 +diffusers/pipelines/stable_video_diffusion/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/stable_video_diffusion/__pycache__/pipeline_stable_video_diffusion.cpython-312.pyc,, +diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py,sha256=1MWYAfmU-16Yb18pZe1zsZvfzY3iesG9Vsj4ewkoM-c,32658 +diffusers/pipelines/t2i_adapter/__init__.py,sha256=PgIg_SzwFAqWOML5BLHvuCTmu4p06MPT66xBpDShx8c,1556 +diffusers/pipelines/t2i_adapter/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/t2i_adapter/__pycache__/pipeline_stable_diffusion_adapter.cpython-312.pyc,, +diffusers/pipelines/t2i_adapter/__pycache__/pipeline_stable_diffusion_xl_adapter.cpython-312.pyc,, +diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py,sha256=6RP6lWjBFr0yJVXb-fSF-PPEH2o15EKhv6Z9VtvWtwk,47739 +diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py,sha256=yOUc_sk-P-0UFh-w2Bl9AzUr1YItsK6kYlEPCLsNZqA,68956 +diffusers/pipelines/text_to_video_synthesis/__init__.py,sha256=7-NplGtgnp5GUu4XN_STE9fqAtFCAc6FF3lphjbDBhs,1979 +diffusers/pipelines/text_to_video_synthesis/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/text_to_video_synthesis/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/text_to_video_synthesis/__pycache__/pipeline_text_to_video_synth.cpython-312.pyc,, +diffusers/pipelines/text_to_video_synthesis/__pycache__/pipeline_text_to_video_synth_img2img.cpython-312.pyc,, +diffusers/pipelines/text_to_video_synthesis/__pycache__/pipeline_text_to_video_zero.cpython-312.pyc,, +diffusers/pipelines/text_to_video_synthesis/__pycache__/pipeline_text_to_video_zero_sdxl.cpython-312.pyc,, 
+diffusers/pipelines/text_to_video_synthesis/pipeline_output.py,sha256=b4ldNJqGZD7q2jRD-Jcyszy7TPc89vngBaYCISPImcU,699 +diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py,sha256=ZH4KaDvp7j-N9u0lqZHpNpVCR8cY14I6CrliePeRgi4,31669 +diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py,sha256=Y9HQ5mJ6PKDRwmZOMPhy0My4t8XXc8o-2cCqzBM2JL4,35336 +diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py,sha256=x3kBrONsjQ4cU5KvdW8SD21DWF8tADPwcc35JW193jQ,45635 +diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py,sha256=YMsR3ylRyHaxAXUsv0xzrnFnC2Wr4GWphApSsO4IU3w,64257 +diffusers/pipelines/transformers_loading_utils.py,sha256=e0L0rbCLH4SHf6qaOl-gnYEX7naluEAahgESavurUNA,5440 +diffusers/pipelines/unclip/__init__.py,sha256=jBYZIN7NhTKM_Oq7ipJ4JaMXO-GtdchmFWe07gDerfA,1752 +diffusers/pipelines/unclip/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/unclip/__pycache__/pipeline_unclip.cpython-312.pyc,, +diffusers/pipelines/unclip/__pycache__/pipeline_unclip_image_variation.cpython-312.pyc,, +diffusers/pipelines/unclip/__pycache__/text_proj.cpython-312.pyc,, +diffusers/pipelines/unclip/pipeline_unclip.py,sha256=yUBGgNBt5WfHospXABlV14dP0ez3Omc2kHFpbetKues,22343 +diffusers/pipelines/unclip/pipeline_unclip_image_variation.py,sha256=18MOLkYfKmindjg195jN1yjEWZZts-xmUHXolddhYEg,19280 +diffusers/pipelines/unclip/text_proj.py,sha256=UkICP2P1vL8tVJr92A6gPFuqrjl03fAs-HiFFkZDvfU,4286 +diffusers/pipelines/unidiffuser/__init__.py,sha256=GvGtf-AToJXNHxv3RAo5_I_9zPQjDFbMTAHICCt-4xY,1814 +diffusers/pipelines/unidiffuser/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/unidiffuser/__pycache__/modeling_text_decoder.cpython-312.pyc,, +diffusers/pipelines/unidiffuser/__pycache__/modeling_uvit.cpython-312.pyc,, +diffusers/pipelines/unidiffuser/__pycache__/pipeline_unidiffuser.cpython-312.pyc,, 
+diffusers/pipelines/unidiffuser/modeling_text_decoder.py,sha256=PDCy1NWLNqvGvqB2gIYk26rEdJerLv3czmlJuOJQuQ0,14067 +diffusers/pipelines/unidiffuser/modeling_uvit.py,sha256=5RP2pLKhmUySfNMuLxsDGdbt778VE1huARuI39muq_8,54179 +diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py,sha256=0tGHaLj_FKXG_P8isKw9OcVBNH6OlwO0LFTDZr24wMQ,69986 +diffusers/pipelines/visualcloze/__init__.py,sha256=Cgc6UqhelXaUEYJbmEf_kGBY90hS3wx7eIiO1C5A0g4,1502 +diffusers/pipelines/visualcloze/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/visualcloze/__pycache__/pipeline_visualcloze_combined.cpython-312.pyc,, +diffusers/pipelines/visualcloze/__pycache__/pipeline_visualcloze_generation.cpython-312.pyc,, +diffusers/pipelines/visualcloze/__pycache__/visualcloze_utils.cpython-312.pyc,, +diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py,sha256=Uq8dGxo6T-yggIIRcVsS6MB1iqh8Z2DHxM-2UhYPYyg,23893 +diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py,sha256=AggqJxMgaexua-Uz-fuoftfvMVNoEGpsLLE84EkDfz4,46442 +diffusers/pipelines/visualcloze/visualcloze_utils.py,sha256=z0PP7w_t3A7X9jQMbOvjGGQw6bmPozir89zKkeQ8ang,10787 +diffusers/pipelines/wan/__init__.py,sha256=dCs97uhd2RGCErzb_VV06mpeDswDNV3Mh2hSgpOntw4,1808 +diffusers/pipelines/wan/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/wan/__pycache__/image_processor.cpython-312.pyc,, +diffusers/pipelines/wan/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/wan/__pycache__/pipeline_wan.cpython-312.pyc,, +diffusers/pipelines/wan/__pycache__/pipeline_wan_animate.cpython-312.pyc,, +diffusers/pipelines/wan/__pycache__/pipeline_wan_i2v.cpython-312.pyc,, +diffusers/pipelines/wan/__pycache__/pipeline_wan_vace.cpython-312.pyc,, +diffusers/pipelines/wan/__pycache__/pipeline_wan_video2video.cpython-312.pyc,, +diffusers/pipelines/wan/image_processor.py,sha256=PLNflTtaffcVyADaA0L45-dh8qX4RE28r0Rj4p-Kqnc,8334 
+diffusers/pipelines/wan/pipeline_output.py,sha256=aPng1TOZ9Pw7Ma03zEB0horbZHXQO_e9gMyRaSPYKrE,605 +diffusers/pipelines/wan/pipeline_wan.py,sha256=IN8hcjQq6qpuTXCpZrU4z78_0VPo7b1npUm78iOIaKw,31518 +diffusers/pipelines/wan/pipeline_wan_animate.py,sha256=qD7vGV_l5r5OxMRgh6r8A-NKmwNQhta6mRWP3-RaDC8,58872 +diffusers/pipelines/wan/pipeline_wan_i2v.py,sha256=JHjLy9pHQ_TEZAmfF5zH3f6prBhYEWLLAm9tlr_Htmw,40215 +diffusers/pipelines/wan/pipeline_wan_vace.py,sha256=ynCEiPVUeNv1w-Nd1GUqYVAdUZ8P7lIRGz_EHFOvyfc,52085 +diffusers/pipelines/wan/pipeline_wan_video2video.py,sha256=ePd9mWEdTY9ssOImN-JdZKxEPZdc-q9duIUVHpv5YG0,33302 +diffusers/pipelines/wuerstchen/__init__.py,sha256=JSCoPCwV_rBJiCy4jbILRoAgQSITS4-j77qOPmzy284,2100 +diffusers/pipelines/wuerstchen/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/wuerstchen/__pycache__/modeling_paella_vq_model.cpython-312.pyc,, +diffusers/pipelines/wuerstchen/__pycache__/modeling_wuerstchen_common.cpython-312.pyc,, +diffusers/pipelines/wuerstchen/__pycache__/modeling_wuerstchen_diffnext.cpython-312.pyc,, +diffusers/pipelines/wuerstchen/__pycache__/modeling_wuerstchen_prior.cpython-312.pyc,, +diffusers/pipelines/wuerstchen/__pycache__/pipeline_wuerstchen.cpython-312.pyc,, +diffusers/pipelines/wuerstchen/__pycache__/pipeline_wuerstchen_combined.cpython-312.pyc,, +diffusers/pipelines/wuerstchen/__pycache__/pipeline_wuerstchen_prior.cpython-312.pyc,, +diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py,sha256=EF4RtR17KZOUK_wPCLUs4djyxgRx3IAPE6IhdH7DnpY,6888 +diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py,sha256=mx0bj5b87g590UQhoFWY_L0ht_RTIynaPQa9DLk9MTU,2713 +diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py,sha256=-vKLdD7RkbDlUPpX8sgjKUTpbq7bwBdHP6y8L8kPwS0,10423 +diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py,sha256=GcZx7dVgTvAvnYZ8uVmFlRupTOhA2O2Qk5ih3J9feY0,4581 +diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py,sha256=kKDiLqLsndxZgPKKhfglY5Zr3zKa-Ssslsj9d69fFhs,20755 
+diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py,sha256=ES7Kmu-AlmB9FnQX5r8Asc2q5fOGEOus7WMz7Do0I7w,16567 +diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py,sha256=zfjocrZIkLCbnBYvRM-1390SK87HiQHXvavqRt-YS0M,24035 +diffusers/pipelines/z_image/__init__.py,sha256=Y6DYPKMJf-Ww7uOUd_dFjU2jltwEDQB_UOJLPssgdHQ,2186 +diffusers/pipelines/z_image/__pycache__/__init__.cpython-312.pyc,, +diffusers/pipelines/z_image/__pycache__/pipeline_output.cpython-312.pyc,, +diffusers/pipelines/z_image/__pycache__/pipeline_z_image.cpython-312.pyc,, +diffusers/pipelines/z_image/__pycache__/pipeline_z_image_controlnet.cpython-312.pyc,, +diffusers/pipelines/z_image/__pycache__/pipeline_z_image_controlnet_inpaint.cpython-312.pyc,, +diffusers/pipelines/z_image/__pycache__/pipeline_z_image_img2img.cpython-312.pyc,, +diffusers/pipelines/z_image/__pycache__/pipeline_z_image_inpaint.cpython-312.pyc,, +diffusers/pipelines/z_image/__pycache__/pipeline_z_image_omni.cpython-312.pyc,, +diffusers/pipelines/z_image/pipeline_output.py,sha256=Xltw35TdqqQZdpkXhvs2n9EHjjyUcQVCx75obFOLK3w,1192 +diffusers/pipelines/z_image/pipeline_z_image.py,sha256=cu7ygNOys7PR-wGfN5dO08vNkIOpV85lUnI_bku0ojk,26323 +diffusers/pipelines/z_image/pipeline_z_image_controlnet.py,sha256=5CG85cLkT7TG56XLO7abRpINBFPF9Geb3oqJkUBG6sY,32022 +diffusers/pipelines/z_image/pipeline_z_image_controlnet_inpaint.py,sha256=Bih3NkqZQwNOW28s36R3S4JMeGvvAqri83P63-cLmfc,33691 +diffusers/pipelines/z_image/pipeline_z_image_img2img.py,sha256=GJKTdw66eZI4AFL6nz4SfP8hmNmt1I1Gd8wHq95CCc8,32421 +diffusers/pipelines/z_image/pipeline_z_image_inpaint.py,sha256=IIfFH9xTC66zHSO_Z90RzKjmN5DWZmI3dT4IIeQTFaU,42819 +diffusers/pipelines/z_image/pipeline_z_image_omni.py,sha256=DA-iFM1kK44SjXOq8wW67ID2AAjUKf15gzJR4iMjxjQ,33750 +diffusers/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +diffusers/quantizers/__init__.py,sha256=WRPgr_dguadpWwogTAFbdSFgr6ER-DwHtquZVPAONhY,744 +diffusers/quantizers/__pycache__/__init__.cpython-312.pyc,, 
+diffusers/quantizers/__pycache__/auto.cpython-312.pyc,, +diffusers/quantizers/__pycache__/base.cpython-312.pyc,, +diffusers/quantizers/__pycache__/pipe_quant_config.cpython-312.pyc,, +diffusers/quantizers/__pycache__/quantization_config.cpython-312.pyc,, +diffusers/quantizers/auto.py,sha256=TWjbyJGalgaa-qYjwtF8PlkYPmZFZ9gsG5MGbK9slm8,6006 +diffusers/quantizers/base.py,sha256=l-5fBVGDIi7R3gwUpULL3K4cG7JLWkePemwL6_HBT_8,10284 +diffusers/quantizers/bitsandbytes/__init__.py,sha256=ILCM6ZopnzrhM_fW1oh4J_YCNsaEQAptcoTuSVgXab8,170 +diffusers/quantizers/bitsandbytes/__pycache__/__init__.cpython-312.pyc,, +diffusers/quantizers/bitsandbytes/__pycache__/bnb_quantizer.cpython-312.pyc,, +diffusers/quantizers/bitsandbytes/__pycache__/utils.cpython-312.pyc,, +diffusers/quantizers/bitsandbytes/bnb_quantizer.py,sha256=fKI8gtan-ICyvAr9odP09L5h4_IIfo2vGTCPN_9Cwbc,26209 +diffusers/quantizers/bitsandbytes/utils.py,sha256=mK_AmeSWETugVsyW1BejFa83cUckw-CA72IXwnmMV_U,13659 +diffusers/quantizers/gguf/__init__.py,sha256=2bxvfZbFr4xqm953cZaGJMgSCRiGJAWwbwKxNRQIEs4,42 +diffusers/quantizers/gguf/__pycache__/__init__.cpython-312.pyc,, +diffusers/quantizers/gguf/__pycache__/gguf_quantizer.cpython-312.pyc,, +diffusers/quantizers/gguf/__pycache__/utils.cpython-312.pyc,, +diffusers/quantizers/gguf/gguf_quantizer.py,sha256=s2359dyIkyzdvZQIag4ct2tgHaDHm5wBrgW68_V-C0M,6006 +diffusers/quantizers/gguf/utils.py,sha256=b0qkvwTNXdfg_4oToP6uDBK45N9tiWBbhc4h9PhE12o,22477 +diffusers/quantizers/modelopt/__init__.py,sha256=sJGgokwhYRcmtLZV4jSpOe7hBIsgk_0L-ZjEWOHippY,56 +diffusers/quantizers/modelopt/__pycache__/__init__.cpython-312.pyc,, +diffusers/quantizers/modelopt/__pycache__/modelopt_quantizer.cpython-312.pyc,, +diffusers/quantizers/modelopt/modelopt_quantizer.py,sha256=X6FGrnSuGgihY8IgYgV_dy_E4a3gb6xrOCtOsK_6JoM,6750 +diffusers/quantizers/pipe_quant_config.py,sha256=Ht8fr3m3YWJ1qOw0XEIlRjzDBQyCsctsMwLJW-xzCao,9423 
+diffusers/quantizers/quantization_config.py,sha256=egXPziAFAQ-Ii2Nk4FHuUre8HPTGHKemBnt5a-NhH4o,47753 +diffusers/quantizers/quanto/__init__.py,sha256=ynS7j_VTG-QtimbyxHAaihUmi6eVqEDxA5dnKGjeS5M,46 +diffusers/quantizers/quanto/__pycache__/__init__.cpython-312.pyc,, +diffusers/quantizers/quanto/__pycache__/quanto_quantizer.cpython-312.pyc,, +diffusers/quantizers/quanto/__pycache__/utils.cpython-312.pyc,, +diffusers/quantizers/quanto/quanto_quantizer.py,sha256=1wJmsDeMrhBVTPO-48oJ-3tmJ1sesJgPPgZh1kbLtmk,6305 +diffusers/quantizers/quanto/utils.py,sha256=6-EaqWTbhb0dkuJ0C8XRDIpMmowPVjJ_C4rPaNNHMkc,2448 +diffusers/quantizers/torchao/__init__.py,sha256=tJimVpSGQsz3owo3yzh2SuCg7NQfiMnrHkAHyYHkmGA,662 +diffusers/quantizers/torchao/__pycache__/__init__.cpython-312.pyc,, +diffusers/quantizers/torchao/__pycache__/torchao_quantizer.cpython-312.pyc,, +diffusers/quantizers/torchao/torchao_quantizer.py,sha256=2NaNkhrB8As21lndoQXny1S3kcuMd0wInBijLrAv8G4,17677 +diffusers/schedulers/__init__.py,sha256=GBBcPOUXgvMjijpoln7Fr_y7_GUu2dvHTXrL4_nfz84,11614 +diffusers/schedulers/__pycache__/__init__.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_amused.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_consistency_decoder.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_consistency_models.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_cosine_dpmsolver_multistep.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_ddim.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_ddim_cogvideox.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_ddim_flax.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_ddim_inverse.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_ddim_parallel.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_ddpm.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_ddpm_flax.cpython-312.pyc,, 
+diffusers/schedulers/__pycache__/scheduling_ddpm_parallel.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_ddpm_wuerstchen.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_deis_multistep.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_dpm_cogvideox.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_dpmsolver_multistep.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_dpmsolver_multistep_flax.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_dpmsolver_multistep_inverse.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_dpmsolver_sde.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_dpmsolver_singlestep.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_edm_dpmsolver_multistep.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_edm_euler.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_euler_ancestral_discrete.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_euler_discrete.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_euler_discrete_flax.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_flow_match_euler_discrete.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_flow_match_heun_discrete.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_flow_match_lcm.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_helios.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_helios_dmd.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_heun_discrete.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_ipndm.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_k_dpm_2_ancestral_discrete.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_k_dpm_2_discrete.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_karras_ve_flax.cpython-312.pyc,, 
+diffusers/schedulers/__pycache__/scheduling_lcm.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_lms_discrete.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_lms_discrete_flax.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_ltx_euler_ancestral_rf.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_pndm.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_pndm_flax.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_repaint.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_sasolver.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_scm.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_sde_ve.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_sde_ve_flax.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_tcd.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_unclip.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_unipc_multistep.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_utils.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_utils_flax.cpython-312.pyc,, +diffusers/schedulers/__pycache__/scheduling_vq_diffusion.cpython-312.pyc,, +diffusers/schedulers/deprecated/__init__.py,sha256=3QlQ4gSBFu4zUkY3S5KLxd9sukbxLv8Aj4eO0Rymaq0,1349 +diffusers/schedulers/deprecated/__pycache__/__init__.cpython-312.pyc,, +diffusers/schedulers/deprecated/__pycache__/scheduling_karras_ve.cpython-312.pyc,, +diffusers/schedulers/deprecated/__pycache__/scheduling_sde_vp.cpython-312.pyc,, +diffusers/schedulers/deprecated/scheduling_karras_ve.py,sha256=eEbENBqV96tSya_QqFQi43PIHmLZrPV41gvRFDSgIbQ,9640 +diffusers/schedulers/deprecated/scheduling_sde_vp.py,sha256=N4-LJbGGbhbIvfp0V-OfRV9WAASvO4L_exkeUhVEWas,4263 +diffusers/schedulers/scheduling_amused.py,sha256=zTi85yAhPJEToXEQU9elvyfpqMzzpdcq9F37fehEGRM,9712 
+diffusers/schedulers/scheduling_consistency_decoder.py,sha256=iUyaa-e3m79X3bzmIY5Y2XSNh6m4nuNXayjwlxUH_xc,8233 +diffusers/schedulers/scheduling_consistency_models.py,sha256=-ENZPLDsECa_TyydUdm0ELP2wcvaH5p0gm4fKGZjeK4,21627 +diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py,sha256=f8Bvm_2dxcF55hgevUBNzy3tGZY9KHqqcFSgBRmtNC0,31325 +diffusers/schedulers/scheduling_ddim.py,sha256=ph5NmUAtEYbSrsG-yLDzNhCLBtcsB-frLrg3pEqbGWw,28089 +diffusers/schedulers/scheduling_ddim_cogvideox.py,sha256=zbVRKUfu0CAURHFG62VZShM_ASf3l11cKTGip01Rf3o,23218 +diffusers/schedulers/scheduling_ddim_flax.py,sha256=yRDHpzHsNcwFzntRTyMejBikOc_DjYHQjsI-p_ixBvs,13409 +diffusers/schedulers/scheduling_ddim_inverse.py,sha256=6H41zytCQuo_gJaGJn91p4ON-xLTk_rXpVlNr9cVVYg,17656 +diffusers/schedulers/scheduling_ddim_parallel.py,sha256=9Gyk6AaLY9N0_iB2kNbK4IRO9rjieOuY3CV1RUYqaU0,33796 +diffusers/schedulers/scheduling_ddpm.py,sha256=OCIgXY6Vk42mr4XfueBFXBW8N5g9LVhC7lhsKbx1t0o,29755 +diffusers/schedulers/scheduling_ddpm_flax.py,sha256=OS-ATlwM4TWNyGOXV1hvLy3xqV1AVFCqsbXCHoq03e0,12848 +diffusers/schedulers/scheduling_ddpm_parallel.py,sha256=HcrOLUZm07DsFAGqYApH3p13uggNdTAHyOjngTaLK2c,34965 +diffusers/schedulers/scheduling_ddpm_wuerstchen.py,sha256=wLTlRTJdX2Fx0_lCSWcBHy_nF4_215WcKRh10xfOcJs,8865 +diffusers/schedulers/scheduling_deis_multistep.py,sha256=EAWdMfpFigazQU6b-7IVOmxctqsS2yqv_JstG3you1I,45850 +diffusers/schedulers/scheduling_dpm_cogvideox.py,sha256=4nBNhI_Chmc-AvnBv31acM7hZlZ14MBsWb9xtZRYtmA,27481 +diffusers/schedulers/scheduling_dpmsolver_multistep.py,sha256=8Hz8SsIgWN6NGYSgAhm8RfCOWYH2cIiaIRdYb9z2yn0,61762 +diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py,sha256=0o0UeGKncisVC8QvFX0uvjRy0i7r2UCs-tUJkpqwKfg,29266 +diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py,sha256=1dTYTEkEavlsJV0tc0RVSF9jtsgHu1YD4rPJmn2LCMY,53653 +diffusers/schedulers/scheduling_dpmsolver_sde.py,sha256=HFjoEORoyfe6JnkHHAvSPLeIPpR6x2Ow_6NYnWR4dPE,34850 
+diffusers/schedulers/scheduling_dpmsolver_singlestep.py,sha256=XsEeMwdhg1-s1JW6bp0ZNs9RqBkDU3nsb_0ZOwzal24,60276 +diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py,sha256=uXf1_ERLu2UbHOxg8-XwaL7Ba1VdNGTqGGrigT7Mpc8,37693 +diffusers/schedulers/scheduling_edm_euler.py,sha256=yiKc12VCEE7NlBJj0M1fq9lCA5qtNepou4hkNFVi1A4,25445 +diffusers/schedulers/scheduling_euler_ancestral_discrete.py,sha256=msEenChRhjZo413GhduJvKQGlYyfjrlaTwTpmJareLE,23300 +diffusers/schedulers/scheduling_euler_discrete.py,sha256=b8DOqSNuwWVuoCWmsAl_Su0vL8lJHZaPjCx3Zy2NAPs,41829 +diffusers/schedulers/scheduling_euler_discrete_flax.py,sha256=OdMnCXpo4Del_OMzzAajBOuSjW0QUHzhW3uS4r8V4uQ,11215 +diffusers/schedulers/scheduling_flow_match_euler_discrete.py,sha256=VqMwvFdlV4rJc4Jlx2oyE6tCrWG-_owEB3fFONpSBF0,26869 +diffusers/schedulers/scheduling_flow_match_heun_discrete.py,sha256=VfA5--JrbmsI_-ASj-2M5jKGKywZqaCIE69IeBtUAp4,13186 +diffusers/schedulers/scheduling_flow_match_lcm.py,sha256=zs20X8lTghONQw3NUf9b-slYncXQ0hi3kcML5ihDv7U,26740 +diffusers/schedulers/scheduling_helios.py,sha256=VAqdd4AxIgxXeFleN7WJSb_wGb5o1GUv9MB-fHpD4_I,32883 +diffusers/schedulers/scheduling_helios_dmd.py,sha256=8Jq7fUHgu1RSYTYGXAIHJf7idYMBA1kJMPxy56RvxdQ,13173 +diffusers/schedulers/scheduling_heun_discrete.py,sha256=qZXmloRXXDx0F6RSMFlizuQBmErBW5czfbu1Zzm2cx4,31918 +diffusers/schedulers/scheduling_ipndm.py,sha256=oOFmht5IDeCUWQNcX_e6ia4aTpbfbRBOI40LelfPa8Y,10694 +diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py,sha256=EHPpvxHZDwjz56BnzEkZ0GShzAKDhHj57HIKZVyMeYU,31474 +diffusers/schedulers/scheduling_k_dpm_2_discrete.py,sha256=ynp9ku2QCGas1fFLvAgpH5EunBvnAeNMd_FAaauiDn0,30010 +diffusers/schedulers/scheduling_karras_ve_flax.py,sha256=gxbsvzhcSky6d0ESZmOhoLweDNNtetsFsSL2nvDZJ6k,9808 +diffusers/schedulers/scheduling_lcm.py,sha256=cX0j8v3iNaCDTrWWdKt5tDatKlDiGzof5pmDbviNtwc,34857 +diffusers/schedulers/scheduling_lms_discrete.py,sha256=K2Fi9vi865sdWU6HvZVbMa5Mj3yxk3zKokbdfwH2Ekk,29201 
+diffusers/schedulers/scheduling_lms_discrete_flax.py,sha256=blQRw9rhLo1cPupvp9jJCk-v273xOAzns2LDvNXvtW4,11460 +diffusers/schedulers/scheduling_ltx_euler_ancestral_rf.py,sha256=dN0vKo15g3TGRZb_7mGvD1f7LahAh7XFsPLuvTRcesA,16454 +diffusers/schedulers/scheduling_pndm.py,sha256=vQjy6c4wkHi0QfV6c9kyBB-VkLdNU8JrxRBLcxbJ-Ko,22905 +diffusers/schedulers/scheduling_pndm_flax.py,sha256=8_g6e31tQ97MwTNwPdbD7BFsimGPfUQj4Hirs17FCtw,21891 +diffusers/schedulers/scheduling_repaint.py,sha256=ap-US8I-ymEiFc5ZozgyPGcibSaxs1Oh1rr7G9V5i1E,16078 +diffusers/schedulers/scheduling_sasolver.py,sha256=Irr_n9nIXV_5gLaAtyoCgsECuBNzHBSsjLR2js7_k74,59184 +diffusers/schedulers/scheduling_scm.py,sha256=0Yh74PwE82_rI2k4xRBaUZoIIcpyY3hgIUYwswcqozo,12284 +diffusers/schedulers/scheduling_sde_ve.py,sha256=5d1Xiy5AmHlEVPngd3falng4lvdXZ_s9X2ckDXuDueU,13231 +diffusers/schedulers/scheduling_sde_ve_flax.py,sha256=sEWgb6IFH3aKkQR-A3nTS44PZ72-nyMpVa6tOtC1EXE,12393 +diffusers/schedulers/scheduling_tcd.py,sha256=bL0vA0aICPCdGWDdKdgXKC6a3qWked_RrQpnw2JKzcM,38372 +diffusers/schedulers/scheduling_unclip.py,sha256=WFFHpLhu45Gzb5tCckGSN0LDDK47NwIA20ewaGuyL_8,15939 +diffusers/schedulers/scheduling_unipc_multistep.py,sha256=AIXPoH7IJ2Yu2l8ZY4at0zKq80-JVK1SOAoHCzX5zSI,56088 +diffusers/schedulers/scheduling_utils.py,sha256=Quc85UiJdnsnoSxVjROXrQVX6E9s6q5t79bPKzbr7bA,8627 +diffusers/schedulers/scheduling_utils_flax.py,sha256=rlrGIwg0cHC5rELupDpISPIphLPL8TdQzKBFHtbqg3s,12309 +diffusers/schedulers/scheduling_vq_diffusion.py,sha256=4bBTHT36LiwTXjIr36oshZzjrl6KJpIZyMKuIKG6oMU,22894 +diffusers/training_utils.py,sha256=JzhecE3Hr6s4pdSI6OG4kV3gPlZ7lAMAZiU4W3YAc3A,32646 +diffusers/utils/__init__.py,sha256=JeA3Rq-qmMFxNVlJk7N-hKZGJ0jT4hDNBV4X-LEXK1g,5149 +diffusers/utils/__pycache__/__init__.cpython-312.pyc,, +diffusers/utils/__pycache__/accelerate_utils.cpython-312.pyc,, +diffusers/utils/__pycache__/constants.cpython-312.pyc,, +diffusers/utils/__pycache__/deprecation_utils.cpython-312.pyc,, 
+diffusers/utils/__pycache__/distributed_utils.cpython-312.pyc,, +diffusers/utils/__pycache__/doc_utils.cpython-312.pyc,, +diffusers/utils/__pycache__/dummy_bitsandbytes_objects.cpython-312.pyc,, +diffusers/utils/__pycache__/dummy_flax_and_transformers_objects.cpython-312.pyc,, +diffusers/utils/__pycache__/dummy_flax_objects.cpython-312.pyc,, +diffusers/utils/__pycache__/dummy_gguf_objects.cpython-312.pyc,, +diffusers/utils/__pycache__/dummy_note_seq_objects.cpython-312.pyc,, +diffusers/utils/__pycache__/dummy_nvidia_modelopt_objects.cpython-312.pyc,, +diffusers/utils/__pycache__/dummy_onnx_objects.cpython-312.pyc,, +diffusers/utils/__pycache__/dummy_optimum_quanto_objects.cpython-312.pyc,, +diffusers/utils/__pycache__/dummy_pt_objects.cpython-312.pyc,, +diffusers/utils/__pycache__/dummy_torch_and_librosa_objects.cpython-312.pyc,, +diffusers/utils/__pycache__/dummy_torch_and_scipy_objects.cpython-312.pyc,, +diffusers/utils/__pycache__/dummy_torch_and_torchsde_objects.cpython-312.pyc,, +diffusers/utils/__pycache__/dummy_torch_and_transformers_and_onnx_objects.cpython-312.pyc,, +diffusers/utils/__pycache__/dummy_torch_and_transformers_and_opencv_objects.cpython-312.pyc,, +diffusers/utils/__pycache__/dummy_torch_and_transformers_and_sentencepiece_objects.cpython-312.pyc,, +diffusers/utils/__pycache__/dummy_torch_and_transformers_objects.cpython-312.pyc,, +diffusers/utils/__pycache__/dummy_torchao_objects.cpython-312.pyc,, +diffusers/utils/__pycache__/dummy_transformers_and_torch_and_note_seq_objects.cpython-312.pyc,, +diffusers/utils/__pycache__/dynamic_modules_utils.cpython-312.pyc,, +diffusers/utils/__pycache__/export_utils.cpython-312.pyc,, +diffusers/utils/__pycache__/hub_utils.cpython-312.pyc,, +diffusers/utils/__pycache__/import_utils.cpython-312.pyc,, +diffusers/utils/__pycache__/loading_utils.cpython-312.pyc,, +diffusers/utils/__pycache__/logging.cpython-312.pyc,, +diffusers/utils/__pycache__/outputs.cpython-312.pyc,, 
+diffusers/utils/__pycache__/peft_utils.cpython-312.pyc,, +diffusers/utils/__pycache__/pil_utils.cpython-312.pyc,, +diffusers/utils/__pycache__/remote_utils.cpython-312.pyc,, +diffusers/utils/__pycache__/source_code_parsing_utils.cpython-312.pyc,, +diffusers/utils/__pycache__/state_dict_utils.cpython-312.pyc,, +diffusers/utils/__pycache__/testing_utils.cpython-312.pyc,, +diffusers/utils/__pycache__/torch_utils.cpython-312.pyc,, +diffusers/utils/__pycache__/typing_utils.cpython-312.pyc,, +diffusers/utils/__pycache__/versions.cpython-312.pyc,, +diffusers/utils/accelerate_utils.py,sha256=ZkopOK29_6QrrdkIPidHnO078B8BtzzFSts3ul2cP8Q,1839 +diffusers/utils/constants.py,sha256=JBlMYowqsTlXqUqZqE1dJuOxIplhvGWMwL4NANgm2Xg,3858 +diffusers/utils/deprecation_utils.py,sha256=YgzNTWN9ZtyzpBVwva7uzNjJ9LTJpr53XT93pepxUiw,3603 +diffusers/utils/distributed_utils.py,sha256=53-rABpIZoMrCFMFRDSe5C87jdzTfkO-ym28GUxYR3k,1045 +diffusers/utils/doc_utils.py,sha256=3x-UhJrm7eNcJiQAYnYefoH7Q-V2qPmc04TzbEr25-o,1348 +diffusers/utils/dummy_bitsandbytes_objects.py,sha256=7uoVirIvcuylaDyUas94Wc9AAKJMJS3GEv-in3bEqJA,527 +diffusers/utils/dummy_flax_and_transformers_objects.py,sha256=XyiqnjacRb86sS9F_VwniBrLLEmff2cgJM2X4T_RAg4,2358 +diffusers/utils/dummy_flax_objects.py,sha256=EIyO7jYPH4yjuBIxysZWE0rka3qPLEl1TmMBt5SwXNA,5316 +diffusers/utils/dummy_gguf_objects.py,sha256=H0SYZuOON9cFlkyYSTcUJJk4skYjgjIu-wDausCm0sU,499 +diffusers/utils/dummy_note_seq_objects.py,sha256=DffX40mDzWTMCyYhKudgIeBhtqTSpiSkVzcAMRue8dY,506 +diffusers/utils/dummy_nvidia_modelopt_objects.py,sha256=Dmn8fqZkPiiw2YdTYh-WFHFTZLfS-97Vaf8-87xXZ80,541 +diffusers/utils/dummy_onnx_objects.py,sha256=4Z61m3P9NUwbebsK58wAKs6y32Id6UaiSRyeHXo3ecA,493 +diffusers/utils/dummy_optimum_quanto_objects.py,sha256=_k-3g7WAYcJO0-38rJrHX7aIAzkkICtj2ISiggVFiz8,529 +diffusers/utils/dummy_pt_objects.py,sha256=JKmZ-opsSvAUlq4m-9SWOfTdFtMBIDjGIleOWTzKL8I,77047 
+diffusers/utils/dummy_torch_and_librosa_objects.py,sha256=JUfqU2n3tSKHyWbjSXrpdW_jr-YbMxAvAhLlPa2_Rxs,948 +diffusers/utils/dummy_torch_and_scipy_objects.py,sha256=zOLdmqbtma5nakkdYgoErISV28yaztmBLI3wrC2Z_bU,537 +diffusers/utils/dummy_torch_and_torchsde_objects.py,sha256=EJiExfXva8tnRJEn-VaCkcII31WnPr2HqdTh3PBQ-jk,985 +diffusers/utils/dummy_torch_and_transformers_and_onnx_objects.py,sha256=SiKni7YZ-pmZrurHU3-lhbDGKOGCCVxSK3GJbrARqgU,3023 +diffusers/utils/dummy_torch_and_transformers_and_opencv_objects.py,sha256=Hvskt_HEoCkRikDyiYWQ95CsSv0fX7jYqxDxjJuNIZc,601 +diffusers/utils/dummy_torch_and_transformers_and_sentencepiece_objects.py,sha256=rauUQkG4sLSyBVeEbfp5fhJFJUGqw273oXbN_KC8NIM,1637 +diffusers/utils/dummy_torch_and_transformers_objects.py,sha256=UDdSLquqwYH9bQFiB5zJraDGGwKrfxBnLs9THzL8dXQ,127294 +diffusers/utils/dummy_torchao_objects.py,sha256=XiJAoV11rr_7aSgkb0vgYkdZOGjM1ouTuQZ4YrXzJ4g,502 +diffusers/utils/dummy_transformers_and_torch_and_note_seq_objects.py,sha256=z-JrPgPo2dWv-buMytUqBd6QqEx8Uha6M1cKa6gR4Dc,621 +diffusers/utils/dynamic_modules_utils.py,sha256=18jhJaObFexuBP0YXseu6sWQmsTLOXO0mCAxDOm6JBo,22210 +diffusers/utils/export_utils.py,sha256=iJ5R-Z4vb429RJAu1YxLn0WJ2Pc6J7bqwf8hqvJ4pWc,7690 +diffusers/utils/hub_utils.py,sha256=BRxqLLLZLvYUwQ6QVXpByGHD8x0Zpo9lJ9cvi6tnC80,25616 +diffusers/utils/import_utils.py,sha256=7J06VZh4e7G5OSaq-GWy0lSgiy0L8S1xsysFuIrIneE,34826 +diffusers/utils/loading_utils.py,sha256=n4A1J-theDPKK3UJr6wniiUojRrKmGz-OtJeDEZnWyM,5258 +diffusers/utils/logging.py,sha256=7CvYmxNX0AB2AZ_keQi3DURJpl8n_ZrVKao4i4baYKI,10165 +diffusers/utils/model_card_template.md,sha256=ZhGhzjnMT2oPLbHnC0UYRNEpVC-okH-MLKjvkYsh-Ds,550 +diffusers/utils/outputs.py,sha256=dmC0Okh_TULD-cS4kDR_wDOw1molF58bLvEjp2_0L2s,5002 +diffusers/utils/peft_utils.py,sha256=eNF0iu7CpzD2Ob3t3FyYjJ8W44nEINWtixUKUJeMJtw,16115 +diffusers/utils/pil_utils.py,sha256=2Rs6pMDhEpl-SkBhCZubA5p14_VxENZKhPQ-rwRTDwI,1954 
+diffusers/utils/remote_utils.py,sha256=NdVmHQ39P42vgJAO7aOf2RkxB6LEl9WS84sIrlVQ_2M,16018 +diffusers/utils/source_code_parsing_utils.py,sha256=Mk4KHfymwXHYmV2zZdVsDSn-VJ97HvUV6fGfYSuYn8g,1863 +diffusers/utils/state_dict_utils.py,sha256=_OFCs1ZMwcMa2ptfrnBNG0g0Oe_rNiD258iekNnsdv0,23111 +diffusers/utils/testing_utils.py,sha256=7FX7U9GGLdKU45zT4Uze4fRl9k3WxDWNDyVs4ERgk9k,58984 +diffusers/utils/torch_utils.py,sha256=vI--UmbF_nqCv9nFdkwuyTECS0aVvuQz_1OXnXPmgn8,12207 +diffusers/utils/typing_utils.py,sha256=rZhOlShahWwZAXSmYuz6MdINrJ8Sbc1p8rhdlA_msJU,3394 +diffusers/utils/versions.py,sha256=dG1y8xBZK2CgfrFle8p0-bJXf_4xRbWIpDmZe8XC9zA,4302 +diffusers/video_processor.py,sha256=_HV3db5ujuhXjdZ3-aOb5xp-Y8RnkfJYjc_duoVBFVw,8353 diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers-0.37.0.dist-info/REQUESTED b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers-0.37.0.dist-info/REQUESTED new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers-0.37.0.dist-info/WHEEL b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers-0.37.0.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..e7fa31b6f3f78deb1022c1f7927f07d4d16da822 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers-0.37.0.dist-info/WHEEL @@ -0,0 +1,5 @@ +Wheel-Version: 1.0 +Generator: setuptools (80.9.0) +Root-Is-Purelib: true +Tag: py3-none-any + diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers-0.37.0.dist-info/entry_points.txt b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers-0.37.0.dist-info/entry_points.txt new file mode 100644 index 0000000000000000000000000000000000000000..4d8cdefa4968bbed7a122fa7a34940bf9a15360b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers-0.37.0.dist-info/entry_points.txt @@ -0,0 +1,2 @@ +[console_scripts] +diffusers-cli = 
diffusers.commands.diffusers_cli:main diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers-0.37.0.dist-info/top_level.txt b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers-0.37.0.dist-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..6033efb6dbaaff9bc81792fd75a6b39d9f195aeb --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/diffusers-0.37.0.dist-info/top_level.txt @@ -0,0 +1 @@ +diffusers diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/einops-0.8.1.dist-info/INSTALLER b/URSA/.venv_ursa/lib/python3.12/site-packages/einops-0.8.1.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/einops-0.8.1.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/einops-0.8.1.dist-info/METADATA b/URSA/.venv_ursa/lib/python3.12/site-packages/einops-0.8.1.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..b5fe4314594eb36a75885c03f635a6dfc587e9bf --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/einops-0.8.1.dist-info/METADATA @@ -0,0 +1,384 @@ +Metadata-Version: 2.4 +Name: einops +Version: 0.8.1 +Summary: A new flavour of deep learning operations +Project-URL: Homepage, https://github.com/arogozhnikov/einops +Author: Alex Rogozhnikov +License: MIT +License-File: LICENSE +Keywords: deep learning,einops,machine learning,neural networks,scientific computations,tensor manipulation +Classifier: Intended Audience :: Science/Research +Classifier: License :: OSI Approved :: MIT License +Classifier: Programming Language :: Python :: 3 +Requires-Python: >=3.8 +Description-Content-Type: text/markdown + + + + + + +https://user-images.githubusercontent.com/6318811/177030658-66f0eb5d-e136-44d8-99c9-86ae298ead5b.mp4 + + + + +# einops +[![Run 
tests](https://github.com/arogozhnikov/einops/actions/workflows/run_tests.yml/badge.svg)](https://github.com/arogozhnikov/einops/actions/workflows/run_tests.yml) +[![PyPI version](https://badge.fury.io/py/einops.svg)](https://badge.fury.io/py/einops) +[![Documentation](https://img.shields.io/badge/documentation-link-blue.svg)](https://einops.rocks/) +![Supported python versions](https://raw.githubusercontent.com/arogozhnikov/einops/main/docs/resources/python_badge.svg) + + +Flexible and powerful tensor operations for readable and reliable code.
+Supports numpy, pytorch, tensorflow, jax, and [others](#supported-frameworks). + +## Recent updates: + +- 0.8.0: tinygrad backend added, small fixes +- 0.7.0: no-hassle `torch.compile`, support of [array api standard](https://data-apis.org/array-api/latest/API_specification/index.html) and more +- 10'000🎉: github reports that more than 10k project use einops +- einops 0.6.1: paddle backend added +- einops 0.6 introduces [packing and unpacking](https://github.com/arogozhnikov/einops/blob/main/docs/4-pack-and-unpack.ipynb) +- einops 0.5: einsum is now a part of einops +- [Einops paper](https://openreview.net/pdf?id=oapKSVM2bcj) is accepted for oral presentation at ICLR 2022 (yes, it worth reading). + Talk recordings are [available](https://iclr.cc/virtual/2022/oral/6603) + + +
+Previous updates +- flax and oneflow backend added +- torch.jit.script is supported for pytorch layers +- powerful EinMix added to einops. [Einmix tutorial notebook](https://github.com/arogozhnikov/einops/blob/main/docs/3-einmix-layer.ipynb) +
+ + + + +## Tweets + +> In case you need convincing arguments for setting aside time to learn about einsum and einops... +[Tim Rocktäschel](https://twitter.com/_rockt/status/1230818967205425152) + +> Writing better code with PyTorch and einops 👌 +[Andrej Karpathy](https://twitter.com/karpathy/status/1290826075916779520) + +> Slowly but surely, einops is seeping in to every nook and cranny of my code. If you find yourself shuffling around bazillion dimensional tensors, this might change your life +[Nasim Rahaman](https://twitter.com/nasim_rahaman/status/1216022614755463169) + +[More testimonials](https://einops.rocks/pages/testimonials/) + + +## Contents + +- [Installation](#Installation) +- [Documentation](https://einops.rocks/) +- [Tutorial](#Tutorials) +- [API micro-reference](#API) +- [Why use einops](#Why-use-einops-notation) +- [Supported frameworks](#Supported-frameworks) +- [Citing](#Citing) +- [Repository](https://github.com/arogozhnikov/einops) and [discussions](https://github.com/arogozhnikov/einops/discussions) + +## Installation + +Plain and simple: +```bash +pip install einops +``` + +## Tutorials + +Tutorials are the most convenient way to see `einops` in action + +- part 1: [einops fundamentals](https://github.com/arogozhnikov/einops/blob/main/docs/1-einops-basics.ipynb) +- part 2: [einops for deep learning](https://github.com/arogozhnikov/einops/blob/main/docs/2-einops-for-deep-learning.ipynb) +- part 3: [packing and unpacking](https://github.com/arogozhnikov/einops/blob/main/docs/4-pack-and-unpack.ipynb) +- part 4: [improve pytorch code with einops](http://einops.rocks/pytorch-examples.html) + +Kapil Sachdeva recorded a small [intro to einops](https://www.youtube.com/watch?v=xGy75Pjsqzo). + +## API + +`einops` has a minimalistic yet powerful API. 
+ +Three core operations provided ([einops tutorial](https://github.com/arogozhnikov/einops/blob/main/docs/) +shows those cover stacking, reshape, transposition, squeeze/unsqueeze, repeat, tile, concatenate, view and numerous reductions) + +```python +from einops import rearrange, reduce, repeat +# rearrange elements according to the pattern +output_tensor = rearrange(input_tensor, 't b c -> b c t') +# combine rearrangement and reduction +output_tensor = reduce(input_tensor, 'b c (h h2) (w w2) -> b h w c', 'mean', h2=2, w2=2) +# copy along a new axis +output_tensor = repeat(input_tensor, 'h w -> h w c', c=3) +``` + +Later additions to the family are `pack` and `unpack` functions (better than stack/split/concatenate): + +```python +from einops import pack, unpack +# pack and unpack allow reversibly 'packing' multiple tensors into one. +# Packed tensors may be of different dimensionality: +packed, ps = pack([class_token_bc, image_tokens_bhwc, text_tokens_btc], 'b * c') +class_emb_bc, image_emb_bhwc, text_emb_btc = unpack(transformer(packed), ps, 'b * c') +``` + +Finally, einops provides einsum with a support of multi-lettered names: + +```python +from einops import einsum, pack, unpack +# einsum is like ... einsum, generic and flexible dot-product +# but 1) axes can be multi-lettered 2) pattern goes last 3) works with multiple frameworks +C = einsum(A, B, 'b t1 head c, b t2 head c -> b head t1 t2') +``` + +### EinMix + +`EinMix` is a generic linear layer, perfect for MLP Mixers and similar architectures. + +### Layers + +Einops provides layers (`einops` keeps a separate version for each framework) that reflect corresponding functions + +```python +from einops.layers.torch import Rearrange, Reduce +from einops.layers.tensorflow import Rearrange, Reduce +from einops.layers.flax import Rearrange, Reduce +from einops.layers.paddle import Rearrange, Reduce +``` + +
+Example of using layers within a pytorch model +Example given for pytorch, but code in other frameworks is almost identical + +```python +from torch.nn import Sequential, Conv2d, MaxPool2d, Linear, ReLU +from einops.layers.torch import Rearrange + +model = Sequential( + ..., + Conv2d(6, 16, kernel_size=5), + MaxPool2d(kernel_size=2), + # flattening without need to write forward + Rearrange('b c h w -> b (c h w)'), + Linear(16*5*5, 120), + ReLU(), + Linear(120, 10), +) +``` + +No more flatten needed! + +Additionally, torch layers as those are script-able and compile-able. +Operations [are torch.compile-able](https://github.com/arogozhnikov/einops/wiki/Using-torch.compile-with-einops), + but not script-able due to limitations of torch.jit.script. +
+ + + + +## Naming + +`einops` stands for Einstein-Inspired Notation for operations +(though "Einstein operations" is more attractive and easier to remember). + +Notation was loosely inspired by Einstein summation (in particular by `numpy.einsum` operation). + +## Why use `einops` notation?! + + +### Semantic information (being verbose in expectations) + +```python +y = x.view(x.shape[0], -1) +y = rearrange(x, 'b c h w -> b (c h w)') +``` +While these two lines are doing the same job in *some* context, +the second one provides information about the input and output. +In other words, `einops` focuses on interface: *what is the input and output*, not *how* the output is computed. + +The next operation looks similar: + +```python +y = rearrange(x, 'time c h w -> time (c h w)') +``` +but it gives the reader a hint: +this is not an independent batch of images we are processing, +but rather a sequence (video). + +Semantic information makes the code easier to read and maintain. + +### Convenient checks + +Reconsider the same example: + +```python +y = x.view(x.shape[0], -1) # x: (batch, 256, 19, 19) +y = rearrange(x, 'b c h w -> b (c h w)') +``` +The second line checks that the input has four dimensions, +but you can also specify particular dimensions. +That's opposed to just writing comments about shapes since comments don't prevent mistakes, +not tested, and without code review tend to be outdated +```python +y = x.view(x.shape[0], -1) # x: (batch, 256, 19, 19) +y = rearrange(x, 'b c h w -> b (c h w)', c=256, h=19, w=19) +``` + +### Result is strictly determined + +Below we have at least two ways to define the depth-to-space operation +```python +# depth-to-space +rearrange(x, 'b c (h h2) (w w2) -> b (c h2 w2) h w', h2=2, w2=2) +rearrange(x, 'b c (h h2) (w w2) -> b (h2 w2 c) h w', h2=2, w2=2) +``` +There are at least four more ways to do it. Which one is used by the framework? 
+ +These details are ignored, since *usually* it makes no difference, +but it can make a big difference (e.g. if you use grouped convolutions in the next stage), +and you'd like to specify this in your code. + + +### Uniformity + +```python +reduce(x, 'b c (x dx) -> b c x', 'max', dx=2) +reduce(x, 'b c (x dx) (y dy) -> b c x y', 'max', dx=2, dy=3) +reduce(x, 'b c (x dx) (y dy) (z dz) -> b c x y z', 'max', dx=2, dy=3, dz=4) +``` +These examples demonstrated that we don't use separate operations for 1d/2d/3d pooling, +those are all defined in a uniform way. + +Space-to-depth and depth-to space are defined in many frameworks but how about width-to-height? Here you go: + +```python +rearrange(x, 'b c h (w w2) -> b c (h w2) w', w2=2) +``` + + +### Framework independent behavior + +Even simple functions are defined differently by different frameworks + +```python +y = x.flatten() # or flatten(x) +``` + +Suppose `x`'s shape was `(3, 4, 5)`, then `y` has shape ... + +- numpy, pytorch, cupy, chainer, jax: `(60,)` +- keras, tensorflow.layers, gluon: `(3, 20)` + +`einops` works the same way in all frameworks. + + +### Independence of framework terminology + +Example: `tile` vs `repeat` causes lots of confusion. To copy image along width: +```python +np.tile(image, (1, 2)) # in numpy +image.repeat(1, 2) # pytorch's repeat ~ numpy's tile +``` + +With einops you don't need to decipher which axis was repeated: +```python +repeat(image, 'h w -> h (tile w)', tile=2) # in numpy +repeat(image, 'h w -> h (tile w)', tile=2) # in pytorch +repeat(image, 'h w -> h (tile w)', tile=2) # in tf +repeat(image, 'h w -> h (tile w)', tile=2) # in jax +repeat(image, 'h w -> h (tile w)', tile=2) # in cupy +... (etc.) +``` + +[Testimonials](https://einops.rocks/pages/testimonials/) provide users' perspective on the same question. + + +## Supported frameworks + +Einops works with ... 
+ +- [numpy](http://www.numpy.org/) +- [pytorch](https://pytorch.org/) +- [tensorflow](https://www.tensorflow.org/) +- [jax](https://github.com/google/jax) +- [cupy](https://github.com/cupy/cupy) +- [flax](https://github.com/google/flax) (community) +- [paddle](https://github.com/PaddlePaddle/Paddle) (community) +- [oneflow](https://github.com/Oneflow-Inc/oneflow) (community) +- [tinygrad](https://github.com/tinygrad/tinygrad) (community) +- [pytensor](https://github.com/pymc-devs/pytensor) (community) + +Additionally, einops can be used with any framework that supports +[Python array API standard](https://data-apis.org/array-api/latest/API_specification/index.html), +which includes + +- numpy >= 2.0 +- [MLX](https://github.com/ml-explore/mlx) +- [pydata/sparse](https://github.com/pydata/sparse) >= 0.15 +- [quantco/ndonnx](https://github.com/Quantco/ndonnx) +- recent releases of jax and cupy. +- dask is supported via [array-api-compat](https://github.com/data-apis/array-api-compat) + + +## Development + +Devcontainer is provided, this environment can be used locally, or on your server, +or within github codespaces. +To start with devcontainers in vs code, clone repo, and click 'Reopen in Devcontainer'. + +Starting from the next version, einops will distribute tests as a part of package. +To run tests: + +```bash +# pip install einops +python -m einops.tests.run_tests numpy pytorch jax --pip-install +``` + +`numpy pytorch jax` is an example, any subset of testable frameworks can be provided. +Every framework is tested against numpy, so it is a requirement for tests. + +Specifying `--pip-install` will install requirements in current virtualenv, +and should be omitted if dependencies are installed locally. 
+ +To build/test docs: + +```bash +hatch run docs:serve # Serving on http://localhost:8000/ +``` + + +## Citing einops + +Please use the following bibtex record + +```text +@inproceedings{ + rogozhnikov2022einops, + title={Einops: Clear and Reliable Tensor Manipulations with Einstein-like Notation}, + author={Alex Rogozhnikov}, + booktitle={International Conference on Learning Representations}, + year={2022}, + url={https://openreview.net/forum?id=oapKSVM2bcj} +} +``` + + +## Supported python versions + +`einops` works with python 3.8 or later. diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/einops-0.8.1.dist-info/RECORD b/URSA/.venv_ursa/lib/python3.12/site-packages/einops-0.8.1.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..0f00988b645f65ba70cc25d6db49355cec4dc237 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/einops-0.8.1.dist-info/RECORD @@ -0,0 +1,59 @@ +einops-0.8.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +einops-0.8.1.dist-info/METADATA,sha256=eVKVWQRWA8zSdxPIRPK1ckfEjpMTUHe9uQS8U-0YlAM,13451 +einops-0.8.1.dist-info/RECORD,, +einops-0.8.1.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +einops-0.8.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87 +einops-0.8.1.dist-info/licenses/LICENSE,sha256=MNmENkKW9R_67K1LAe4SfpUlDFBokY1LZvyWIGcj5DQ,1073 +einops/__init__.py,sha256=rKyvE64bYMOMXAG4EfMMSVGxgFZ0x51dt82pRsw4lHE,422 +einops/__pycache__/__init__.cpython-312.pyc,, +einops/__pycache__/_backends.cpython-312.pyc,, +einops/__pycache__/_torch_specific.cpython-312.pyc,, +einops/__pycache__/array_api.cpython-312.pyc,, +einops/__pycache__/einops.cpython-312.pyc,, +einops/__pycache__/packing.cpython-312.pyc,, +einops/__pycache__/parsing.cpython-312.pyc,, +einops/_backends.py,sha256=7BAiKWdBLLvAiryfhwDbwxglGDW9Dh5FNMT1ERSuzxk,21281 +einops/_torch_specific.py,sha256=yMaQeqAZhBLWR1Q-Jv6uRINJfzROhLb-rzKKevpefUU,4138 
+einops/array_api.py,sha256=jOb8RhwLS9wob_Y_e_KrnBR6ihQPoB2Ly0tfrHr-_Zk,5247 +einops/einops.py,sha256=sXvD8SWFqufziyQJKRPmfAGHVN1cMDvYOPNuZ8L1XQU,37569 +einops/experimental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +einops/experimental/__pycache__/__init__.cpython-312.pyc,, +einops/experimental/__pycache__/indexing.cpython-312.pyc,, +einops/experimental/indexing.py,sha256=yFFflW3-kV6_5PPJU7_jOJsJBCWCWlE4dGlu9gwSPXo,121 +einops/layers/__init__.py,sha256=vBtnAt2afs4QlqpeFU4dlZNxBuC9IXl3fmilk-2OzHM,3747 +einops/layers/__pycache__/__init__.cpython-312.pyc,, +einops/layers/__pycache__/_einmix.cpython-312.pyc,, +einops/layers/__pycache__/flax.cpython-312.pyc,, +einops/layers/__pycache__/keras.cpython-312.pyc,, +einops/layers/__pycache__/oneflow.cpython-312.pyc,, +einops/layers/__pycache__/paddle.cpython-312.pyc,, +einops/layers/__pycache__/tensorflow.cpython-312.pyc,, +einops/layers/__pycache__/torch.cpython-312.pyc,, +einops/layers/_einmix.py,sha256=kNyW05BEG-miMpu1vGQqKXr8OJMe-xweS_aMUguerqM,11125 +einops/layers/flax.py,sha256=zFy83gSLRm31cLuKFRvZ82_HsefnXPbRvkKZh1KkC1I,2536 +einops/layers/keras.py,sha256=-7So0w94phvf9HdW0xi2mSeBg02qVPvAyfp_1XR02NM,212 +einops/layers/oneflow.py,sha256=YEPzz4xc7BDRQfb8ulD3teqQJdbO6qQg7Z4KIPVTLz8,1864 +einops/layers/paddle.py,sha256=8cRZQ8BT9vYEczh7pNProuTM_3XjLty2ht2sdvXNFiI,1907 +einops/layers/tensorflow.py,sha256=T9uhSVwbXREahc31ARAHoN5K-7zsuS8NRNPdY6Zk1Bc,3324 +einops/layers/torch.py,sha256=504G99kEgy7dk1UPBbj9hzJmZkAHwVhMDFN_8J-p3C8,2399 +einops/packing.py,sha256=1wN9vMa-coq1RiOR_Nu2aXqgIkrDf3j-N1Q-YN0-xWo,7650 +einops/parsing.py,sha256=xbqcvwReLiROEucoegZ20WQiEHlLg0uxo_vYoezKB_4,6746 +einops/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +einops/tests/__init__.py,sha256=CLKCGvFTCwmUoUxfa-JGHQQL9c43rTiqienVjT18Phc,3525 +einops/tests/__pycache__/__init__.cpython-312.pyc,, +einops/tests/__pycache__/run_tests.cpython-312.pyc,, +einops/tests/__pycache__/test_einsum.cpython-312.pyc,, 
+einops/tests/__pycache__/test_examples.cpython-312.pyc,, +einops/tests/__pycache__/test_layers.cpython-312.pyc,, +einops/tests/__pycache__/test_ops.cpython-312.pyc,, +einops/tests/__pycache__/test_other.cpython-312.pyc,, +einops/tests/__pycache__/test_packing.cpython-312.pyc,, +einops/tests/__pycache__/test_parsing.cpython-312.pyc,, +einops/tests/run_tests.py,sha256=V8q7_Y9dMksnLsqobqNHbR0j8dVCvo1hOeB2AjQ83No,2887 +einops/tests/test_einsum.py,sha256=efS7ynJvwI3VFfTzpYruWnKYhxWyxfRzdbXsMnZFHWM,10977 +einops/tests/test_examples.py,sha256=psd__CWOcX2J5APXxsFRvXlpKm9yioTQEnEebEStPKc,11538 +einops/tests/test_layers.py,sha256=oQVfU3GzsXq_dJV3ZkYBJGp8EumJEtPRB0eRg6paLa8,18488 +einops/tests/test_ops.py,sha256=oljzJ2N4k75QjSthaOAR4k57j0yBKjuSrDac7JuZHak,26998 +einops/tests/test_other.py,sha256=ag_h6oz3gMH8Mkd4CR5QXXw9kmeYEefUaGobXEgHjMM,11350 +einops/tests/test_packing.py,sha256=PdIOvHr1K7d4F5s1MtCAOIrIkvAniJBw5wnbnpDx3xk,10444 +einops/tests/test_parsing.py,sha256=XKFIe10Flpa5gTE1lHXeYt1GKroXkUI-b18Pat_uQeE,4389 diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/einops-0.8.1.dist-info/REQUESTED b/URSA/.venv_ursa/lib/python3.12/site-packages/einops-0.8.1.dist-info/REQUESTED new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/einops-0.8.1.dist-info/WHEEL b/URSA/.venv_ursa/lib/python3.12/site-packages/einops-0.8.1.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..12228d414b6cfed7c39d3781c85c63256a1d7fb5 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/einops-0.8.1.dist-info/WHEEL @@ -0,0 +1,4 @@ +Wheel-Version: 1.0 +Generator: hatchling 1.27.0 +Root-Is-Purelib: true +Tag: py3-none-any diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/einops/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/einops/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..4e67507733b744cee9ccab44e6299312d11ba3e1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/einops/__init__.py @@ -0,0 +1,17 @@ +# imports can use EinopsError class +# ruff: noqa: E402 + +__author__ = "Alex Rogozhnikov" +__version__ = "0.8.1" + + +class EinopsError(RuntimeError): + """Runtime error thrown by einops""" + + pass + + +__all__ = ["rearrange", "reduce", "repeat", "einsum", "pack", "unpack", "parse_shape", "asnumpy", "EinopsError"] + +from .einops import rearrange, reduce, repeat, einsum, parse_shape, asnumpy +from .packing import pack, unpack diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/einops/_backends.py b/URSA/.venv_ursa/lib/python3.12/site-packages/einops/_backends.py new file mode 100644 index 0000000000000000000000000000000000000000..3d2f9160b96af7cef112887fe8deceb440391ffd --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/einops/_backends.py @@ -0,0 +1,719 @@ +""" +Backends in `einops` are organized to meet the following requirements +- backends are not imported unless those are actually needed, because + - backends may not be installed + - importing all available backends will drive to significant memory footprint + - backends may be present but installed with errors (but never used), + importing may drive to crashes +- backend should be either symbolic or imperative + - this determines which methods (from_numpy/to_numpy or create_symbol/eval_symbol) should be defined +- if backend can't provide symbols for shape dimensions, UnknownSize objects are used +""" + +import sys + +__author__ = "Alex Rogozhnikov" + +_loaded_backends: dict = {} +_type2backend: dict = {} +_debug_importing = False + + +def get_backend(tensor) -> "AbstractBackend": + """ + Takes a correct backend (e.g. numpy backend if tensor is numpy.ndarray) for a tensor. 
+ If needed, imports package and creates backend + """ + _type = type(tensor) + _result = _type2backend.get(_type, None) + if _result is not None: + return _result + + for framework_name, backend in list(_loaded_backends.items()): + if backend.is_appropriate_type(tensor): + _type2backend[_type] = backend + return backend + + # Find backend subclasses recursively + backend_subclasses = [] + backends = AbstractBackend.__subclasses__() + while backends: + backend = backends.pop() + backends += backend.__subclasses__() + backend_subclasses.append(backend) + + for BackendSubclass in backend_subclasses: + if _debug_importing: + print("Testing for subclass of ", BackendSubclass) + if BackendSubclass.framework_name not in _loaded_backends: + # check that module was already imported. Otherwise it can't be imported + if BackendSubclass.framework_name in sys.modules: + if _debug_importing: + print("Imported backend for ", BackendSubclass.framework_name) + backend = BackendSubclass() + _loaded_backends[backend.framework_name] = backend + if backend.is_appropriate_type(tensor): + _type2backend[_type] = backend + return backend + + raise RuntimeError("Tensor type unknown to einops {}".format(type(tensor))) + + +class AbstractBackend: + """Base backend class, major part of methods are only for debugging purposes.""" + + framework_name: str + + def is_appropriate_type(self, tensor): + """helper method should recognize tensors it can handle""" + raise NotImplementedError() + + def from_numpy(self, x): + raise NotImplementedError("framework doesn't support imperative execution") + + def to_numpy(self, x): + raise NotImplementedError("framework doesn't support imperative execution") + + def create_symbol(self, shape): + raise NotImplementedError("framework doesn't support symbolic computations") + + def eval_symbol(self, symbol, symbol_value_pairs): + # symbol-value pairs is list[tuple[symbol, value-tensor]] + raise NotImplementedError("framework doesn't support symbolic 
computations") + + def arange(self, start, stop): + # supplementary method used only in testing, so should implement CPU version + raise NotImplementedError("framework doesn't implement arange") + + def shape(self, x): + """shape should return a tuple with integers or "shape symbols" (which will evaluate to actual size)""" + return x.shape + + def reshape(self, x, shape): + return x.reshape(shape) + + def transpose(self, x, axes): + return x.transpose(axes) + + def reduce(self, x, operation, axes): + return getattr(x, operation)(axis=axes) + + def stack_on_zeroth_dimension(self, tensors: list): + raise NotImplementedError() + + def add_axis(self, x, new_position): + raise NotImplementedError() + + def add_axes(self, x, n_axes, pos2len): + repeats = [1] * n_axes + for axis_position, axis_length in pos2len.items(): + x = self.add_axis(x, axis_position) + repeats[axis_position] = axis_length + return self.tile(x, tuple(repeats)) + + def tile(self, x, repeats): + """repeats - same lengths as x.shape""" + raise NotImplementedError() + + def concat(self, tensors, axis: int): + """concatenates tensors along axis. + Assume identical across tensors: devices, dtypes and shapes except selected axis.""" + raise NotImplementedError() + + def is_float_type(self, x): + # some backends (torch) can't compute average for non-floating types. 
+ # Decided to drop average for all backends if type is not floating + raise NotImplementedError() + + def layers(self): + raise NotImplementedError("backend does not provide layers") + + def __repr__(self): + return "".format(self.framework_name) + + def einsum(self, pattern, *x): + raise NotImplementedError("backend does not support einsum") + + +class UnknownSize: + """pseudo-symbol for symbolic frameworks which do not provide symbols for shape elements""" + + def __floordiv__(self, other): + return self + + def __eq__(self, other): + return True # we don't know actual size + + def __mul__(self, other): + return self + + def __rmul__(self, other): + return self + + def __hash__(self): + return hash(None) + + +class NumpyBackend(AbstractBackend): + framework_name = "numpy" + + def __init__(self): + import numpy + + self.np = numpy + + def is_appropriate_type(self, tensor): + return isinstance(tensor, self.np.ndarray) + + def from_numpy(self, x): + return x + + def to_numpy(self, x): + return x + + def arange(self, start, stop): + return self.np.arange(start, stop) + + def stack_on_zeroth_dimension(self, tensors: list): + return self.np.stack(tensors) + + def tile(self, x, repeats): + return self.np.tile(x, repeats) + + def concat(self, tensors, axis: int): + return self.np.concatenate(tensors, axis=axis) + + def is_float_type(self, x): + return x.dtype in ("float16", "float32", "float64", "float128", "bfloat16") + + def add_axis(self, x, new_position): + return self.np.expand_dims(x, new_position) + + def einsum(self, pattern, *x): + return self.np.einsum(pattern, *x) + + +class JaxBackend(NumpyBackend): + framework_name = "jax" + + def __init__(self): + super(JaxBackend, self).__init__() + self.onp = self.np + + import jax.numpy + + self.np = jax.numpy + + def from_numpy(self, x): + return self.np.asarray(x) + + def to_numpy(self, x): + return self.onp.asarray(x) + + +class TorchBackend(AbstractBackend): + framework_name = "torch" + + def __init__(self): + 
import torch + + self.torch = torch + # importing would register operations in torch._dynamo for torch.compile + from . import _torch_specific # noqa + + def is_appropriate_type(self, tensor): + return isinstance(tensor, self.torch.Tensor) + + def from_numpy(self, x): + variable = self.torch.from_numpy(x) + if self.is_float_type(variable): + # attach grad only to floating types + variable.requires_grad = True + return variable + + def to_numpy(self, x): + return x.detach().cpu().numpy() + + def arange(self, start, stop): + return self.torch.arange(start, stop, dtype=self.torch.int64) + + def reduce(self, x, operation, reduced_axes): + if operation == "min": + return x.amin(dim=reduced_axes) + elif operation == "max": + return x.amax(dim=reduced_axes) + elif operation == "sum": + return x.sum(dim=reduced_axes) + elif operation == "mean": + return x.mean(dim=reduced_axes) + elif operation in ("any", "all", "prod"): + # pytorch supports reducing only one operation at a time + for i in list(sorted(reduced_axes))[::-1]: + x = getattr(x, operation)(dim=i) + return x + else: + raise NotImplementedError("Unknown reduction ", operation) + + def transpose(self, x, axes): + return x.permute(axes) + + def stack_on_zeroth_dimension(self, tensors: list): + return self.torch.stack(tensors) + + def add_axes(self, x, n_axes, pos2len): + repeats = [-1] * n_axes + for axis_position, axis_length in pos2len.items(): + x = self.add_axis(x, axis_position) + repeats[axis_position] = axis_length + return x.expand(repeats) + + def tile(self, x, repeats): + return x.repeat(repeats) + + def concat(self, tensors, axis: int): + return self.torch.cat(tensors, dim=axis) + + def add_axis(self, x, new_position): + return self.torch.unsqueeze(x, new_position) + + def is_float_type(self, x): + return x.dtype in [self.torch.float16, self.torch.float32, self.torch.float64, self.torch.bfloat16] + + def layers(self): + from .layers import torch + + return torch + + def einsum(self, pattern, *x): + return 
self.torch.einsum(pattern, *x) + + +class CupyBackend(AbstractBackend): + framework_name = "cupy" + + def __init__(self): + import cupy + + self.cupy = cupy + + def is_appropriate_type(self, tensor): + return isinstance(tensor, self.cupy.ndarray) + + def from_numpy(self, x): + return self.cupy.asarray(x) + + def to_numpy(self, x): + return self.cupy.asnumpy(x) + + def arange(self, start, stop): + return self.cupy.arange(start, stop) + + def stack_on_zeroth_dimension(self, tensors: list): + return self.cupy.stack(tensors) + + def tile(self, x, repeats): + return self.cupy.tile(x, repeats) + + def concat(self, tensors, axis: int): + return self.cupy.concatenate(tensors, axis=axis) + + def add_axis(self, x, new_position): + return self.cupy.expand_dims(x, new_position) + + def is_float_type(self, x): + return x.dtype in ("float16", "float32", "float64", "float128", "bfloat16") + + def einsum(self, pattern, *x): + return self.cupy.einsum(pattern, *x) + + +class HashableTuple: + """Overcomes non-hashability of symbolic elements""" + + def __init__(self, elements: tuple): + self.elements = elements + + def __iter__(self): + for x in self.elements: + yield x + + def __len__(self): + return len(self.elements) + + def __getitem__(self, item): + return self.elements[item] + + # default equality and hash is used (True only with itself, hash taken of id) + + +class TensorflowBackend(AbstractBackend): + framework_name = "tensorflow" + + def __init__(self): + import tensorflow + + self.tf = tensorflow + + def is_appropriate_type(self, tensor): + return isinstance(tensor, (self.tf.Tensor, self.tf.Variable)) + + def from_numpy(self, x): + assert self.tf.executing_eagerly() + return self.tf.convert_to_tensor(x) + + def to_numpy(self, x): + assert self.tf.executing_eagerly() + return x.numpy() + + def arange(self, start, stop): + return self.tf.range(start, stop) + + def shape(self, x): + if self.tf.executing_eagerly(): + return tuple(UnknownSize() if d is None else int(d) for d in 
x.shape) + else: + static_shape = x.shape.as_list() + tf_shape = self.tf.shape(x) + # use the static shape where known, otherwise use the TF shape components + shape = tuple([s or tf_shape[dim] for dim, s in enumerate(static_shape)]) + try: + hash(shape) + return shape + except BaseException: + # unhashable symbols in shape. Wrap tuple to be hashable. + return HashableTuple(shape) + + def reduce(self, x, operation, axes): + return getattr(self.tf, "reduce_" + operation)(x, axis=axes) + + def reshape(self, x, shape): + return self.tf.reshape(x, shape) + + def transpose(self, x, axes): + return self.tf.transpose(x, axes) + + def stack_on_zeroth_dimension(self, tensors: list): + return self.tf.stack(tensors) + + def tile(self, x, repeats): + return self.tf.tile(x, repeats) + + def concat(self, tensors, axis: int): + return self.tf.concat(tensors, axis=axis) + + def add_axis(self, x, new_position): + return self.tf.expand_dims(x, new_position) + + def is_float_type(self, x): + return x.dtype in ("float16", "float32", "float64", "float128", "bfloat16") + + def layers(self): + from .layers import tensorflow + + return tensorflow + + def einsum(self, pattern, *x): + return self.tf.einsum(pattern, *x) + + +class TFKerasBackend(AbstractBackend): + framework_name = "tensorflow.keras" + + def __init__(self): + import tensorflow as tf + + self.tf = tf + self.keras = tf.keras + self.K = tf.keras.backend + + def is_appropriate_type(self, tensor): + return self.tf.is_tensor(tensor) and self.K.is_keras_tensor(tensor) + + def create_symbol(self, shape): + return self.keras.Input(batch_shape=shape) + + def eval_symbol(self, symbol, symbol_value_pairs): + model = self.keras.models.Model([var for (var, _) in symbol_value_pairs], symbol) + return model.predict_on_batch([val for (_, val) in symbol_value_pairs]) + + def arange(self, start, stop): + return self.K.arange(start, stop) + + def shape(self, x): + shape = self.K.shape(x) # tf tensor + return HashableTuple(tuple(shape)) + + def 
reduce(self, x, operation, axes): + return getattr(self.K, operation)(x, axis=axes) + + def reshape(self, x, shape): + return self.K.reshape(x, shape) + + def transpose(self, x, axes): + return self.K.permute_dimensions(x, axes) + + def stack_on_zeroth_dimension(self, tensors: list): + return self.K.stack(tensors) + + def tile(self, x, repeats): + return self.K.tile(x, repeats) + + def concat(self, tensors, axis: int): + return self.K.concatenate(tensors, axis=axis) + + def add_axis(self, x, new_position): + return self.K.expand_dims(x, new_position) + + def is_float_type(self, x): + return "float" in self.K.dtype(x) + + def layers(self): + from .layers import keras + + return keras + + +class OneFlowBackend(AbstractBackend): + framework_name = "oneflow" + + def __init__(self): + import oneflow as flow + + self.flow = flow + + def is_appropriate_type(self, tensor): + return isinstance(tensor, self.flow.Tensor) + + def from_numpy(self, x): + variable = self.flow.from_numpy(x) + if self.is_float_type(variable): + # attach grad only to floating types + variable.requires_grad = True + return variable + + def to_numpy(self, x): + return x.detach().cpu().numpy() + + def arange(self, start, stop): + return self.flow.arange(start, stop, dtype=self.flow.int64) + + def reduce(self, x, operation, reduced_axes): + for axis in sorted(reduced_axes, reverse=True): + if operation == "min": + x, _ = x.min(dim=axis) + elif operation == "max": + x, _ = x.max(dim=axis) + elif operation in ["sum", "mean", "prod", "any", "all"]: + x = getattr(x, operation)(dim=axis) + else: + raise NotImplementedError("Unknown reduction ", operation) + return x + + def transpose(self, x, axes): + return x.permute(axes) + + def stack_on_zeroth_dimension(self, tensors: list): + return self.flow.stack(tensors) + + def add_axes(self, x, n_axes, pos2len): + repeats = [-1] * n_axes + for axis_position, axis_length in pos2len.items(): + x = self.add_axis(x, axis_position) + repeats[axis_position] = axis_length 
+ return x.expand(*repeats) + + def tile(self, x, repeats): + return x.repeat(repeats) + + def concat(self, tensors, axis: int): + return self.flow.concat(tensors, dim=axis) + + def add_axis(self, x, new_position): + return self.flow.unsqueeze(x, new_position) + + def is_float_type(self, x): + return x.dtype in [self.flow.float16, self.flow.float32, self.flow.float64] + + def layers(self): + from .layers import oneflow + + return oneflow + + def einsum(self, pattern, *x): + return self.flow.einsum(pattern, *x) + + +class PaddleBackend(AbstractBackend): + framework_name = "paddle" + + def __init__(self): + import paddle + + self.paddle = paddle + + def is_appropriate_type(self, tensor): + return self.paddle.is_tensor(tensor) + + def from_numpy(self, x): + tensor = self.paddle.to_tensor(x) + tensor.stop_gradient = False + return tensor + + def to_numpy(self, x): + return x.detach().numpy() + + def arange(self, start, stop): + return self.paddle.arange(start, stop, dtype=self.paddle.int64) + + def reduce(self, x, operation, axes): + if len(axes) == x.ndim: + # currently paddle returns 1d tensor instead of 0d + return super().reduce(x, operation, axes).squeeze(0) + else: + return super().reduce(x, operation, axes) + + def transpose(self, x, axes): + return x.transpose(axes) + + def add_axes(self, x, n_axes, pos2len): + repeats = [-1] * n_axes + for axis_position, axis_length in pos2len.items(): + x = self.add_axis(x, axis_position) + repeats[axis_position] = axis_length + return x.expand(repeats) + + def stack_on_zeroth_dimension(self, tensors: list): + return self.paddle.stack(tensors) + + def reshape(self, x, shape): + return x.reshape(shape) + + def tile(self, x, repeats): + return x.tile(repeats) + + def concat(self, tensors, axis: int): + return self.paddle.concat(tensors, axis=axis) + + def add_axis(self, x, new_position): + return x.unsqueeze(new_position) + + def is_float_type(self, x): + return x.dtype in [self.paddle.float16, self.paddle.float32, 
self.paddle.float64] + + def layers(self): + from .layers import paddle + + return paddle + + def einsum(self, pattern, *x): + return self.paddle.einsum(pattern, *x) + + def shape(self, x): + return tuple(x.shape) + + +class TinygradBackend(AbstractBackend): + framework_name = "tinygrad" + + def __init__(self): + import tinygrad + + self.tinygrad = tinygrad + + def is_appropriate_type(self, tensor): + return isinstance(tensor, self.tinygrad.Tensor) + + def from_numpy(self, x): + return self.tinygrad.Tensor(x) + + def to_numpy(self, x): + return x.numpy() + + def arange(self, start, stop): + return self.tinygrad.Tensor.arange(start, stop) + + def shape(self, x): + return x.shape + + def reshape(self, x, shape): + return x.reshape(shape) + + def transpose(self, x, axes): + return x.permute(axes) + + def reduce(self, x, operation, axes): + for axis in sorted(axes, reverse=True): + x = getattr(x, operation)(axis=axis) + return x + + def stack_on_zeroth_dimension(self, tensors: list): + return self.tinygrad.Tensor.stack(tensors) + + def add_axis(self, x, new_position): + return x.unsqueeze(new_position) + + def tile(self, x, repeats): + return x.repeat(repeats) + + def concat(self, tensors, axis: int): + return tensors[0].cat(*tensors[1:], dim=axis) if len(tensors) > 1 else tensors[0] + + def is_float_type(self, x): + return self.tinygrad.dtypes.is_float(x.dtype) + + def einsum(self, pattern, *x): + return self.tinygrad.Tensor.einsum(pattern, *x) + + +class PyTensorBackend(AbstractBackend): + framework_name = "pytensor" + + def __init__(self): + from pytensor import tensor + + self.pt = tensor + + def is_appropriate_type(self, tensor): + return isinstance(tensor, self.pt.TensorVariable) + + def is_float_type(self, x): + return x.dtype in self.pt.type.float_dtypes + + def from_numpy(self, x): + return self.pt.as_tensor(x) + + def to_numpy(self, x): + return x.eval() # Will only work if there are no symbolic inputs + + def create_symbol(self, shape): + if not 
isinstance(shape, tuple | list): + shape = (shape,) + return self.pt.tensor(shape=shape) + + def eval_symbol(self, symbol, symbol_value_pairs): + return symbol.eval(dict(symbol_value_pairs)) + + def arange(self, start, stop): + return self.pt.arange(start, stop) + + def shape(self, x): + # use the static shape dimensions where known + return tuple( + static_dim if static_dim is not None else symbolic_dim + for static_dim, symbolic_dim in zip(x.type.shape, x.shape) + ) + + def stack_on_zeroth_dimension(self, tensors: list): + return self.pt.stack(tensors) + + def tile(self, x, repeats): + return self.pt.tile(x, repeats) + + def concat(self, tensors, axis: int): + return self.pt.concatenate(tensors, axis=axis) + + def add_axis(self, x, new_position): + return self.pt.expand_dims(x, new_position) + + def einsum(self, pattern, *x): + return self.pt.einsum(pattern, *x) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/einops/_torch_specific.py b/URSA/.venv_ursa/lib/python3.12/site-packages/einops/_torch_specific.py new file mode 100644 index 0000000000000000000000000000000000000000..3384c936824bfad459ca83db36a308294bc999b7 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/einops/_torch_specific.py @@ -0,0 +1,128 @@ +""" +Specialization of einops for torch. + +Unfortunately, torch's jit scripting mechanism isn't strong enough, +and to have scripting supported at least for layers, +a number of additional moves is needed. + +Design of main operations (dynamic resolution by lookup) is unlikely +to be implemented by torch.jit.script, +but torch.compile seems to work with operations just fine. +""" + +import warnings +from typing import Dict, List, Tuple + +import torch +from einops.einops import TransformRecipe, _reconstruct_from_shape_uncached + + +class TorchJitBackend: + """ + Completely static backend that mimics part of normal backend functionality + but restricted to be within torchscript. 
+ """ + + @staticmethod + def reduce(x: torch.Tensor, operation: str, reduced_axes: List[int]): + if operation == "min": + return x.amin(dim=reduced_axes) + elif operation == "max": + return x.amax(dim=reduced_axes) + elif operation == "sum": + return x.sum(dim=reduced_axes) + elif operation == "mean": + return x.mean(dim=reduced_axes) + elif operation == "prod": + for i in list(sorted(reduced_axes))[::-1]: + x = x.prod(dim=i) + return x + else: + raise NotImplementedError("Unknown reduction ", operation) + + @staticmethod + def transpose(x, axes: List[int]): + return x.permute(axes) + + @staticmethod + def stack_on_zeroth_dimension(tensors: List[torch.Tensor]): + return torch.stack(tensors) + + @staticmethod + def tile(x, repeats: List[int]): + return x.repeat(repeats) + + @staticmethod + def add_axes(x, n_axes: int, pos2len: Dict[int, int]): + repeats = [-1] * n_axes + for axis_position, axis_length in pos2len.items(): + x = torch.unsqueeze(x, axis_position) + repeats[axis_position] = axis_length + return x.expand(repeats) + + @staticmethod + def is_float_type(x): + return x.dtype in [torch.float16, torch.float32, torch.float64, torch.bfloat16] + + @staticmethod + def shape(x): + return x.shape + + @staticmethod + def reshape(x, shape: List[int]): + return x.reshape(shape) + + +# mirrors einops.einops._apply_recipe +def apply_for_scriptable_torch( + recipe: TransformRecipe, tensor: torch.Tensor, reduction_type: str, axes_dims: List[Tuple[str, int]] +) -> torch.Tensor: + backend = TorchJitBackend + ( + init_shapes, + axes_reordering, + reduced_axes, + added_axes, + final_shapes, + n_axes_w_added, + ) = _reconstruct_from_shape_uncached(recipe, backend.shape(tensor), axes_dims=axes_dims) + if init_shapes is not None: + tensor = backend.reshape(tensor, init_shapes) + if axes_reordering is not None: + tensor = backend.transpose(tensor, axes_reordering) + if len(reduced_axes) > 0: + tensor = backend.reduce(tensor, operation=reduction_type, reduced_axes=reduced_axes) + 
if len(added_axes) > 0: + tensor = backend.add_axes(tensor, n_axes=n_axes_w_added, pos2len=added_axes) + if final_shapes is not None: + tensor = backend.reshape(tensor, final_shapes) + return tensor + + +def allow_ops_in_compiled_graph(): + if hasattr(torch, "__version__") and torch.__version__[0] < "2": + # torch._dynamo and torch.compile appear in pytorch 2.0 + return + try: + from torch._dynamo import allow_in_graph + except ImportError: + warnings.warn("allow_ops_in_compiled_graph failed to import torch: ensure pytorch >=2.0", ImportWarning) + return + + from .einops import rearrange, reduce, repeat, einsum + from .packing import pack, unpack + + allow_in_graph(rearrange) + allow_in_graph(reduce) + allow_in_graph(repeat) + allow_in_graph(einsum) + allow_in_graph(pack) + allow_in_graph(unpack) + + # CF: https://github.com/pytorch/pytorch/blob/2df939aacac68e9621fbd5d876c78d86e72b41e2/torch/_dynamo/__init__.py#L222 + global _ops_were_registered_in_torchdynamo + _ops_were_registered_in_torchdynamo = True + + +# module import automatically registers ops in torchdynamo +allow_ops_in_compiled_graph() diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/einops/array_api.py b/URSA/.venv_ursa/lib/python3.12/site-packages/einops/array_api.py new file mode 100644 index 0000000000000000000000000000000000000000..3a001f61e85fb54a3e5d3209314cf92e8783b3fb --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/einops/array_api.py @@ -0,0 +1,124 @@ +from typing import List, Tuple, Sequence +from .einops import Tensor, Reduction, EinopsError, _prepare_transformation_recipe, _apply_recipe_array_api +from .packing import analyze_pattern, prod + + +def reduce(tensor: Tensor, pattern: str, reduction: Reduction, **axes_lengths: int) -> Tensor: + if isinstance(tensor, list): + if len(tensor) == 0: + raise TypeError("Einops can't be applied to an empty list") + xp = tensor[0].__array_namespace__() + tensor = xp.stack(tensor) + else: + xp = tensor.__array_namespace__() + 
try: + hashable_axes_lengths = tuple(axes_lengths.items()) + recipe = _prepare_transformation_recipe(pattern, reduction, axes_names=tuple(axes_lengths), ndim=tensor.ndim) + return _apply_recipe_array_api( + xp, + recipe=recipe, + tensor=tensor, + reduction_type=reduction, + axes_lengths=hashable_axes_lengths, + ) + except EinopsError as e: + message = ' Error while processing {}-reduction pattern "{}".'.format(reduction, pattern) + if not isinstance(tensor, list): + message += "\n Input tensor shape: {}. ".format(tensor.shape) + else: + message += "\n Input is list. " + message += "Additional info: {}.".format(axes_lengths) + raise EinopsError(message + "\n {}".format(e)) + + +def repeat(tensor: Tensor, pattern: str, **axes_lengths) -> Tensor: + return reduce(tensor, pattern, reduction="repeat", **axes_lengths) + + +def rearrange(tensor: Tensor, pattern: str, **axes_lengths) -> Tensor: + return reduce(tensor, pattern, reduction="rearrange", **axes_lengths) + + +def asnumpy(tensor: Tensor): + import numpy as np + + return np.from_dlpack(tensor) + + +Shape = Tuple + + +def pack(tensors: Sequence[Tensor], pattern: str) -> Tuple[Tensor, List[Shape]]: + n_axes_before, n_axes_after, min_axes = analyze_pattern(pattern, "pack") + xp = tensors[0].__array_namespace__() + + reshaped_tensors: List[Tensor] = [] + packed_shapes: List[Shape] = [] + for i, tensor in enumerate(tensors): + shape = tensor.shape + if len(shape) < min_axes: + raise EinopsError( + f"packed tensor #{i} (enumeration starts with 0) has shape {shape}, " + f"while pattern {pattern} assumes at least {min_axes} axes" + ) + axis_after_packed_axes = len(shape) - n_axes_after + packed_shapes.append(shape[n_axes_before:axis_after_packed_axes]) + reshaped_tensors.append(xp.reshape(tensor, (*shape[:n_axes_before], -1, *shape[axis_after_packed_axes:]))) + + return xp.concat(reshaped_tensors, axis=n_axes_before), packed_shapes + + +def unpack(tensor: Tensor, packed_shapes: List[Shape], pattern: str) -> List[Tensor]: + 
xp = tensor.__array_namespace__() + n_axes_before, n_axes_after, min_axes = analyze_pattern(pattern, opname="unpack") + + # backend = get_backend(tensor) + input_shape = tensor.shape + if len(input_shape) != n_axes_before + 1 + n_axes_after: + raise EinopsError(f"unpack(..., {pattern}) received input of wrong dim with shape {input_shape}") + + unpacked_axis: int = n_axes_before + + lengths_of_composed_axes: List[int] = [-1 if -1 in p_shape else prod(p_shape) for p_shape in packed_shapes] + + n_unknown_composed_axes = sum(x == -1 for x in lengths_of_composed_axes) + if n_unknown_composed_axes > 1: + raise EinopsError( + f"unpack(..., {pattern}) received more than one -1 in {packed_shapes} and can't infer dimensions" + ) + + # following manipulations allow to skip some shape verifications + # and leave it to backends + + # [[], [2, 3], [4], [-1, 5], [6]] < examples of packed_axis + # split positions when computed should be + # [0, 1, 7, 11, N-6 , N ], where N = length of axis + split_positions = [0] * len(packed_shapes) + [input_shape[unpacked_axis]] + if n_unknown_composed_axes == 0: + for i, x in enumerate(lengths_of_composed_axes[:-1]): + split_positions[i + 1] = split_positions[i] + x + else: + unknown_composed_axis: int = lengths_of_composed_axes.index(-1) + for i in range(unknown_composed_axis): + split_positions[i + 1] = split_positions[i] + lengths_of_composed_axes[i] + for j in range(unknown_composed_axis + 1, len(lengths_of_composed_axes))[::-1]: + split_positions[j] = split_positions[j + 1] - lengths_of_composed_axes[j] + + shape_start = input_shape[:unpacked_axis] + shape_end = input_shape[unpacked_axis + 1 :] + slice_filler = (slice(None, None),) * unpacked_axis + try: + return [ + xp.reshape( + # shortest way slice arbitrary axis + tensor[(*slice_filler, slice(split_positions[i], split_positions[i + 1]), ...)], + (*shape_start, *element_shape, *shape_end), + ) + for i, element_shape in enumerate(packed_shapes) + ] + except Exception: + # this hits if 
there is an error during reshapes, which means passed shapes were incorrect + raise RuntimeError( + f'Error during unpack(..., "{pattern}"): could not split axis of size {split_positions[-1]}' + f" into requested {packed_shapes}" + ) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/einops/einops.py b/URSA/.venv_ursa/lib/python3.12/site-packages/einops/einops.py new file mode 100644 index 0000000000000000000000000000000000000000..8f198372e102ed4278004bdf553e26bf3b5f978c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/einops/einops.py @@ -0,0 +1,916 @@ +import functools +import itertools +import string +import typing +from collections import OrderedDict +from typing import Set, Tuple, List, Dict, Union, Callable, Optional, TypeVar, cast, Any + +if typing.TYPE_CHECKING: + # for docstrings in pycharm + import numpy as np # noqa E401 + +from . import EinopsError +from ._backends import get_backend +from .parsing import ParsedExpression, _ellipsis, AnonymousAxis + +Tensor = TypeVar("Tensor") +ReductionCallable = Callable[[Tensor, Tuple[int, ...]], Tensor] +Reduction = Union[str, ReductionCallable] +Size = typing.Any + +_reductions = ("min", "max", "sum", "mean", "prod", "any", "all") + +# magic integers are required to stay within +# traceable subset of language +_unknown_axis_length = -999999 +_expected_axis_length = -99999 + + +def _product(sequence: List[int]) -> int: + """minimalistic product that works both with numbers and symbols. 
Supports empty lists""" + result = 1 + for element in sequence: + result *= element + return result + + +def _reduce_axes(tensor, reduction_type: Reduction, reduced_axes: List[int], backend): + if callable(reduction_type): + # custom callable + return reduction_type(tensor, tuple(reduced_axes)) + else: + # one of built-in operations + assert reduction_type in _reductions + if reduction_type == "mean": + if not backend.is_float_type(tensor): + raise NotImplementedError("reduce_mean is not available for non-floating tensors") + return backend.reduce(tensor, reduction_type, tuple(reduced_axes)) + + +def _optimize_transformation(init_shapes, reduced_axes, axes_reordering, final_shapes): + # 'collapses' neighboring axes if those participate in the result pattern in the same order + # TODO add support for added_axes + assert len(axes_reordering) + len(reduced_axes) == len(init_shapes) + # joining consecutive axes that will be reduced + # possibly we can skip this if all backends can optimize this (not sure) + reduced_axes = tuple(sorted(reduced_axes)) + for i in range(len(reduced_axes) - 1)[::-1]: + if reduced_axes[i] + 1 == reduced_axes[i + 1]: + removed_axis = reduced_axes[i + 1] + removed_length = init_shapes[removed_axis] + init_shapes = init_shapes[:removed_axis] + init_shapes[removed_axis + 1 :] + init_shapes[removed_axis - 1] *= removed_length + reduced_axes = reduced_axes[: i + 1] + tuple(axis - 1 for axis in reduced_axes[i + 2 :]) + + # removing axes that are moved together during reshape + def build_mapping(): + init_to_final = {} + for axis in range(len(init_shapes)): + if axis in reduced_axes: + init_to_final[axis] = None + else: + after_reduction = sum(x is not None for x in init_to_final.values()) + init_to_final[axis] = list(axes_reordering).index(after_reduction) + return init_to_final + + init_axis_to_final_axis = build_mapping() + + for init_axis in range(len(init_shapes) - 1)[::-1]: + if init_axis_to_final_axis[init_axis] is None: + continue + if 
init_axis_to_final_axis[init_axis + 1] is None: + continue + if init_axis_to_final_axis[init_axis] + 1 == init_axis_to_final_axis[init_axis + 1]: + removed_axis = init_axis + 1 + removed_length = init_shapes[removed_axis] + removed_axis_after_reduction = sum(x not in reduced_axes for x in range(removed_axis)) + + reduced_axes = tuple(axis if axis < removed_axis else axis - 1 for axis in reduced_axes) + init_shapes = init_shapes[:removed_axis] + init_shapes[removed_axis + 1 :] + init_shapes[removed_axis - 1] *= removed_length + old_reordering = axes_reordering + axes_reordering = [] + for axis in old_reordering: + if axis == removed_axis_after_reduction: + pass + elif axis < removed_axis_after_reduction: + axes_reordering.append(axis) + else: + axes_reordering.append(axis - 1) + init_axis_to_final_axis = build_mapping() + + return init_shapes, reduced_axes, axes_reordering, final_shapes + + +CookedRecipe = Tuple[Optional[List[int]], Optional[List[int]], List[int], Dict[int, int], Optional[List[int]], int] + +# Actual type is tuple[tuple[str, int], ...] +# However torch.jit.script does not "understand" the correct type, +# and torch_specific will use list version. +HashableAxesLengths = Tuple[Tuple[str, int], ...] +FakeHashableAxesLengths = List[Tuple[str, int]] + + +class TransformRecipe: + """ + Recipe describes actual computation pathway. + Recipe can be applied to a tensor or variable. + """ + + # structure is non-mutable. In future, this can be non-mutable dataclass (python 3.7+) + # update: pytorch 2.0 torch.jit.script seems to have problems with dataclasses unless they were explicitly provided + + def __init__( + self, + # list of sizes (or just sizes) for elementary axes as they appear in left expression. + # this is what (after computing unknown parts) will be a shape after first transposition. + # This does not include any ellipsis dimensions. 
+ elementary_axes_lengths: List[int], + # if additional axes are provided, they should be set in prev array + # This shows mapping from name to position + axis_name2elementary_axis: Dict[str, int], + # each dimension in input can help to reconstruct length of one elementary axis + # or verify one of dimensions. Each element points to element of elementary_axes_lengths. + input_composition_known_unknown: List[Tuple[List[int], List[int]]], + # permutation applied to elementary axes, if ellipsis is absent + axes_permutation: List[int], + # permutation puts reduced axes in the end, we only need to know the first position. + first_reduced_axis: int, + # at which positions which of elementary axes should appear. Axis position -> axis index. + added_axes: Dict[int, int], + # ids of axes as they appear in result, again pointers to elementary_axes_lengths, + # only used to infer result dimensions + output_composite_axes: List[List[int]], + ): + self.elementary_axes_lengths: List[int] = elementary_axes_lengths + self.axis_name2elementary_axis: Dict[str, int] = axis_name2elementary_axis + self.input_composition_known_unknown: List[Tuple[List[int], List[int]]] = input_composition_known_unknown + self.axes_permutation: List[int] = axes_permutation + + self.first_reduced_axis: int = first_reduced_axis + self.added_axes: Dict[int, int] = added_axes + self.output_composite_axes: List[List[int]] = output_composite_axes + + +def _reconstruct_from_shape_uncached( + self: TransformRecipe, shape: List[int], axes_dims: FakeHashableAxesLengths +) -> CookedRecipe: + """ + Reconstruct all actual parameters using shape. + Shape is a tuple that may contain integers, shape symbols (tf, theano) and UnknownSize (tf, previously mxnet) + known axes can be integers or symbols, but not Nones. 
+ """ + # magic number + need_init_reshape = False + + # last axis is allocated for collapsed ellipsis + axes_lengths: List[int] = list(self.elementary_axes_lengths) + for axis, dim in axes_dims: + axes_lengths[self.axis_name2elementary_axis[axis]] = dim + + for input_axis, (known_axes, unknown_axes) in enumerate(self.input_composition_known_unknown): + length = shape[input_axis] + if len(known_axes) == 0 and len(unknown_axes) == 1: + # shortcut for the most common case + axes_lengths[unknown_axes[0]] = length + continue + + known_product = 1 + for axis in known_axes: + known_product *= axes_lengths[axis] + + if len(unknown_axes) == 0: + if isinstance(length, int) and isinstance(known_product, int) and length != known_product: + raise EinopsError(f"Shape mismatch, {length} != {known_product}") + else: + # assert len(unknown_axes) == 1, 'this is enforced when recipe is created, so commented out' + if isinstance(length, int) and isinstance(known_product, int) and length % known_product != 0: + raise EinopsError(f"Shape mismatch, can't divide axis of length {length} in chunks of {known_product}") + + unknown_axis = unknown_axes[0] + inferred_length: int = length // known_product + axes_lengths[unknown_axis] = inferred_length + + if len(known_axes) + len(unknown_axes) != 1: + need_init_reshape = True + + # at this point all axes_lengths are computed (either have values or variables, but not Nones) + + # elementary axes are ordered as they appear in input, then all added axes + init_shapes: Optional[List[int]] = axes_lengths[: len(self.axes_permutation)] if need_init_reshape else None + + need_final_reshape = False + final_shapes: List[int] = [] + for grouping in self.output_composite_axes: + lengths = [axes_lengths[elementary_axis] for elementary_axis in grouping] + final_shapes.append(_product(lengths)) + if len(lengths) != 1: + need_final_reshape = True + + added_axes: Dict[int, int] = { + pos: axes_lengths[pos_in_elementary] for pos, pos_in_elementary in 
self.added_axes.items() + } + + # this list can be empty + reduced_axes = list(range(self.first_reduced_axis, len(self.axes_permutation))) + + n_axes_after_adding_axes = len(added_axes) + len(self.axes_permutation) + + axes_reordering: Optional[List[int]] = self.axes_permutation + if self.axes_permutation == list(range(len(self.axes_permutation))): + axes_reordering = None + + _final_shapes = final_shapes if need_final_reshape else None + return init_shapes, axes_reordering, reduced_axes, added_axes, _final_shapes, n_axes_after_adding_axes + + +_reconstruct_from_shape = functools.lru_cache(1024)(_reconstruct_from_shape_uncached) + + +def _apply_recipe( + backend, recipe: TransformRecipe, tensor: Tensor, reduction_type: Reduction, axes_lengths: HashableAxesLengths +) -> Tensor: + # this method implements actual work for all backends for 3 operations + try: + init_shapes, axes_reordering, reduced_axes, added_axes, final_shapes, n_axes_w_added = _reconstruct_from_shape( + recipe, backend.shape(tensor), axes_lengths + ) + except TypeError: + # shape or one of passed axes lengths is not hashable (i.e. 
def _apply_recipe_array_api(
    xp, recipe: TransformRecipe, tensor: Tensor, reduction_type: Reduction, axes_lengths: HashableAxesLengths
) -> Tensor:
    """Apply a prepared recipe to `tensor` using only functions looked up on `xp`.

    The plan is obtained from `_reconstruct_from_shape` and executed in order;
    every step is skipped when the plan does not require it:

    1. initial reshape (`xp.reshape`)
    2. axes permutation (`xp.permute_dims`)
    3. reduction over the trailing `reduced_axes`
    4. insertion of new axes (`xp.expand_dims` followed by `xp.broadcast_to`)
    5. final reshape (`xp.reshape`)

    Parameters:
        xp: namespace providing the functions listed above, plus one function per
            built-in reduction name (presumably an array-API namespace; confirm with callers)
        recipe: prepared transformation recipe
        tensor: input tensor; must expose `.shape`
        reduction_type: name of a built-in reduction, or a callable
            `f(tensor, reduced_axes) -> tensor`
        axes_lengths: hashable collection of (axis_name, axis_length) pairs

    Returns:
        the transformed tensor
    """
    plan = _reconstruct_from_shape(recipe, tensor.shape, axes_lengths)
    # the last plan entry (number of axes after adding) is not needed here: broadcasting is used instead
    init_shapes, axes_reordering, reduced_axes, added_axes, final_shapes, _n_axes_w_added = plan

    if init_shapes is not None:
        tensor = xp.reshape(tensor, init_shapes)
    if axes_reordering is not None:
        tensor = xp.permute_dims(tensor, axes_reordering)

    if reduced_axes:
        axes_to_reduce = tuple(reduced_axes)
        if callable(reduction_type):
            # user-supplied reduction
            tensor = reduction_type(tensor, axes_to_reduce)
        else:
            # built-in reduction, resolved by name on the namespace
            assert reduction_type in _reductions
            reducer = getattr(xp, reduction_type)
            tensor = reducer(tensor, axis=axes_to_reduce)

    if added_axes:
        # first insert unit-length axes at every requested position ...
        for position in added_axes:
            tensor = xp.expand_dims(tensor, axis=position)

        # ... then broadcast those unit axes to their requested lengths
        broadcast_shape = list(tensor.shape)
        for position, length in added_axes.items():
            broadcast_shape[position] = length
        tensor = xp.broadcast_to(tensor, broadcast_shape)

    if final_shapes is not None:
        tensor = xp.reshape(tensor, final_shapes)
    return tensor
_prepare_transformation_recipe( + pattern: str, + operation: Reduction, + axes_names: Tuple[str, ...], + ndim: int, +) -> TransformRecipe: + """Perform initial parsing of pattern and provided supplementary info + axes_lengths is a tuple of tuples (axis_name, axis_length) + """ + left_str, rght_str = pattern.split("->") + left = ParsedExpression(left_str) + rght = ParsedExpression(rght_str) + + # checking that axes are in agreement - new axes appear only in repeat, while disappear only in reduction + if not left.has_ellipsis and rght.has_ellipsis: + raise EinopsError("Ellipsis found in right side, but not left side of a pattern {}".format(pattern)) + if left.has_ellipsis and left.has_ellipsis_parenthesized: + raise EinopsError("Ellipsis inside parenthesis in the left side is not allowed: {}".format(pattern)) + if operation == "rearrange": + if left.has_non_unitary_anonymous_axes or rght.has_non_unitary_anonymous_axes: + raise EinopsError("Non-unitary anonymous axes are not supported in rearrange (exception is length 1)") + difference = set.symmetric_difference(left.identifiers, rght.identifiers) + if len(difference) > 0: + raise EinopsError("Identifiers only on one side of expression (should be on both): {}".format(difference)) + elif operation == "repeat": + difference = set.difference(left.identifiers, rght.identifiers) + if len(difference) > 0: + raise EinopsError("Unexpected identifiers on the left side of repeat: {}".format(difference)) + axes_without_size = set.difference( + {ax for ax in rght.identifiers if not isinstance(ax, AnonymousAxis)}, + {*left.identifiers, *axes_names}, + ) + if len(axes_without_size) > 0: + raise EinopsError("Specify sizes for new axes in repeat: {}".format(axes_without_size)) + elif operation in _reductions or callable(operation): + difference = set.difference(rght.identifiers, left.identifiers) + if len(difference) > 0: + raise EinopsError("Unexpected identifiers on the right side of reduce {}: {}".format(operation, difference)) + 
else: + raise EinopsError("Unknown reduction {}. Expect one of {}.".format(operation, _reductions)) + + if left.has_ellipsis: + n_other_dims = len(left.composition) - 1 + if ndim < n_other_dims: + raise EinopsError(f"Wrong shape: expected >={n_other_dims} dims. Received {ndim}-dim tensor.") + ellipsis_ndim = ndim - n_other_dims + ell_axes = [_ellipsis + str(i) for i in range(ellipsis_ndim)] + left_composition = [] + for composite_axis in left.composition: + if composite_axis == _ellipsis: + for axis in ell_axes: + left_composition.append([axis]) + else: + left_composition.append(composite_axis) + + rght_composition = [] + for composite_axis in rght.composition: + if composite_axis == _ellipsis: + for axis in ell_axes: + rght_composition.append([axis]) + else: + group = [] + for axis in composite_axis: + if axis == _ellipsis: + group.extend(ell_axes) + else: + group.append(axis) + rght_composition.append(group) + + left.identifiers.update(ell_axes) + left.identifiers.remove(_ellipsis) + if rght.has_ellipsis: + rght.identifiers.update(ell_axes) + rght.identifiers.remove(_ellipsis) + else: + if ndim != len(left.composition): + raise EinopsError(f"Wrong shape: expected {len(left.composition)} dims. 
Received {ndim}-dim tensor.") + left_composition = left.composition + rght_composition = rght.composition + + # parsing all dimensions to find out lengths + axis_name2known_length: Dict[Union[str, AnonymousAxis], int] = OrderedDict() + for composite_axis in left_composition: + for axis_name in composite_axis: + if isinstance(axis_name, AnonymousAxis): + axis_name2known_length[axis_name] = axis_name.value + else: + axis_name2known_length[axis_name] = _unknown_axis_length + + # axis_ids_after_first_reshape = range(len(axis_name2known_length)) at this point + + repeat_axes_names = [] + for axis_name in rght.identifiers: + if axis_name not in axis_name2known_length: + if isinstance(axis_name, AnonymousAxis): + axis_name2known_length[axis_name] = axis_name.value + else: + axis_name2known_length[axis_name] = _unknown_axis_length + repeat_axes_names.append(axis_name) + + axis_name2position = {name: position for position, name in enumerate(axis_name2known_length)} + + # axes provided as kwargs + for elementary_axis in axes_names: + if not ParsedExpression.check_axis_name(elementary_axis): + raise EinopsError("Invalid name for an axis", elementary_axis) + if elementary_axis not in axis_name2known_length: + raise EinopsError("Axis {} is not used in transform".format(elementary_axis)) + axis_name2known_length[elementary_axis] = _expected_axis_length + + input_axes_known_unknown = [] + # some shapes are inferred later - all information is prepared for faster inference + for i, composite_axis in enumerate(left_composition): + known: Set[str] = {axis for axis in composite_axis if axis_name2known_length[axis] != _unknown_axis_length} + unknown: Set[str] = {axis for axis in composite_axis if axis_name2known_length[axis] == _unknown_axis_length} + if len(unknown) > 1: + raise EinopsError("Could not infer sizes for {}".format(unknown)) + assert len(unknown) + len(known) == len(composite_axis) + input_axes_known_unknown.append( + ([axis_name2position[axis] for axis in known], 
def _prepare_recipes_for_all_dims(
    pattern: str, operation: Reduction, axes_names: Tuple[str, ...]
) -> Dict[int, TransformRecipe]:
    """
    Internal function, used in layers.

    Pre-computes one recipe per input dimensionality the pattern can accept, so that a layer
    can build all its recipes at initialization time.

    Without an ellipsis on the left side, only one dimensionality is possible (the number of
    groups on the left side). With an ellipsis, the ellipsis is expanded to 0..7 axes,
    giving eight candidate dimensionalities.

    Returns:
        dict mapping ndim -> recipe prepared for inputs with that many dimensions
    """
    # unpacking into exactly two names keeps the original failure mode for patterns without a single "->"
    left_str, _rght_str = pattern.split("->")
    left = ParsedExpression(left_str)
    n_left_groups = len(left.composition)

    if left.has_ellipsis:
        # the ellipsis itself occupies one entry of the composition, hence the "- 1"
        candidate_ndims = [n_left_groups - 1 + ellipsis_ndim for ellipsis_ndim in range(8)]
    else:
        candidate_ndims = [n_left_groups]

    recipes: Dict[int, TransformRecipe] = {}
    for ndim in candidate_ndims:
        recipes[ndim] = _prepare_transformation_recipe(pattern, operation, axes_names, ndim=ndim)
    return recipes
+ >>> reduce(x, 'b c (h1 h2) (w1 w2) -> b c h1 w1', 'max', h1=3, w1=4).shape + (10, 20, 3, 4) + + # Global average pooling + >>> reduce(x, 'b c h w -> b c', 'mean').shape + (10, 20) + + # subtracting mean over batch for each channel; + # similar to x - np.mean(x, axis=(0, 2, 3), keepdims=True) + >>> y = x - reduce(x, 'b c h w -> 1 c 1 1', 'mean') + + # Subtracting per-image mean for each channel + >>> y = x - reduce(x, 'b c h w -> b c 1 1', 'mean') + + # same as previous, but using empty compositions + >>> y = x - reduce(x, 'b c h w -> b c () ()', 'mean') + + ``` + + Parameters: + tensor: tensor: tensor of any supported library (e.g. numpy.ndarray, tensorflow, pytorch). + list of tensors is also accepted, those should be of the same type and shape + pattern: string, reduction pattern + reduction: one of available reductions ('min', 'max', 'sum', 'mean', 'prod', 'any', 'all'). + Alternatively, a callable f(tensor, reduced_axes) -> tensor can be provided. + This allows using various reductions like: np.max, np.nanmean, tf.reduce_logsumexp, torch.var, etc. + axes_lengths: any additional specifications for dimensions + + Returns: + tensor of the same type as input + """ + try: + if isinstance(tensor, list): + if len(tensor) == 0: + raise TypeError("Rearrange/Reduce/Repeat can't be applied to an empty list") + backend = get_backend(tensor[0]) + tensor = backend.stack_on_zeroth_dimension(tensor) + else: + backend = get_backend(tensor) + + hashable_axes_lengths = tuple(axes_lengths.items()) + shape = backend.shape(tensor) + recipe = _prepare_transformation_recipe(pattern, reduction, axes_names=tuple(axes_lengths), ndim=len(shape)) + return _apply_recipe( + backend, recipe, cast(Tensor, tensor), reduction_type=reduction, axes_lengths=hashable_axes_lengths + ) + except EinopsError as e: + message = ' Error while processing {}-reduction pattern "{}".'.format(reduction, pattern) + if not isinstance(tensor, list): + message += "\n Input tensor shape: {}. 
".format(shape) + else: + message += "\n Input is list. " + message += "Additional info: {}.".format(axes_lengths) + raise EinopsError(message + "\n {}".format(e)) + + +def rearrange(tensor: Union[Tensor, List[Tensor]], pattern: str, **axes_lengths: Size) -> Tensor: + """ + einops.rearrange is a reader-friendly smart element reordering for multidimensional tensors. + This operation includes functionality of transpose (axes permutation), reshape (view), squeeze, unsqueeze, + stack, concatenate and other operations. + + Examples: + + ```python + # suppose we have a set of 32 images in "h w c" format (height-width-channel) + >>> images = [np.random.randn(30, 40, 3) for _ in range(32)] + + # stack along first (batch) axis, output is a single array + >>> rearrange(images, 'b h w c -> b h w c').shape + (32, 30, 40, 3) + + # stacked and reordered axes to "b c h w" format + >>> rearrange(images, 'b h w c -> b c h w').shape + (32, 3, 30, 40) + + # concatenate images along height (vertical axis), 960 = 32 * 30 + >>> rearrange(images, 'b h w c -> (b h) w c').shape + (960, 40, 3) + + # concatenated images along horizontal axis, 1280 = 32 * 40 + >>> rearrange(images, 'b h w c -> h (b w) c').shape + (30, 1280, 3) + + # flattened each image into a vector, 3600 = 30 * 40 * 3 + >>> rearrange(images, 'b h w c -> b (c h w)').shape + (32, 3600) + + # split each image into 4 smaller (top-left, top-right, bottom-left, bottom-right), 128 = 32 * 2 * 2 + >>> rearrange(images, 'b (h1 h) (w1 w) c -> (b h1 w1) h w c', h1=2, w1=2).shape + (128, 15, 20, 3) + + # space-to-depth operation + >>> rearrange(images, 'b (h h1) (w w1) c -> b h w (c h1 w1)', h1=2, w1=2).shape + (32, 15, 20, 12) + + ``` + + When composing axes, C-order enumeration used (consecutive elements have different last axis). + Find more examples in einops tutorial. + + Parameters: + tensor: tensor of any supported library (e.g. numpy.ndarray, tensorflow, pytorch). 
def repeat(tensor: Union[Tensor, List[Tensor]], pattern: str, **axes_lengths: Size) -> Tensor:
    """
    einops.repeat allows reordering elements and repeating them in arbitrary combinations.
    This operation includes functionality of repeat, tile, and broadcast functions.

    The call is forwarded to `reduce` with `reduction="repeat"`; all validation and
    error reporting happen there.

    Examples for repeat operation:

    ```python
    # a grayscale image (of shape height x width)
    >>> image = np.random.randn(30, 40)

    # change it to RGB format by repeating in each channel
    >>> repeat(image, 'h w -> h w c', c=3).shape
    (30, 40, 3)

    # repeat image 2 times along height (vertical axis)
    >>> repeat(image, 'h w -> (repeat h) w', repeat=2).shape
    (60, 40)

    # repeat image 2 times along height and 3 times along width
    >>> repeat(image, 'h w -> (h2 h) (w3 w)', h2=2, w3=3).shape
    (60, 120)

    # convert each pixel to a small square 2x2. Upsample image by 2x
    >>> repeat(image, 'h w -> (h h2) (w w2)', h2=2, w2=2).shape
    (60, 80)

    # pixelate image first by downsampling by 2x, then upsampling
    >>> downsampled = reduce(image, '(h h2) (w w2) -> h w', 'mean', h2=2, w2=2)
    >>> repeat(downsampled, 'h w -> (h h2) (w w2)', h2=2, w2=2).shape
    (30, 40)

    ```

    When composing axes, C-order enumeration used (consecutive elements have different last axis).
    Find more examples in einops tutorial.

    Parameters:
        tensor: tensor of any supported library (e.g. numpy.ndarray, tensorflow, pytorch).
            list of tensors is also accepted, those should be of the same type and shape
        pattern: string, repetition pattern of the form "input axes -> output axes"
        axes_lengths: lengths of axes, passed as keyword arguments (axis_name=length)

    Returns:
        Tensor of the same type as input. If possible, a view to the original tensor is returned.

    """
    # repeat is the generic reduce machinery run with the special "repeat" operation name
    return reduce(tensor, pattern, reduction="repeat", **axes_lengths)
# _enumerate_directions is not exposed in the public API
def _enumerate_directions(x):
    """
    For an n-dimensional tensor, returns one index tensor per axis.

    ```python
    x = np.zeros([2, 3, 4]) # or any other tensor
    i, j, k = _enumerate_directions(x)
    result = i + 2*j + 3*k
    ```

    `result[i, j, k] = i + 2j + 3k`, and `result` has the same shape as `x`.
    The tensor for axis `a` holds `arange(0, length_a)` reshaped so that every other axis has length 1.
    Works very similarly to numpy.ogrid (open indexing grid)
    """
    backend = get_backend(x)
    full_shape = backend.shape(x)
    n_dims = len(full_shape)

    directions = []
    for axis_id, axis_length in enumerate(full_shape):
        # all ones except along the axis being enumerated
        direction_shape = [1] * n_dims
        direction_shape[axis_id] = axis_length
        indices = backend.arange(0, axis_length)
        directions.append(backend.reshape(indices, direction_shape))
    return directions
to `numpy.ndarray` + + Parameters: + tensor: tensor of any known imperative framework + + Returns: + `numpy.ndarray`, converted to numpy + """ + return get_backend(tensor).to_numpy(tensor) + + +def _validate_einsum_axis_name(axis_name): + if len(axis_name) == 0: + raise NotImplementedError("Singleton () axes are not yet supported in einsum.") + if len(axis_name) > 1: + raise NotImplementedError("Shape rearrangement is not yet supported in einsum.") + + axis_name = axis_name[0] + + if isinstance(axis_name, AnonymousAxis): + raise NotImplementedError("Anonymous axes are not yet supported in einsum.") + if len(axis_name) == 0: + raise RuntimeError("Encountered empty axis name in einsum.") + if not isinstance(axis_name, str): + raise RuntimeError("Axis name in einsum must be a string.") + + +@functools.lru_cache(256) +def _compactify_pattern_for_einsum(pattern: str) -> str: + if "->" not in pattern: + # numpy allows this, so make sure users + # don't accidentally do something like this. + raise ValueError("Einsum pattern must contain '->'.") + lefts_str, right_str = pattern.split("->") + + lefts = [ParsedExpression(left, allow_underscore=True, allow_duplicates=True) for left in lefts_str.split(",")] + + right = ParsedExpression(right_str, allow_underscore=True) + + # Start from 'a' and go up to 'Z' + output_axis_names = string.ascii_letters + i = 0 + axis_name_mapping = {} + + left_patterns = [] + for left in lefts: + left_pattern = "" + for raw_axis_name in left.composition: + if raw_axis_name == _ellipsis: + left_pattern += "..." 
+ continue + + _validate_einsum_axis_name(raw_axis_name) + axis_name = raw_axis_name[0] + if axis_name not in axis_name_mapping: + if i >= len(output_axis_names): + raise RuntimeError("Too many axes in einsum.") + axis_name_mapping[axis_name] = output_axis_names[i] + i += 1 + + left_pattern += axis_name_mapping[axis_name] + left_patterns.append(left_pattern) + + compact_pattern = ",".join(left_patterns) + "->" + + for raw_axis_name in right.composition: + if raw_axis_name == _ellipsis: + compact_pattern += "..." + continue + + _validate_einsum_axis_name(raw_axis_name) + axis_name = raw_axis_name[0] + + if axis_name not in axis_name_mapping: + raise EinopsError(f"Unknown axis {axis_name} on right side of einsum {pattern}.") + + compact_pattern += axis_name_mapping[axis_name] + + return compact_pattern + + +@typing.overload +def einsum(tensor: Tensor, pattern: str, /) -> Tensor: ... + + +@typing.overload +def einsum(tensor1: Tensor, tensor2: Tensor, pattern: str, /) -> Tensor: ... + + +@typing.overload +def einsum(tensor1: Tensor, tensor2: Tensor, tensor3: Tensor, pattern: str, /) -> Tensor: ... + + +@typing.overload +def einsum(tensor1: Tensor, tensor2: Tensor, tensor3: Tensor, tensor4: Tensor, pattern: str, /) -> Tensor: ... + + +def einsum(*tensors_and_pattern: Union[Tensor, str]) -> Tensor: + r""" + einops.einsum calls einsum operations with einops-style named + axes indexing, computing tensor products with an arbitrary + number of tensors. Unlike typical einsum syntax, here you must + pass tensors first, and then the pattern. + + Also, note that rearrange operations such as `"(batch chan) out"`, + or singleton axes `()`, are not currently supported. 
+ + Examples: + + For a given pattern such as: + ```python + >>> x, y, z = np.random.randn(3, 20, 20, 20) + >>> output = einsum(x, y, z, "a b c, c b d, a g k -> a b k") + + ``` + the following formula is computed: + ```tex + output[a, b, k] = + \sum_{c, d, g} x[a, b, c] * y[c, b, d] * z[a, g, k] + ``` + where the summation over `c`, `d`, and `g` is performed + because those axes names do not appear on the right-hand side. + + Let's see some additional examples: + ```python + # Filter a set of images: + >>> batched_images = np.random.randn(128, 16, 16) + >>> filters = np.random.randn(16, 16, 30) + >>> result = einsum(batched_images, filters, + ... "batch h w, h w channel -> batch channel") + >>> result.shape + (128, 30) + + # Matrix multiplication, with an unknown input shape: + >>> batch_shape = (50, 30) + >>> data = np.random.randn(*batch_shape, 20) + >>> weights = np.random.randn(10, 20) + >>> result = einsum(weights, data, + ... "out_dim in_dim, ... in_dim -> ... out_dim") + >>> result.shape + (50, 30, 10) + + # Matrix trace on a single tensor: + >>> matrix = np.random.randn(10, 10) + >>> result = einsum(matrix, "i i ->") + >>> result.shape + () + + ``` + + Parameters: + tensors_and_pattern: + tensors: tensors of any supported library (numpy, tensorflow, pytorch, jax). + pattern: string, einsum pattern, with commas + separating specifications for each tensor. + pattern should be provided after all tensors. + + Returns: + Tensor of the same type as input, after processing with einsum. + + """ + if len(tensors_and_pattern) <= 1: + raise ValueError( + "`einops.einsum` takes at minimum two arguments: the tensors (at least one), followed by the pattern." + ) + pattern = tensors_and_pattern[-1] + if not isinstance(pattern, str): + raise ValueError( + "The last argument passed to `einops.einsum` must be a string, representing the einsum pattern." 
@lru_cache(maxsize=128)
def analyze_pattern(pattern: str, opname: str) -> Tuple[int, int, int]:
    """Validate a pack/unpack pattern and locate its `*` axis.

    Parameters:
        pattern: whitespace-separated axis names containing exactly one "*", e.g. "i j * k"
        opname: name of the calling operation, used only in error messages

    Returns:
        (n_axes_before, n_axes_after, min_axes): number of named axes before "*",
        number after it, and their sum (the minimal dimensionality a tensor must have)

    Raises:
        EinopsError: on duplicated axis names, a missing "*", or an invalid axis name
    """
    axes = pattern.split()
    axes_set = set(axes)
    # duplicates are checked first, so a repeated "*" is reported as a duplicate
    if len(axes) != len(axes_set):
        raise EinopsError(f'Duplicates in axes names in {opname}(..., "{pattern}")')
    if "*" not in axes_set:
        raise EinopsError(f'No *-axis in {opname}(..., "{pattern}")')
    for axis in axes:
        if axis == "*":
            continue
        is_valid, reason = ParsedExpression.check_axis_name_return_reason(axis)
        if not is_valid:
            # include the reason so the user can see why the name was rejected
            raise EinopsError(f'Invalid axis name {axis} in {opname}(..., "{pattern}"): {reason}')
    n_axes_before = axes.index("*")
    n_axes_after = len(axes) - n_axes_before - 1
    min_axes = n_axes_before + n_axes_after
    return n_axes_before, n_axes_after, min_axes
def prod(x: Shape) -> int:
    """Return the product of all entries of `x`; an empty shape gives 1."""
    product = 1
    for axis_length in x:
        product *= axis_length
    return product
a selected axes. + See einops tutorial for introduction into packing (and how it replaces stack and concatenation). + + Parameters: + tensor: tensor to be unpacked + packed_shapes: packed_shapes (aka PS) is a list of shapes that take place of '*' in each output. + output will contain a single tensor for every provided shape + pattern: pattern that is shared for input and all outputs, e.g. "i j * k" or "batch seq *", + where * designates an axis to be unpacked + + Returns: + list of tensors + + If framework supports views, results are views to the original tensor. + + Example: + ```python + >>> from numpy import zeros as Z + >>> inputs = [Z([2, 3, 5]), Z([2, 3, 7, 5]), Z([2, 3, 7, 9, 5])] + >>> packed, ps = pack(inputs, 'i j * k') + >>> packed.shape, ps + ((2, 3, 71, 5), [(), (7,), (7, 9)]) + ``` + + In this example, axes were matched to: i=2, j=3, k=5 based on order (first, second, and last). + All other axes were 'packed' and concatenated. + PS (packed shapes) contains information about axes that were matched to '*' in every input. + Resulting tensor has as many elements as all inputs in total. + + Packing can be reversed with unpack, which additionally needs PS (packed shapes) to reconstruct order. + + ```python + >>> inputs_unpacked = unpack(packed, ps, 'i j * k') + >>> [x.shape for x in inputs_unpacked] + [(2, 3, 5), (2, 3, 7, 5), (2, 3, 7, 9, 5)] + ``` + + Read the tutorial for introduction and application scenarios. 
+ """ + n_axes_before, n_axes_after, min_axes = analyze_pattern(pattern, opname="unpack") + + backend = get_backend(tensor) + input_shape = backend.shape(tensor) + if len(input_shape) != n_axes_before + 1 + n_axes_after: + raise EinopsError(f"unpack(..., {pattern}) received input of wrong dim with shape {input_shape}") + + unpacked_axis: int = n_axes_before + + lengths_of_composed_axes: List[int] = [-1 if -1 in p_shape else prod(p_shape) for p_shape in packed_shapes] + + n_unknown_composed_axes = sum(int(x == -1) for x in lengths_of_composed_axes) + if n_unknown_composed_axes > 1: + raise EinopsError( + f"unpack(..., {pattern}) received more than one -1 in {packed_shapes} and can't infer dimensions" + ) + + # following manipulations allow to skip some shape verifications + # and leave it to backends + + # [[], [2, 3], [4], [-1, 5], [6]] < examples of packed_axis + # split positions when computed should be + # [0, 1, 7, 11, N-6 , N ], where N = length of axis + split_positions = [0] * len(packed_shapes) + [input_shape[unpacked_axis]] + if n_unknown_composed_axes == 0: + for i, x in enumerate(lengths_of_composed_axes[:-1]): + split_positions[i + 1] = split_positions[i] + x + else: + unknown_composed_axis: int = lengths_of_composed_axes.index(-1) + for i in range(unknown_composed_axis): + split_positions[i + 1] = split_positions[i] + lengths_of_composed_axes[i] + for j in range(unknown_composed_axis + 1, len(lengths_of_composed_axes))[::-1]: + split_positions[j] = split_positions[j + 1] - lengths_of_composed_axes[j] + + shape_start = input_shape[:unpacked_axis] + shape_end = input_shape[unpacked_axis + 1 :] + slice_filler = (slice(None, None),) * unpacked_axis + try: + return [ + backend.reshape( + # shortest way slice arbitrary axis + tensor[(*slice_filler, slice(split_positions[i], split_positions[i + 1]))], + (*shape_start, *element_shape, *shape_end), + ) + for i, element_shape in enumerate(packed_shapes) + ] + except Exception: + # this hits if there is an error 
class AnonymousAxis:
    """Axis given only by its length (e.g. the `2` in 'b (h 2)').

    Important thing: all instances of this class are not equal to each other
    (no `__eq__` is defined, so comparison falls back to identity).
    """

    def __init__(self, value: str):
        # the length arrives as text from the parsed pattern
        self.value = int(value)
        if self.value == 1:
            # length-1 axes are expected to be handled before an AnonymousAxis is created
            raise EinopsError("No need to create anonymous axis of length 1. Report this as an issue")
        if self.value < 1:
            raise EinopsError("Anonymous axis should have positive length, not {}".format(self.value))

    def __repr__(self):
        return "{}-axis".format(self.value)
Axes with size 1 are exceptional and replaced with empty composition + self.has_non_unitary_anonymous_axes: bool = False + # composition keeps structure of composite axes, see how different corner cases are handled in tests + self.composition: List[Union[List[str], str]] = [] + if "." in expression: + if "..." not in expression: + raise EinopsError("Expression may contain dots only inside ellipsis (...)") + if str.count(expression, "...") != 1 or str.count(expression, ".") != 3: + raise EinopsError( + "Expression may contain dots only inside ellipsis (...); only one ellipsis for tensor " + ) + expression = expression.replace("...", _ellipsis) + self.has_ellipsis = True + + bracket_group: Optional[List[str]] = None + + def add_axis_name(x): + if x in self.identifiers: + if not (allow_underscore and x == "_") and not allow_duplicates: + raise EinopsError('Indexing expression contains duplicate dimension "{}"'.format(x)) + if x == _ellipsis: + self.identifiers.add(_ellipsis) + if bracket_group is None: + self.composition.append(_ellipsis) + self.has_ellipsis_parenthesized = False + else: + bracket_group.append(_ellipsis) + self.has_ellipsis_parenthesized = True + else: + is_number = str.isdecimal(x) + if is_number and int(x) == 1: + # handling the case of anonymous axis of length 1 + if bracket_group is None: + self.composition.append([]) + else: + pass # no need to think about 1s inside parenthesis + return + is_axis_name, reason = self.check_axis_name_return_reason(x, allow_underscore=allow_underscore) + if not (is_number or is_axis_name): + raise EinopsError("Invalid axis identifier: {}\n{}".format(x, reason)) + if is_number: + x = AnonymousAxis(x) + self.identifiers.add(x) + if is_number: + self.has_non_unitary_anonymous_axes = True + if bracket_group is None: + self.composition.append([x]) + else: + bracket_group.append(x) + + current_identifier = None + for char in expression: + if char in "() ": + if current_identifier is not None: + 
add_axis_name(current_identifier) + current_identifier = None + if char == "(": + if bracket_group is not None: + raise EinopsError("Axis composition is one-level (brackets inside brackets not allowed)") + bracket_group = [] + elif char == ")": + if bracket_group is None: + raise EinopsError("Brackets are not balanced") + self.composition.append(bracket_group) + bracket_group = None + elif str.isalnum(char) or char in ["_", _ellipsis]: + if current_identifier is None: + current_identifier = char + else: + current_identifier += char + else: + raise EinopsError("Unknown character '{}'".format(char)) + + if bracket_group is not None: + raise EinopsError('Imbalanced parentheses in expression: "{}"'.format(expression)) + if current_identifier is not None: + add_axis_name(current_identifier) + + def flat_axes_order(self) -> List: + result = [] + for composed_axis in self.composition: + assert isinstance(composed_axis, list), "does not work with ellipsis" + for axis in composed_axis: + result.append(axis) + return result + + def has_composed_axes(self) -> bool: + # this will ignore 1 inside brackets + for axes in self.composition: + if isinstance(axes, list) and len(axes) > 1: + return True + return False + + @staticmethod + def check_axis_name_return_reason(name: str, allow_underscore: bool = False) -> Tuple[bool, str]: + if not str.isidentifier(name): + return False, "not a valid python identifier" + elif name[0] == "_" or name[-1] == "_": + if name == "_" and allow_underscore: + return True, "" + return False, "axis name should should not start or end with underscore" + else: + if keyword.iskeyword(name): + warnings.warn("It is discouraged to use axes names that are keywords: {}".format(name), RuntimeWarning) + if name in ["axis"]: + warnings.warn( + "It is discouraged to use 'axis' as an axis name " "and will raise an error in future", + FutureWarning, + ) + return True, "" + + @staticmethod + def check_axis_name(name: str) -> bool: + """ + Valid axes names are python 
identifiers except keywords, + and additionally should not start or end with underscore + """ + is_valid, _reason = ParsedExpression.check_axis_name_return_reason(name) + return is_valid diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/einops/py.typed b/URSA/.venv_ursa/lib/python3.12/site-packages/einops/py.typed new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/filelock-3.20.0.dist-info/licenses/LICENSE b/URSA/.venv_ursa/lib/python3.12/site-packages/filelock-3.20.0.dist-info/licenses/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..cf1ab25da0349f84a3fdd40032f0ce99db813b8b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/filelock-3.20.0.dist-info/licenses/LICENSE @@ -0,0 +1,24 @@ +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/filelock/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/filelock/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c9d8c5b8ebe565a652b3671b3dfa066f7346af45 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/filelock/__init__.py @@ -0,0 +1,70 @@ +""" +A platform independent file lock that supports the with-statement. + +.. autodata:: filelock.__version__ + :no-value: + +""" + +from __future__ import annotations + +import sys +import warnings +from typing import TYPE_CHECKING + +from ._api import AcquireReturnProxy, BaseFileLock +from ._error import Timeout +from ._soft import SoftFileLock +from ._unix import UnixFileLock, has_fcntl +from ._windows import WindowsFileLock +from .asyncio import ( + AsyncAcquireReturnProxy, + AsyncSoftFileLock, + AsyncUnixFileLock, + AsyncWindowsFileLock, + BaseAsyncFileLock, +) +from .version import version + +#: version of the project as a string +__version__: str = version + + +if sys.platform == "win32": # pragma: win32 cover + _FileLock: type[BaseFileLock] = WindowsFileLock + _AsyncFileLock: type[BaseAsyncFileLock] = AsyncWindowsFileLock +else: # pragma: win32 no cover # noqa: PLR5501 + if has_fcntl: + _FileLock: type[BaseFileLock] = UnixFileLock + _AsyncFileLock: type[BaseAsyncFileLock] = AsyncUnixFileLock + else: + _FileLock = SoftFileLock + _AsyncFileLock = AsyncSoftFileLock + if warnings is not None: + warnings.warn("only soft file lock is available", stacklevel=2) + +if TYPE_CHECKING: + FileLock = SoftFileLock + AsyncFileLock = AsyncSoftFileLock +else: + #: Alias for the lock, which should be used for the current platform. 
+ FileLock = _FileLock + AsyncFileLock = _AsyncFileLock + + +__all__ = [ + "AcquireReturnProxy", + "AsyncAcquireReturnProxy", + "AsyncFileLock", + "AsyncSoftFileLock", + "AsyncUnixFileLock", + "AsyncWindowsFileLock", + "BaseAsyncFileLock", + "BaseFileLock", + "FileLock", + "SoftFileLock", + "Timeout", + "UnixFileLock", + "WindowsFileLock", + "__version__", +] diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/filelock/_api.py b/URSA/.venv_ursa/lib/python3.12/site-packages/filelock/_api.py new file mode 100644 index 0000000000000000000000000000000000000000..8fde69a0fef7badcc123d17735cd784a99baed52 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/filelock/_api.py @@ -0,0 +1,403 @@ +from __future__ import annotations + +import contextlib +import inspect +import logging +import os +import time +import warnings +from abc import ABCMeta, abstractmethod +from dataclasses import dataclass +from threading import local +from typing import TYPE_CHECKING, Any, cast +from weakref import WeakValueDictionary + +from ._error import Timeout + +if TYPE_CHECKING: + import sys + from types import TracebackType + + if sys.version_info >= (3, 11): # pragma: no cover (py311+) + from typing import Self + else: # pragma: no cover ( None: + self.lock = lock + + def __enter__(self) -> BaseFileLock: + return self.lock + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc_value: BaseException | None, + traceback: TracebackType | None, + ) -> None: + self.lock.release() + + +@dataclass +class FileLockContext: + """A dataclass which holds the context for a ``BaseFileLock`` object.""" + + # The context is held in a separate class to allow optional use of thread local storage via the + # ThreadLocalFileContext class. + + #: The path to the lock file. + lock_file: str + + #: The default timeout value. 
+ timeout: float + + #: The mode for the lock files + mode: int + + #: Whether the lock should be blocking or not + blocking: bool + + #: The file descriptor for the *_lock_file* as it is returned by the os.open() function, not None when lock held + lock_file_fd: int | None = None + + #: The lock counter is used for implementing the nested locking mechanism. + lock_counter: int = 0 # When the lock is acquired is increased and the lock is only released, when this value is 0 + + +class ThreadLocalFileContext(FileLockContext, local): + """A thread local version of the ``FileLockContext`` class.""" + + +class FileLockMeta(ABCMeta): + def __call__( # noqa: PLR0913 + cls, + lock_file: str | os.PathLike[str], + timeout: float = -1, + mode: int = 0o644, + thread_local: bool = True, # noqa: FBT001, FBT002 + *, + blocking: bool = True, + is_singleton: bool = False, + **kwargs: Any, # capture remaining kwargs for subclasses # noqa: ANN401 + ) -> BaseFileLock: + if is_singleton: + instance = cls._instances.get(str(lock_file)) # type: ignore[attr-defined] + if instance: + params_to_check = { + "thread_local": (thread_local, instance.is_thread_local()), + "timeout": (timeout, instance.timeout), + "mode": (mode, instance.mode), + "blocking": (blocking, instance.blocking), + } + + non_matching_params = { + name: (passed_param, set_param) + for name, (passed_param, set_param) in params_to_check.items() + if passed_param != set_param + } + if not non_matching_params: + return cast("BaseFileLock", instance) + + # parameters do not match; raise error + msg = "Singleton lock instances cannot be initialized with differing arguments" + msg += "\nNon-matching arguments: " + for param_name, (passed_param, set_param) in non_matching_params.items(): + msg += f"\n\t{param_name} (existing lock has {set_param} but {passed_param} was passed)" + raise ValueError(msg) + + # Workaround to make `__init__`'s params optional in subclasses + # E.g. 
virtualenv changes the signature of the `__init__` method in the `BaseFileLock` class descendant + # (https://github.com/tox-dev/filelock/pull/340) + + all_params = { + "timeout": timeout, + "mode": mode, + "thread_local": thread_local, + "blocking": blocking, + "is_singleton": is_singleton, + **kwargs, + } + + present_params = inspect.signature(cls.__init__).parameters # type: ignore[misc] + init_params = {key: value for key, value in all_params.items() if key in present_params} + + instance = super().__call__(lock_file, **init_params) + + if is_singleton: + cls._instances[str(lock_file)] = instance # type: ignore[attr-defined] + + return cast("BaseFileLock", instance) + + +class BaseFileLock(contextlib.ContextDecorator, metaclass=FileLockMeta): + """Abstract base class for a file lock object.""" + + _instances: WeakValueDictionary[str, BaseFileLock] + + def __init_subclass__(cls, **kwargs: dict[str, Any]) -> None: + """Setup unique state for lock subclasses.""" + super().__init_subclass__(**kwargs) + cls._instances = WeakValueDictionary() + + def __init__( # noqa: PLR0913 + self, + lock_file: str | os.PathLike[str], + timeout: float = -1, + mode: int = 0o644, + thread_local: bool = True, # noqa: FBT001, FBT002 + *, + blocking: bool = True, + is_singleton: bool = False, + ) -> None: + """ + Create a new lock object. + + :param lock_file: path to the file + :param timeout: default timeout when acquiring the lock, in seconds. It will be used as fallback value in \ + the acquire method, if no timeout value (``None``) is given. If you want to disable the timeout, set it \ + to a negative value. A timeout of 0 means that there is exactly one attempt to acquire the file lock. + :param mode: file permissions for the lockfile + :param thread_local: Whether this object's internal context should be thread local or not. If this is set to \ + ``False`` then the lock will be reentrant across threads. 
+ :param blocking: whether the lock should be blocking or not + :param is_singleton: If this is set to ``True`` then only one instance of this class will be created \ + per lock file. This is useful if you want to use the lock object for reentrant locking without needing \ + to pass the same object around. + + """ + self._is_thread_local = thread_local + self._is_singleton = is_singleton + + # Create the context. Note that external code should not work with the context directly and should instead use + # properties of this class. + kwargs: dict[str, Any] = { + "lock_file": os.fspath(lock_file), + "timeout": timeout, + "mode": mode, + "blocking": blocking, + } + self._context: FileLockContext = (ThreadLocalFileContext if thread_local else FileLockContext)(**kwargs) + + def is_thread_local(self) -> bool: + """:return: a flag indicating if this lock is thread local or not""" + return self._is_thread_local + + @property + def is_singleton(self) -> bool: + """:return: a flag indicating if this lock is singleton or not""" + return self._is_singleton + + @property + def lock_file(self) -> str: + """:return: path to the lock file""" + return self._context.lock_file + + @property + def timeout(self) -> float: + """ + :return: the default timeout value, in seconds + + .. versionadded:: 2.0.0 + """ + return self._context.timeout + + @timeout.setter + def timeout(self, value: float | str) -> None: + """ + Change the default timeout value. + + :param value: the new value, in seconds + + """ + self._context.timeout = float(value) + + @property + def blocking(self) -> bool: + """:return: whether the locking is blocking or not""" + return self._context.blocking + + @blocking.setter + def blocking(self, value: bool) -> None: + """ + Change the default blocking value. 
+ + :param value: the new value as bool + + """ + self._context.blocking = value + + @property + def mode(self) -> int: + """:return: the file permissions for the lockfile""" + return self._context.mode + + @abstractmethod + def _acquire(self) -> None: + """If the file lock could be acquired, self._context.lock_file_fd holds the file descriptor of the lock file.""" + raise NotImplementedError + + @abstractmethod + def _release(self) -> None: + """Releases the lock and sets self._context.lock_file_fd to None.""" + raise NotImplementedError + + @property + def is_locked(self) -> bool: + """ + + :return: A boolean indicating if the lock file is holding the lock currently. + + .. versionchanged:: 2.0.0 + + This was previously a method and is now a property. + """ + return self._context.lock_file_fd is not None + + @property + def lock_counter(self) -> int: + """:return: The number of times this lock has been acquired (but not yet released).""" + return self._context.lock_counter + + def acquire( + self, + timeout: float | None = None, + poll_interval: float = 0.05, + *, + poll_intervall: float | None = None, + blocking: bool | None = None, + ) -> AcquireReturnProxy: + """ + Try to acquire the file lock. + + :param timeout: maximum wait time for acquiring the lock, ``None`` means use the default :attr:`~timeout` is and + if ``timeout < 0``, there is no timeout and this method will block until the lock could be acquired + :param poll_interval: interval of trying to acquire the lock file + :param poll_intervall: deprecated, kept for backwards compatibility, use ``poll_interval`` instead + :param blocking: defaults to True. If False, function will return immediately if it cannot obtain a lock on the + first attempt. Otherwise, this method will block until the timeout expires or the lock is acquired. + :raises Timeout: if fails to acquire lock within the timeout period + :return: a context object that will unlock the file when the context is exited + + .. 
code-block:: python + + # You can use this method in the context manager (recommended) + with lock.acquire(): + pass + + # Or use an equivalent try-finally construct: + lock.acquire() + try: + pass + finally: + lock.release() + + .. versionchanged:: 2.0.0 + + This method returns now a *proxy* object instead of *self*, + so that it can be used in a with statement without side effects. + + """ + # Use the default timeout, if no timeout is provided. + if timeout is None: + timeout = self._context.timeout + + if blocking is None: + blocking = self._context.blocking + + if poll_intervall is not None: + msg = "use poll_interval instead of poll_intervall" + warnings.warn(msg, DeprecationWarning, stacklevel=2) + poll_interval = poll_intervall + + # Increment the number right at the beginning. We can still undo it, if something fails. + self._context.lock_counter += 1 + + lock_id = id(self) + lock_filename = self.lock_file + start_time = time.perf_counter() + try: + while True: + if not self.is_locked: + _LOGGER.debug("Attempting to acquire lock %s on %s", lock_id, lock_filename) + self._acquire() + if self.is_locked: + _LOGGER.debug("Lock %s acquired on %s", lock_id, lock_filename) + break + if blocking is False: + _LOGGER.debug("Failed to immediately acquire lock %s on %s", lock_id, lock_filename) + raise Timeout(lock_filename) # noqa: TRY301 + if 0 <= timeout < time.perf_counter() - start_time: + _LOGGER.debug("Timeout on acquiring lock %s on %s", lock_id, lock_filename) + raise Timeout(lock_filename) # noqa: TRY301 + msg = "Lock %s not acquired on %s, waiting %s seconds ..." + _LOGGER.debug(msg, lock_id, lock_filename, poll_interval) + time.sleep(poll_interval) + except BaseException: # Something did go wrong, so decrement the counter. + self._context.lock_counter = max(0, self._context.lock_counter - 1) + raise + return AcquireReturnProxy(lock=self) + + def release(self, force: bool = False) -> None: # noqa: FBT001, FBT002 + """ + Releases the file lock. 
Please note, that the lock is only completely released, if the lock counter is 0. + Also note, that the lock file itself is not automatically deleted. + + :param force: If true, the lock counter is ignored and the lock is released in every case/ + + """ + if self.is_locked: + self._context.lock_counter -= 1 + + if self._context.lock_counter == 0 or force: + lock_id, lock_filename = id(self), self.lock_file + + _LOGGER.debug("Attempting to release lock %s on %s", lock_id, lock_filename) + self._release() + self._context.lock_counter = 0 + _LOGGER.debug("Lock %s released on %s", lock_id, lock_filename) + + def __enter__(self) -> Self: + """ + Acquire the lock. + + :return: the lock object + + """ + self.acquire() + return self + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc_value: BaseException | None, + traceback: TracebackType | None, + ) -> None: + """ + Release the lock. + + :param exc_type: the exception type if raised + :param exc_value: the exception value if raised + :param traceback: the exception traceback if raised + + """ + self.release() + + def __del__(self) -> None: + """Called when the lock object is deleted.""" + self.release(force=True) + + +__all__ = [ + "AcquireReturnProxy", + "BaseFileLock", +] diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/filelock/_error.py b/URSA/.venv_ursa/lib/python3.12/site-packages/filelock/_error.py new file mode 100644 index 0000000000000000000000000000000000000000..f7ff08c0f508ad7077eb6ed1990898840c952b3a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/filelock/_error.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +from typing import Any + + +class Timeout(TimeoutError): # noqa: N818 + """Raised when the lock could not be acquired in *timeout* seconds.""" + + def __init__(self, lock_file: str) -> None: + super().__init__() + self._lock_file = lock_file + + def __reduce__(self) -> str | tuple[Any, ...]: + return self.__class__, (self._lock_file,) # Properly 
pickle the exception + + def __str__(self) -> str: + return f"The file lock '{self._lock_file}' could not be acquired." + + def __repr__(self) -> str: + return f"{self.__class__.__name__}({self.lock_file!r})" + + @property + def lock_file(self) -> str: + """:return: The path of the file lock.""" + return self._lock_file + + +__all__ = [ + "Timeout", +] diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/filelock/_soft.py b/URSA/.venv_ursa/lib/python3.12/site-packages/filelock/_soft.py new file mode 100644 index 0000000000000000000000000000000000000000..28c67f74cc82b8f55e47afd6a71972cc1fb95eb6 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/filelock/_soft.py @@ -0,0 +1,47 @@ +from __future__ import annotations + +import os +import sys +from contextlib import suppress +from errno import EACCES, EEXIST +from pathlib import Path + +from ._api import BaseFileLock +from ._util import ensure_directory_exists, raise_on_not_writable_file + + +class SoftFileLock(BaseFileLock): + """Simply watches the existence of the lock file.""" + + def _acquire(self) -> None: + raise_on_not_writable_file(self.lock_file) + ensure_directory_exists(self.lock_file) + # first check for exists and read-only mode as the open will mask this case as EEXIST + flags = ( + os.O_WRONLY # open for writing only + | os.O_CREAT + | os.O_EXCL # together with above raise EEXIST if the file specified by filename exists + | os.O_TRUNC # truncate the file to zero byte + ) + try: + file_handler = os.open(self.lock_file, flags, self._context.mode) + except OSError as exception: # re-raise unless expected exception + if not ( + exception.errno == EEXIST # lock already exist + or (exception.errno == EACCES and sys.platform == "win32") # has no access to this lock + ): # pragma: win32 no cover + raise + else: + self._context.lock_file_fd = file_handler + + def _release(self) -> None: + assert self._context.lock_file_fd is not None # noqa: S101 + os.close(self._context.lock_file_fd) # the lock 
file is definitely not None + self._context.lock_file_fd = None + with suppress(OSError): # the file is already deleted and that's what we want + Path(self.lock_file).unlink() + + +__all__ = [ + "SoftFileLock", +] diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/filelock/_unix.py b/URSA/.venv_ursa/lib/python3.12/site-packages/filelock/_unix.py new file mode 100644 index 0000000000000000000000000000000000000000..b2fd0f33d25d2bdf4a2a883380154771b4a25f9b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/filelock/_unix.py @@ -0,0 +1,70 @@ +from __future__ import annotations + +import os +import sys +from contextlib import suppress +from errno import ENOSYS +from pathlib import Path +from typing import cast + +from ._api import BaseFileLock +from ._util import ensure_directory_exists + +#: a flag to indicate if the fcntl API is available +has_fcntl = False +if sys.platform == "win32": # pragma: win32 cover + + class UnixFileLock(BaseFileLock): + """Uses the :func:`fcntl.flock` to hard lock the lock file on unix systems.""" + + def _acquire(self) -> None: + raise NotImplementedError + + def _release(self) -> None: + raise NotImplementedError + +else: # pragma: win32 no cover + try: + import fcntl + + _ = (fcntl.flock, fcntl.LOCK_EX, fcntl.LOCK_NB, fcntl.LOCK_UN) + except (ImportError, AttributeError): + pass + else: + has_fcntl = True + + class UnixFileLock(BaseFileLock): + """Uses the :func:`fcntl.flock` to hard lock the lock file on unix systems.""" + + def _acquire(self) -> None: + ensure_directory_exists(self.lock_file) + open_flags = os.O_RDWR | os.O_TRUNC + if not Path(self.lock_file).exists(): + open_flags |= os.O_CREAT + fd = os.open(self.lock_file, open_flags, self._context.mode) + with suppress(PermissionError): # This locked is not owned by this UID + os.fchmod(fd, self._context.mode) + try: + fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + except OSError as exception: + os.close(fd) + if exception.errno == ENOSYS: # NotImplemented 
error + msg = "FileSystem does not appear to support flock; use SoftFileLock instead" + raise NotImplementedError(msg) from exception + else: + self._context.lock_file_fd = fd + + def _release(self) -> None: + # Do not remove the lockfile: + # https://github.com/tox-dev/py-filelock/issues/31 + # https://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition + fd = cast("int", self._context.lock_file_fd) + self._context.lock_file_fd = None + fcntl.flock(fd, fcntl.LOCK_UN) + os.close(fd) + + +__all__ = [ + "UnixFileLock", + "has_fcntl", +] diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/filelock/_util.py b/URSA/.venv_ursa/lib/python3.12/site-packages/filelock/_util.py new file mode 100644 index 0000000000000000000000000000000000000000..c671e8533873948f0e1b5575ff952c722019f067 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/filelock/_util.py @@ -0,0 +1,52 @@ +from __future__ import annotations + +import os +import stat +import sys +from errno import EACCES, EISDIR +from pathlib import Path + + +def raise_on_not_writable_file(filename: str) -> None: + """ + Raise an exception if attempting to open the file for writing would fail. + + This is done so files that will never be writable can be separated from files that are writable but currently + locked. + + :param filename: file to check + :raises OSError: as if the file was opened for writing. 
+ + """ + try: # use stat to do exists + can write to check without race condition + file_stat = os.stat(filename) # noqa: PTH116 + except OSError: + return # swallow does not exist or other errors + + if file_stat.st_mtime != 0: # if os.stat returns but modification is zero that's an invalid os.stat - ignore it + if not (file_stat.st_mode & stat.S_IWUSR): + raise PermissionError(EACCES, "Permission denied", filename) + + if stat.S_ISDIR(file_stat.st_mode): + if sys.platform == "win32": # pragma: win32 cover + # On Windows, this is PermissionError + raise PermissionError(EACCES, "Permission denied", filename) + else: # pragma: win32 no cover # noqa: RET506 + # On linux / macOS, this is IsADirectoryError + raise IsADirectoryError(EISDIR, "Is a directory", filename) + + +def ensure_directory_exists(filename: Path | str) -> None: + """ + Ensure the directory containing the file exists (create it if necessary). + + :param filename: file. + + """ + Path(filename).parent.mkdir(parents=True, exist_ok=True) + + +__all__ = [ + "ensure_directory_exists", + "raise_on_not_writable_file", +] diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/filelock/_windows.py b/URSA/.venv_ursa/lib/python3.12/site-packages/filelock/_windows.py new file mode 100644 index 0000000000000000000000000000000000000000..348251d1067c28c55a6a267f8d11337abfae837f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/filelock/_windows.py @@ -0,0 +1,65 @@ +from __future__ import annotations + +import os +import sys +from contextlib import suppress +from errno import EACCES +from pathlib import Path +from typing import cast + +from ._api import BaseFileLock +from ._util import ensure_directory_exists, raise_on_not_writable_file + +if sys.platform == "win32": # pragma: win32 cover + import msvcrt + + class WindowsFileLock(BaseFileLock): + """Uses the :func:`msvcrt.locking` function to hard lock the lock file on Windows systems.""" + + def _acquire(self) -> None: + 
raise_on_not_writable_file(self.lock_file) + ensure_directory_exists(self.lock_file) + flags = ( + os.O_RDWR # open for read and write + | os.O_CREAT # create file if not exists + | os.O_TRUNC # truncate file if not empty + ) + try: + fd = os.open(self.lock_file, flags, self._context.mode) + except OSError as exception: + if exception.errno != EACCES: # has no access to this lock + raise + else: + try: + msvcrt.locking(fd, msvcrt.LK_NBLCK, 1) + except OSError as exception: + os.close(fd) # close file first + if exception.errno != EACCES: # file is already locked + raise + else: + self._context.lock_file_fd = fd + + def _release(self) -> None: + fd = cast("int", self._context.lock_file_fd) + self._context.lock_file_fd = None + msvcrt.locking(fd, msvcrt.LK_UNLCK, 1) + os.close(fd) + + with suppress(OSError): # Probably another instance of the application hat acquired the file lock. + Path(self.lock_file).unlink() + +else: # pragma: win32 no cover + + class WindowsFileLock(BaseFileLock): + """Uses the :func:`msvcrt.locking` function to hard lock the lock file on Windows systems.""" + + def _acquire(self) -> None: + raise NotImplementedError + + def _release(self) -> None: + raise NotImplementedError + + +__all__ = [ + "WindowsFileLock", +] diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/filelock/asyncio.py b/URSA/.venv_ursa/lib/python3.12/site-packages/filelock/asyncio.py new file mode 100644 index 0000000000000000000000000000000000000000..022d0ef6995dbcf34764366313f4f3f1e9ce8ed6 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/filelock/asyncio.py @@ -0,0 +1,344 @@ +"""An asyncio-based implementation of the file lock.""" + +from __future__ import annotations + +import asyncio +import contextlib +import logging +import os +import time +from dataclasses import dataclass +from inspect import iscoroutinefunction +from threading import local +from typing import TYPE_CHECKING, Any, NoReturn, cast + +from ._api import BaseFileLock, FileLockContext, 
FileLockMeta +from ._error import Timeout +from ._soft import SoftFileLock +from ._unix import UnixFileLock +from ._windows import WindowsFileLock + +if TYPE_CHECKING: + import sys + from collections.abc import Callable + from concurrent import futures + from types import TracebackType + + if sys.version_info >= (3, 11): # pragma: no cover (py311+) + from typing import Self + else: # pragma: no cover ( None: # noqa: D107 + self.lock = lock + + async def __aenter__(self) -> BaseAsyncFileLock: # noqa: D105 + return self.lock + + async def __aexit__( # noqa: D105 + self, + exc_type: type[BaseException] | None, + exc_value: BaseException | None, + traceback: TracebackType | None, + ) -> None: + await self.lock.release() + + +class AsyncFileLockMeta(FileLockMeta): + def __call__( # type: ignore[override] # noqa: PLR0913 + cls, # noqa: N805 + lock_file: str | os.PathLike[str], + timeout: float = -1, + mode: int = 0o644, + thread_local: bool = False, # noqa: FBT001, FBT002 + *, + blocking: bool = True, + is_singleton: bool = False, + loop: asyncio.AbstractEventLoop | None = None, + run_in_executor: bool = True, + executor: futures.Executor | None = None, + ) -> BaseAsyncFileLock: + if thread_local and run_in_executor: + msg = "run_in_executor is not supported when thread_local is True" + raise ValueError(msg) + instance = super().__call__( + lock_file=lock_file, + timeout=timeout, + mode=mode, + thread_local=thread_local, + blocking=blocking, + is_singleton=is_singleton, + loop=loop, + run_in_executor=run_in_executor, + executor=executor, + ) + return cast("BaseAsyncFileLock", instance) + + +class BaseAsyncFileLock(BaseFileLock, metaclass=AsyncFileLockMeta): + """Base class for asynchronous file locks.""" + + def __init__( # noqa: PLR0913 + self, + lock_file: str | os.PathLike[str], + timeout: float = -1, + mode: int = 0o644, + thread_local: bool = False, # noqa: FBT001, FBT002 + *, + blocking: bool = True, + is_singleton: bool = False, + loop: asyncio.AbstractEventLoop | 
None = None, + run_in_executor: bool = True, + executor: futures.Executor | None = None, + ) -> None: + """ + Create a new lock object. + + :param lock_file: path to the file + :param timeout: default timeout when acquiring the lock, in seconds. It will be used as fallback value in \ + the acquire method, if no timeout value (``None``) is given. If you want to disable the timeout, set it \ + to a negative value. A timeout of 0 means that there is exactly one attempt to acquire the file lock. + :param mode: file permissions for the lockfile + :param thread_local: Whether this object's internal context should be thread local or not. If this is set to \ + ``False`` then the lock will be reentrant across threads. + :param blocking: whether the lock should be blocking or not + :param is_singleton: If this is set to ``True`` then only one instance of this class will be created \ + per lock file. This is useful if you want to use the lock object for reentrant locking without needing \ + to pass the same object around. + :param loop: The event loop to use. If not specified, the running event loop will be used. + :param run_in_executor: If this is set to ``True`` then the lock will be acquired in an executor. + :param executor: The executor to use. If not specified, the default executor will be used. + + """ + self._is_thread_local = thread_local + self._is_singleton = is_singleton + + # Create the context. Note that external code should not work with the context directly and should instead use + # properties of this class. 
+ kwargs: dict[str, Any] = { + "lock_file": os.fspath(lock_file), + "timeout": timeout, + "mode": mode, + "blocking": blocking, + "loop": loop, + "run_in_executor": run_in_executor, + "executor": executor, + } + self._context: AsyncFileLockContext = (AsyncThreadLocalFileContext if thread_local else AsyncFileLockContext)( + **kwargs + ) + + @property + def run_in_executor(self) -> bool: + """::return: whether run in executor.""" + return self._context.run_in_executor + + @property + def executor(self) -> futures.Executor | None: + """::return: the executor.""" + return self._context.executor + + @executor.setter + def executor(self, value: futures.Executor | None) -> None: # pragma: no cover + """ + Change the executor. + + :param value: the new executor or ``None`` + :type value: futures.Executor | None + + """ + self._context.executor = value + + @property + def loop(self) -> asyncio.AbstractEventLoop | None: + """::return: the event loop.""" + return self._context.loop + + async def acquire( # type: ignore[override] + self, + timeout: float | None = None, + poll_interval: float = 0.05, + *, + blocking: bool | None = None, + ) -> AsyncAcquireReturnProxy: + """ + Try to acquire the file lock. + + :param timeout: maximum wait time for acquiring the lock, ``None`` means use the default + :attr:`~BaseFileLock.timeout` is and if ``timeout < 0``, there is no timeout and + this method will block until the lock could be acquired + :param poll_interval: interval of trying to acquire the lock file + :param blocking: defaults to True. If False, function will return immediately if it cannot obtain a lock on the + first attempt. Otherwise, this method will block until the timeout expires or the lock is acquired. + :raises Timeout: if fails to acquire lock within the timeout period + :return: a context object that will unlock the file when the context is exited + + .. 
code-block:: python + + # You can use this method in the context manager (recommended) + with lock.acquire(): + pass + + # Or use an equivalent try-finally construct: + lock.acquire() + try: + pass + finally: + lock.release() + + """ + # Use the default timeout, if no timeout is provided. + if timeout is None: + timeout = self._context.timeout + + if blocking is None: + blocking = self._context.blocking + + # Increment the number right at the beginning. We can still undo it, if something fails. + self._context.lock_counter += 1 + + lock_id = id(self) + lock_filename = self.lock_file + start_time = time.perf_counter() + try: + while True: + if not self.is_locked: + _LOGGER.debug("Attempting to acquire lock %s on %s", lock_id, lock_filename) + await self._run_internal_method(self._acquire) + if self.is_locked: + _LOGGER.debug("Lock %s acquired on %s", lock_id, lock_filename) + break + if blocking is False: + _LOGGER.debug("Failed to immediately acquire lock %s on %s", lock_id, lock_filename) + raise Timeout(lock_filename) # noqa: TRY301 + if 0 <= timeout < time.perf_counter() - start_time: + _LOGGER.debug("Timeout on acquiring lock %s on %s", lock_id, lock_filename) + raise Timeout(lock_filename) # noqa: TRY301 + msg = "Lock %s not acquired on %s, waiting %s seconds ..." + _LOGGER.debug(msg, lock_id, lock_filename, poll_interval) + await asyncio.sleep(poll_interval) + except BaseException: # Something did go wrong, so decrement the counter. + self._context.lock_counter = max(0, self._context.lock_counter - 1) + raise + return AsyncAcquireReturnProxy(lock=self) + + async def release(self, force: bool = False) -> None: # type: ignore[override] # noqa: FBT001, FBT002 + """ + Releases the file lock. Please note, that the lock is only completely released, if the lock counter is 0. + Also note, that the lock file itself is not automatically deleted. 
+ + :param force: If true, the lock counter is ignored and the lock is released in every case/ + + """ + if self.is_locked: + self._context.lock_counter -= 1 + + if self._context.lock_counter == 0 or force: + lock_id, lock_filename = id(self), self.lock_file + + _LOGGER.debug("Attempting to release lock %s on %s", lock_id, lock_filename) + await self._run_internal_method(self._release) + self._context.lock_counter = 0 + _LOGGER.debug("Lock %s released on %s", lock_id, lock_filename) + + async def _run_internal_method(self, method: Callable[[], Any]) -> None: + if iscoroutinefunction(method): + await method() + elif self.run_in_executor: + loop = self.loop or asyncio.get_running_loop() + await loop.run_in_executor(self.executor, method) + else: + method() + + def __enter__(self) -> NoReturn: + """ + Replace old __enter__ method to avoid using it. + + NOTE: DO NOT USE `with` FOR ASYNCIO LOCKS, USE `async with` INSTEAD. + + :return: none + :rtype: NoReturn + """ + msg = "Do not use `with` for asyncio locks, use `async with` instead." + raise NotImplementedError(msg) + + async def __aenter__(self) -> Self: + """ + Acquire the lock. + + :return: the lock object + + """ + await self.acquire() + return self + + async def __aexit__( + self, + exc_type: type[BaseException] | None, + exc_value: BaseException | None, + traceback: TracebackType | None, + ) -> None: + """ + Release the lock. 
+ + :param exc_type: the exception type if raised + :param exc_value: the exception value if raised + :param traceback: the exception traceback if raised + + """ + await self.release() + + def __del__(self) -> None: + """Called when the lock object is deleted.""" + with contextlib.suppress(RuntimeError): + loop = self.loop or asyncio.get_running_loop() + if not loop.is_running(): # pragma: no cover + loop.run_until_complete(self.release(force=True)) + else: + loop.create_task(self.release(force=True)) + + +class AsyncSoftFileLock(SoftFileLock, BaseAsyncFileLock): + """Simply watches the existence of the lock file.""" + + +class AsyncUnixFileLock(UnixFileLock, BaseAsyncFileLock): + """Uses the :func:`fcntl.flock` to hard lock the lock file on unix systems.""" + + +class AsyncWindowsFileLock(WindowsFileLock, BaseAsyncFileLock): + """Uses the :func:`msvcrt.locking` to hard lock the lock file on windows systems.""" + + +__all__ = [ + "AsyncAcquireReturnProxy", + "AsyncSoftFileLock", + "AsyncUnixFileLock", + "AsyncWindowsFileLock", + "BaseAsyncFileLock", +] diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/filelock/py.typed b/URSA/.venv_ursa/lib/python3.12/site-packages/filelock/py.typed new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/filelock/version.py b/URSA/.venv_ursa/lib/python3.12/site-packages/filelock/version.py new file mode 100644 index 0000000000000000000000000000000000000000..093125cd6ae5a94fa087734b389ccbb3082495a1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/filelock/version.py @@ -0,0 +1,34 @@ +# file generated by setuptools-scm +# don't change, don't track in version control + +__all__ = [ + "__version__", + "__version_tuple__", + "version", + "version_tuple", + "__commit_id__", + "commit_id", +] + +TYPE_CHECKING = False +if TYPE_CHECKING: + from typing import Tuple + from typing import Union + + 
VERSION_TUPLE = Tuple[Union[int, str], ...] + COMMIT_ID = Union[str, None] +else: + VERSION_TUPLE = object + COMMIT_ID = object + +version: str +__version__: str +__version_tuple__: VERSION_TUPLE +version_tuple: VERSION_TUPLE +commit_id: COMMIT_ID +__commit_id__: COMMIT_ID + +__version__ = version = '3.20.0' +__version_tuple__ = version_tuple = (3, 20, 0) + +__commit_id__ = commit_id = None diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/fsspec-2025.12.0.dist-info/licenses/LICENSE b/URSA/.venv_ursa/lib/python3.12/site-packages/fsspec-2025.12.0.dist-info/licenses/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..67590a5e5be5a5a2dde3fe53a7512e404a896c22 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/fsspec-2025.12.0.dist-info/licenses/LICENSE @@ -0,0 +1,29 @@ +BSD 3-Clause License + +Copyright (c) 2018, Martin Durant +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/functorch/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/functorch/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0aef38c8a9bb84a9833c4c2c9c34ad528d564b32 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/functorch/__init__.py @@ -0,0 +1,39 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. +import torch +from torch._functorch.deprecated import ( + combine_state_for_ensemble, + functionalize, + grad, + grad_and_value, + hessian, + jacfwd, + jacrev, + jvp, + make_functional, + make_functional_with_buffers, + vjp, + vmap, +) + +# utilities. Maybe these should go in their own namespace in the future? +from torch._functorch.make_functional import ( + FunctionalModule, + FunctionalModuleWithBuffers, +) + +# Was never documented +from torch._functorch.python_key import make_fx + + +# Top-level APIs. 
Please think carefully before adding something to the +# top-level namespace: +# - private helper functions should go into torch._functorch +# - very experimental things should go into functorch.experimental +# - compilation related things should go into functorch.compile + + +__version__ = torch.__version__ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/h11/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/h11/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..989e92c3458681a6f0be72ae4105ea742750d328 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/h11/__init__.py @@ -0,0 +1,62 @@ +# A highish-level implementation of the HTTP/1.1 wire protocol (RFC 7230), +# containing no networking code at all, loosely modelled on hyper-h2's generic +# implementation of HTTP/2 (and in particular the h2.connection.H2Connection +# class). There's still a bunch of subtle details you need to get right if you +# want to make this actually useful, because it doesn't implement all the +# semantics to check that what you're asking to write to the wire is sensible, +# but at least it gets you out of dealing with the wire itself. 
+ +from h11._connection import Connection, NEED_DATA, PAUSED +from h11._events import ( + ConnectionClosed, + Data, + EndOfMessage, + Event, + InformationalResponse, + Request, + Response, +) +from h11._state import ( + CLIENT, + CLOSED, + DONE, + ERROR, + IDLE, + MIGHT_SWITCH_PROTOCOL, + MUST_CLOSE, + SEND_BODY, + SEND_RESPONSE, + SERVER, + SWITCHED_PROTOCOL, +) +from h11._util import LocalProtocolError, ProtocolError, RemoteProtocolError +from h11._version import __version__ + +PRODUCT_ID = "python-h11/" + __version__ + + +__all__ = ( + "Connection", + "NEED_DATA", + "PAUSED", + "ConnectionClosed", + "Data", + "EndOfMessage", + "Event", + "InformationalResponse", + "Request", + "Response", + "CLIENT", + "CLOSED", + "DONE", + "ERROR", + "IDLE", + "MUST_CLOSE", + "SEND_BODY", + "SEND_RESPONSE", + "SERVER", + "SWITCHED_PROTOCOL", + "ProtocolError", + "LocalProtocolError", + "RemoteProtocolError", +) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/h11/_abnf.py b/URSA/.venv_ursa/lib/python3.12/site-packages/h11/_abnf.py new file mode 100644 index 0000000000000000000000000000000000000000..933587fba22290d7eb7df4c88e12f1e61702b8ce --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/h11/_abnf.py @@ -0,0 +1,132 @@ +# We use native strings for all the re patterns, to take advantage of string +# formatting, and then convert to bytestrings when compiling the final re +# objects. + +# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#whitespace +# OWS = *( SP / HTAB ) +# ; optional whitespace +OWS = r"[ \t]*" + +# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#rule.token.separators +# token = 1*tchar +# +# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" +# / "+" / "-" / "." 
/ "^" / "_" / "`" / "|" / "~" +# / DIGIT / ALPHA +# ; any VCHAR, except delimiters +token = r"[-!#$%&'*+.^_`|~0-9a-zA-Z]+" + +# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#header.fields +# field-name = token +field_name = token + +# The standard says: +# +# field-value = *( field-content / obs-fold ) +# field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ] +# field-vchar = VCHAR / obs-text +# obs-fold = CRLF 1*( SP / HTAB ) +# ; obsolete line folding +# ; see Section 3.2.4 +# +# https://tools.ietf.org/html/rfc5234#appendix-B.1 +# +# VCHAR = %x21-7E +# ; visible (printing) characters +# +# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#rule.quoted-string +# obs-text = %x80-FF +# +# However, the standard definition of field-content is WRONG! It disallows +# fields containing a single visible character surrounded by whitespace, +# e.g. "foo a bar". +# +# See: https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4189 +# +# So our definition of field_content attempts to fix it up... +# +# Also, we allow lots of control characters, because apparently people assume +# that they're legal in practice (e.g., google analytics makes cookies with +# \x01 in them!): +# https://github.com/python-hyper/h11/issues/57 +# We still don't allow NUL or whitespace, because those are often treated as +# meta-characters and letting them through can lead to nasty issues like SSRF. +vchar = r"[\x21-\x7e]" +vchar_or_obs_text = r"[^\x00\s]" +field_vchar = vchar_or_obs_text +field_content = r"{field_vchar}+(?:[ \t]+{field_vchar}+)*".format(**globals()) + +# We handle obs-fold at a different level, and our fixed-up field_content +# already grows to swallow the whole value, so ? 
instead of * +field_value = r"({field_content})?".format(**globals()) + +# header-field = field-name ":" OWS field-value OWS +header_field = ( + r"(?P{field_name})" + r":" + r"{OWS}" + r"(?P{field_value})" + r"{OWS}".format(**globals()) +) + +# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#request.line +# +# request-line = method SP request-target SP HTTP-version CRLF +# method = token +# HTTP-version = HTTP-name "/" DIGIT "." DIGIT +# HTTP-name = %x48.54.54.50 ; "HTTP", case-sensitive +# +# request-target is complicated (see RFC 7230 sec 5.3) -- could be path, full +# URL, host+port (for connect), or even "*", but in any case we are guaranteed +# that it contists of the visible printing characters. +method = token +request_target = r"{vchar}+".format(**globals()) +http_version = r"HTTP/(?P[0-9]\.[0-9])" +request_line = ( + r"(?P{method})" + r" " + r"(?P{request_target})" + r" " + r"{http_version}".format(**globals()) +) + +# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#status.line +# +# status-line = HTTP-version SP status-code SP reason-phrase CRLF +# status-code = 3DIGIT +# reason-phrase = *( HTAB / SP / VCHAR / obs-text ) +status_code = r"[0-9]{3}" +reason_phrase = r"([ \t]|{vchar_or_obs_text})*".format(**globals()) +status_line = ( + r"{http_version}" + r" " + r"(?P{status_code})" + # However, there are apparently a few too many servers out there that just + # leave out the reason phrase: + # https://github.com/scrapy/scrapy/issues/345#issuecomment-281756036 + # https://github.com/seanmonstar/httparse/issues/29 + # so make it optional. ?: is a non-capturing group. + r"(?: (?P{reason_phrase}))?".format(**globals()) +) + +HEXDIG = r"[0-9A-Fa-f]" +# Actually +# +# chunk-size = 1*HEXDIG +# +# but we impose an upper-limit to avoid ridiculosity. 
len(str(2**64)) == 20 +chunk_size = r"({HEXDIG}){{1,20}}".format(**globals()) +# Actually +# +# chunk-ext = *( ";" chunk-ext-name [ "=" chunk-ext-val ] ) +# +# but we aren't parsing the things so we don't really care. +chunk_ext = r";.*" +chunk_header = ( + r"(?P{chunk_size})" + r"(?P{chunk_ext})?" + r"{OWS}\r\n".format( + **globals() + ) # Even though the specification does not allow for extra whitespaces, + # we are lenient with trailing whitespaces because some servers on the wild use it. +) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/h11/_connection.py b/URSA/.venv_ursa/lib/python3.12/site-packages/h11/_connection.py new file mode 100644 index 0000000000000000000000000000000000000000..d1752707598154d190d69b2c26f3098b74656652 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/h11/_connection.py @@ -0,0 +1,633 @@ +# This contains the main Connection class. Everything in h11 revolves around +# this. +from typing import Any, Callable, cast, Dict, List, Optional, Tuple, Type, Union + +from ._events import ( + ConnectionClosed, + Data, + EndOfMessage, + Event, + InformationalResponse, + Request, + Response, +) +from ._headers import get_comma_header, has_expect_100_continue, set_comma_header +from ._readers import READERS, ReadersType +from ._receivebuffer import ReceiveBuffer +from ._state import ( + _SWITCH_CONNECT, + _SWITCH_UPGRADE, + CLIENT, + ConnectionState, + DONE, + ERROR, + MIGHT_SWITCH_PROTOCOL, + SEND_BODY, + SERVER, + SWITCHED_PROTOCOL, +) +from ._util import ( # Import the internal things we need + LocalProtocolError, + RemoteProtocolError, + Sentinel, +) +from ._writers import WRITERS, WritersType + +# Everything in __all__ gets re-exported as part of the h11 public API. 
+__all__ = ["Connection", "NEED_DATA", "PAUSED"] + + +class NEED_DATA(Sentinel, metaclass=Sentinel): + pass + + +class PAUSED(Sentinel, metaclass=Sentinel): + pass + + +# If we ever have this much buffered without it making a complete parseable +# event, we error out. The only time we really buffer is when reading the +# request/response line + headers together, so this is effectively the limit on +# the size of that. +# +# Some precedents for defaults: +# - node.js: 80 * 1024 +# - tomcat: 8 * 1024 +# - IIS: 16 * 1024 +# - Apache: <8 KiB per line> +DEFAULT_MAX_INCOMPLETE_EVENT_SIZE = 16 * 1024 + +# RFC 7230's rules for connection lifecycles: +# - If either side says they want to close the connection, then the connection +# must close. +# - HTTP/1.1 defaults to keep-alive unless someone says Connection: close +# - HTTP/1.0 defaults to close unless both sides say Connection: keep-alive +# (and even this is a mess -- e.g. if you're implementing a proxy then +# sending Connection: keep-alive is forbidden). +# +# We simplify life by simply not supporting keep-alive with HTTP/1.0 peers. So +# our rule is: +# - If someone says Connection: close, we will close +# - If someone uses HTTP/1.0, we will close. +def _keep_alive(event: Union[Request, Response]) -> bool: + connection = get_comma_header(event.headers, b"connection") + if b"close" in connection: + return False + if getattr(event, "http_version", b"1.1") < b"1.1": + return False + return True + + +def _body_framing( + request_method: bytes, event: Union[Request, Response] +) -> Tuple[str, Union[Tuple[()], Tuple[int]]]: + # Called when we enter SEND_BODY to figure out framing information for + # this body. + # + # These are the only two events that can trigger a SEND_BODY state: + assert type(event) in (Request, Response) + # Returns one of: + # + # ("content-length", count) + # ("chunked", ()) + # ("http/1.0", ()) + # + # which are (lookup key, *args) for constructing body reader/writer + # objects. 
+ # + # Reference: https://tools.ietf.org/html/rfc7230#section-3.3.3 + # + # Step 1: some responses always have an empty body, regardless of what the + # headers say. + if type(event) is Response: + if ( + event.status_code in (204, 304) + or request_method == b"HEAD" + or (request_method == b"CONNECT" and 200 <= event.status_code < 300) + ): + return ("content-length", (0,)) + # Section 3.3.3 also lists another case -- responses with status_code + # < 200. For us these are InformationalResponses, not Responses, so + # they can't get into this function in the first place. + assert event.status_code >= 200 + + # Step 2: check for Transfer-Encoding (T-E beats C-L): + transfer_encodings = get_comma_header(event.headers, b"transfer-encoding") + if transfer_encodings: + assert transfer_encodings == [b"chunked"] + return ("chunked", ()) + + # Step 3: check for Content-Length + content_lengths = get_comma_header(event.headers, b"content-length") + if content_lengths: + return ("content-length", (int(content_lengths[0]),)) + + # Step 4: no applicable headers; fallback/default depends on type + if type(event) is Request: + return ("content-length", (0,)) + else: + return ("http/1.0", ()) + + +################################################################ +# +# The main Connection class +# +################################################################ + + +class Connection: + """An object encapsulating the state of an HTTP connection. + + Args: + our_role: If you're implementing a client, pass :data:`h11.CLIENT`. If + you're implementing a server, pass :data:`h11.SERVER`. + + max_incomplete_event_size (int): + The maximum number of bytes we're willing to buffer of an + incomplete event. In practice this mostly sets a limit on the + maximum size of the request/response line + headers. If this is + exceeded, then :meth:`next_event` will raise + :exc:`RemoteProtocolError`. 
+ + """ + + def __init__( + self, + our_role: Type[Sentinel], + max_incomplete_event_size: int = DEFAULT_MAX_INCOMPLETE_EVENT_SIZE, + ) -> None: + self._max_incomplete_event_size = max_incomplete_event_size + # State and role tracking + if our_role not in (CLIENT, SERVER): + raise ValueError("expected CLIENT or SERVER, not {!r}".format(our_role)) + self.our_role = our_role + self.their_role: Type[Sentinel] + if our_role is CLIENT: + self.their_role = SERVER + else: + self.their_role = CLIENT + self._cstate = ConnectionState() + + # Callables for converting data->events or vice-versa given the + # current state + self._writer = self._get_io_object(self.our_role, None, WRITERS) + self._reader = self._get_io_object(self.their_role, None, READERS) + + # Holds any unprocessed received data + self._receive_buffer = ReceiveBuffer() + # If this is true, then it indicates that the incoming connection was + # closed *after* the end of whatever's in self._receive_buffer: + self._receive_buffer_closed = False + + # Extra bits of state that don't fit into the state machine. + # + # These two are only used to interpret framing headers for figuring + # out how to read/write response bodies. their_http_version is also + # made available as a convenient public API. + self.their_http_version: Optional[bytes] = None + self._request_method: Optional[bytes] = None + # This is pure flow-control and doesn't at all affect the set of legal + # transitions, so no need to bother ConnectionState with it: + self.client_is_waiting_for_100_continue = False + + @property + def states(self) -> Dict[Type[Sentinel], Type[Sentinel]]: + """A dictionary like:: + + {CLIENT: , SERVER: } + + See :ref:`state-machine` for details. + + """ + return dict(self._cstate.states) + + @property + def our_state(self) -> Type[Sentinel]: + """The current state of whichever role we are playing. See + :ref:`state-machine` for details. 
+ """ + return self._cstate.states[self.our_role] + + @property + def their_state(self) -> Type[Sentinel]: + """The current state of whichever role we are NOT playing. See + :ref:`state-machine` for details. + """ + return self._cstate.states[self.their_role] + + @property + def they_are_waiting_for_100_continue(self) -> bool: + return self.their_role is CLIENT and self.client_is_waiting_for_100_continue + + def start_next_cycle(self) -> None: + """Attempt to reset our connection state for a new request/response + cycle. + + If both client and server are in :data:`DONE` state, then resets them + both to :data:`IDLE` state in preparation for a new request/response + cycle on this same connection. Otherwise, raises a + :exc:`LocalProtocolError`. + + See :ref:`keepalive-and-pipelining`. + + """ + old_states = dict(self._cstate.states) + self._cstate.start_next_cycle() + self._request_method = None + # self.their_http_version gets left alone, since it presumably lasts + # beyond a single request/response cycle + assert not self.client_is_waiting_for_100_continue + self._respond_to_state_changes(old_states) + + def _process_error(self, role: Type[Sentinel]) -> None: + old_states = dict(self._cstate.states) + self._cstate.process_error(role) + self._respond_to_state_changes(old_states) + + def _server_switch_event(self, event: Event) -> Optional[Type[Sentinel]]: + if type(event) is InformationalResponse and event.status_code == 101: + return _SWITCH_UPGRADE + if type(event) is Response: + if ( + _SWITCH_CONNECT in self._cstate.pending_switch_proposals + and 200 <= event.status_code < 300 + ): + return _SWITCH_CONNECT + return None + + # All events go through here + def _process_event(self, role: Type[Sentinel], event: Event) -> None: + # First, pass the event through the state machine to make sure it + # succeeds. 
+ old_states = dict(self._cstate.states) + if role is CLIENT and type(event) is Request: + if event.method == b"CONNECT": + self._cstate.process_client_switch_proposal(_SWITCH_CONNECT) + if get_comma_header(event.headers, b"upgrade"): + self._cstate.process_client_switch_proposal(_SWITCH_UPGRADE) + server_switch_event = None + if role is SERVER: + server_switch_event = self._server_switch_event(event) + self._cstate.process_event(role, type(event), server_switch_event) + + # Then perform the updates triggered by it. + + if type(event) is Request: + self._request_method = event.method + + if role is self.their_role and type(event) in ( + Request, + Response, + InformationalResponse, + ): + event = cast(Union[Request, Response, InformationalResponse], event) + self.their_http_version = event.http_version + + # Keep alive handling + # + # RFC 7230 doesn't really say what one should do if Connection: close + # shows up on a 1xx InformationalResponse. I think the idea is that + # this is not supposed to happen. In any case, if it does happen, we + # ignore it. 
+ if type(event) in (Request, Response) and not _keep_alive( + cast(Union[Request, Response], event) + ): + self._cstate.process_keep_alive_disabled() + + # 100-continue + if type(event) is Request and has_expect_100_continue(event): + self.client_is_waiting_for_100_continue = True + if type(event) in (InformationalResponse, Response): + self.client_is_waiting_for_100_continue = False + if role is CLIENT and type(event) in (Data, EndOfMessage): + self.client_is_waiting_for_100_continue = False + + self._respond_to_state_changes(old_states, event) + + def _get_io_object( + self, + role: Type[Sentinel], + event: Optional[Event], + io_dict: Union[ReadersType, WritersType], + ) -> Optional[Callable[..., Any]]: + # event may be None; it's only used when entering SEND_BODY + state = self._cstate.states[role] + if state is SEND_BODY: + # Special case: the io_dict has a dict of reader/writer factories + # that depend on the request/response framing. + framing_type, args = _body_framing( + cast(bytes, self._request_method), cast(Union[Request, Response], event) + ) + return io_dict[SEND_BODY][framing_type](*args) # type: ignore[index] + else: + # General case: the io_dict just has the appropriate reader/writer + # for this state + return io_dict.get((role, state)) # type: ignore[return-value] + + # This must be called after any action that might have caused + # self._cstate.states to change. 
+ def _respond_to_state_changes( + self, + old_states: Dict[Type[Sentinel], Type[Sentinel]], + event: Optional[Event] = None, + ) -> None: + # Update reader/writer + if self.our_state != old_states[self.our_role]: + self._writer = self._get_io_object(self.our_role, event, WRITERS) + if self.their_state != old_states[self.their_role]: + self._reader = self._get_io_object(self.their_role, event, READERS) + + @property + def trailing_data(self) -> Tuple[bytes, bool]: + """Data that has been received, but not yet processed, represented as + a tuple with two elements, where the first is a byte-string containing + the unprocessed data itself, and the second is a bool that is True if + the receive connection was closed. + + See :ref:`switching-protocols` for discussion of why you'd want this. + """ + return (bytes(self._receive_buffer), self._receive_buffer_closed) + + def receive_data(self, data: bytes) -> None: + """Add data to our internal receive buffer. + + This does not actually do any processing on the data, just stores + it. To trigger processing, you have to call :meth:`next_event`. + + Args: + data (:term:`bytes-like object`): + The new data that was just received. + + Special case: If *data* is an empty byte-string like ``b""``, + then this indicates that the remote side has closed the + connection (end of file). Normally this is convenient, because + standard Python APIs like :meth:`file.read` or + :meth:`socket.recv` use ``b""`` to indicate end-of-file, while + other failures to read are indicated using other mechanisms + like raising :exc:`TimeoutError`. When using such an API you + can just blindly pass through whatever you get from ``read`` + to :meth:`receive_data`, and everything will work. + + But, if you have an API where reading an empty string is a + valid non-EOF condition, then you need to be aware of this and + make sure to check for such strings and avoid passing them to + :meth:`receive_data`. 
+ + Returns: + Nothing, but after calling this you should call :meth:`next_event` + to parse the newly received data. + + Raises: + RuntimeError: + Raised if you pass an empty *data*, indicating EOF, and then + pass a non-empty *data*, indicating more data that somehow + arrived after the EOF. + + (Calling ``receive_data(b"")`` multiple times is fine, + and equivalent to calling it once.) + + """ + if data: + if self._receive_buffer_closed: + raise RuntimeError("received close, then received more data?") + self._receive_buffer += data + else: + self._receive_buffer_closed = True + + def _extract_next_receive_event( + self, + ) -> Union[Event, Type[NEED_DATA], Type[PAUSED]]: + state = self.their_state + # We don't pause immediately when they enter DONE, because even in + # DONE state we can still process a ConnectionClosed() event. But + # if we have data in our buffer, then we definitely aren't getting + # a ConnectionClosed() immediately and we need to pause. + if state is DONE and self._receive_buffer: + return PAUSED + if state is MIGHT_SWITCH_PROTOCOL or state is SWITCHED_PROTOCOL: + return PAUSED + assert self._reader is not None + event = self._reader(self._receive_buffer) + if event is None: + if not self._receive_buffer and self._receive_buffer_closed: + # In some unusual cases (basically just HTTP/1.0 bodies), EOF + # triggers an actual protocol event; in that case, we want to + # return that event, and then the state will change and we'll + # get called again to generate the actual ConnectionClosed(). + if hasattr(self._reader, "read_eof"): + event = self._reader.read_eof() # type: ignore[attr-defined] + else: + event = ConnectionClosed() + if event is None: + event = NEED_DATA + return event # type: ignore[no-any-return] + + def next_event(self) -> Union[Event, Type[NEED_DATA], Type[PAUSED]]: + """Parse the next event out of our receive buffer, update our internal + state, and return it. 
+ + This is a mutating operation -- think of it like calling :func:`next` + on an iterator. + + Returns: + : One of three things: + + 1) An event object -- see :ref:`events`. + + 2) The special constant :data:`NEED_DATA`, which indicates that + you need to read more data from your socket and pass it to + :meth:`receive_data` before this method will be able to return + any more events. + + 3) The special constant :data:`PAUSED`, which indicates that we + are not in a state where we can process incoming data (usually + because the peer has finished their part of the current + request/response cycle, and you have not yet called + :meth:`start_next_cycle`). See :ref:`flow-control` for details. + + Raises: + RemoteProtocolError: + The peer has misbehaved. You should close the connection + (possibly after sending some kind of 4xx response). + + Once this method returns :class:`ConnectionClosed` once, then all + subsequent calls will also return :class:`ConnectionClosed`. + + If this method raises any exception besides :exc:`RemoteProtocolError` + then that's a bug -- if it happens please file a bug report! + + If this method raises any exception then it also sets + :attr:`Connection.their_state` to :data:`ERROR` -- see + :ref:`error-handling` for discussion. 
+ + """ + + if self.their_state is ERROR: + raise RemoteProtocolError("Can't receive data when peer state is ERROR") + try: + event = self._extract_next_receive_event() + if event not in [NEED_DATA, PAUSED]: + self._process_event(self.their_role, cast(Event, event)) + if event is NEED_DATA: + if len(self._receive_buffer) > self._max_incomplete_event_size: + # 431 is "Request header fields too large" which is pretty + # much the only situation where we can get here + raise RemoteProtocolError( + "Receive buffer too long", error_status_hint=431 + ) + if self._receive_buffer_closed: + # We're still trying to complete some event, but that's + # never going to happen because no more data is coming + raise RemoteProtocolError("peer unexpectedly closed connection") + return event + except BaseException as exc: + self._process_error(self.their_role) + if isinstance(exc, LocalProtocolError): + exc._reraise_as_remote_protocol_error() + else: + raise + + def send(self, event: Event) -> Optional[bytes]: + """Convert a high-level event into bytes that can be sent to the peer, + while updating our internal state machine. + + Args: + event: The :ref:`event ` to send. + + Returns: + If ``type(event) is ConnectionClosed``, then returns + ``None``. Otherwise, returns a :term:`bytes-like object`. + + Raises: + LocalProtocolError: + Sending this event at this time would violate our + understanding of the HTTP/1.1 protocol. + + If this method raises any exception then it also sets + :attr:`Connection.our_state` to :data:`ERROR` -- see + :ref:`error-handling` for discussion. 
+ + """ + data_list = self.send_with_data_passthrough(event) + if data_list is None: + return None + else: + return b"".join(data_list) + + def send_with_data_passthrough(self, event: Event) -> Optional[List[bytes]]: + """Identical to :meth:`send`, except that in situations where + :meth:`send` returns a single :term:`bytes-like object`, this instead + returns a list of them -- and when sending a :class:`Data` event, this + list is guaranteed to contain the exact object you passed in as + :attr:`Data.data`. See :ref:`sendfile` for discussion. + + """ + if self.our_state is ERROR: + raise LocalProtocolError("Can't send data when our state is ERROR") + try: + if type(event) is Response: + event = self._clean_up_response_headers_for_sending(event) + # We want to call _process_event before calling the writer, + # because if someone tries to do something invalid then this will + # give a sensible error message, while our writers all just assume + # they will only receive valid events. But, _process_event might + # change self._writer. So we have to do a little dance: + writer = self._writer + self._process_event(self.our_role, event) + if type(event) is ConnectionClosed: + return None + else: + # In any situation where writer is None, process_event should + # have raised ProtocolError + assert writer is not None + data_list: List[bytes] = [] + writer(event, data_list.append) + return data_list + except: + self._process_error(self.our_role) + raise + + def send_failed(self) -> None: + """Notify the state machine that we failed to send the data it gave + us. + + This causes :attr:`Connection.our_state` to immediately become + :data:`ERROR` -- see :ref:`error-handling` for discussion. + + """ + self._process_error(self.our_role) + + # When sending a Response, we take responsibility for a few things: + # + # - Sometimes you MUST set Connection: close. We take care of those + # times. 
(You can also set it yourself if you want, and if you do then + # we'll respect that and close the connection at the right time. But you + # don't have to worry about that unless you want to.) + # + # - The user has to set Content-Length if they want it. Otherwise, for + # responses that have bodies (e.g. not HEAD), then we will automatically + # select the right mechanism for streaming a body of unknown length, + # which depends on depending on the peer's HTTP version. + # + # This function's *only* responsibility is making sure headers are set up + # right -- everything downstream just looks at the headers. There are no + # side channels. + def _clean_up_response_headers_for_sending(self, response: Response) -> Response: + assert type(response) is Response + + headers = response.headers + need_close = False + + # HEAD requests need some special handling: they always act like they + # have Content-Length: 0, and that's how _body_framing treats + # them. But their headers are supposed to match what we would send if + # the request was a GET. (Technically there is one deviation allowed: + # we're allowed to leave out the framing headers -- see + # https://tools.ietf.org/html/rfc7231#section-4.3.2 . But it's just as + # easy to get them right.) + method_for_choosing_headers = cast(bytes, self._request_method) + if method_for_choosing_headers == b"HEAD": + method_for_choosing_headers = b"GET" + framing_type, _ = _body_framing(method_for_choosing_headers, response) + if framing_type in ("chunked", "http/1.0"): + # This response has a body of unknown length. + # If our peer is HTTP/1.1, we use Transfer-Encoding: chunked + # If our peer is HTTP/1.0, we use no framing headers, and close the + # connection afterwards. 
+ # + # Make sure to clear Content-Length (in principle user could have + # set both and then we ignored Content-Length b/c + # Transfer-Encoding overwrote it -- this would be naughty of them, + # but the HTTP spec says that if our peer does this then we have + # to fix it instead of erroring out, so we'll accord the user the + # same respect). + headers = set_comma_header(headers, b"content-length", []) + if self.their_http_version is None or self.their_http_version < b"1.1": + # Either we never got a valid request and are sending back an + # error (their_http_version is None), so we assume the worst; + # or else we did get a valid HTTP/1.0 request, so we know that + # they don't understand chunked encoding. + headers = set_comma_header(headers, b"transfer-encoding", []) + # This is actually redundant ATM, since currently we + # unconditionally disable keep-alive when talking to HTTP/1.0 + # peers. But let's be defensive just in case we add + # Connection: keep-alive support later: + if self._request_method != b"HEAD": + need_close = True + else: + headers = set_comma_header(headers, b"transfer-encoding", [b"chunked"]) + + if not self._cstate.keep_alive or need_close: + # Make sure Connection: close is set + connection = set(get_comma_header(headers, b"connection")) + connection.discard(b"keep-alive") + connection.add(b"close") + headers = set_comma_header(headers, b"connection", sorted(connection)) + + return Response( + headers=headers, + status_code=response.status_code, + http_version=response.http_version, + reason=response.reason, + ) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/h11/_events.py b/URSA/.venv_ursa/lib/python3.12/site-packages/h11/_events.py new file mode 100644 index 0000000000000000000000000000000000000000..075bf8a469d44d2388b08ec3d009fe55d44cb6eb --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/h11/_events.py @@ -0,0 +1,369 @@ +# High level events that make up HTTP/1.1 conversations. 
@dataclass(init=False, frozen=True)
class Request(Event):
    """The beginning of an HTTP request.

    Fields:

    .. attribute:: method

       An HTTP method, e.g. ``b"GET"`` or ``b"POST"``. Always a byte
       string. :term:`Bytes-like objects <bytes-like object>` and native
       strings containing only ascii characters will be automatically
       converted to byte strings.

    .. attribute:: target

       The target of an HTTP request, e.g. ``b"/index.html"``, or one of the
       more exotic formats described in `RFC 7230, section 5.3
       <https://tools.ietf.org/html/rfc7230#section-5.3>`_. Always a byte
       string. :term:`Bytes-like objects <bytes-like object>` and native
       strings containing only ascii characters will be automatically
       converted to byte strings.

    .. attribute:: headers

       Request headers, represented as a list of (name, value) pairs,
       normalized according to the header normalization rules.

    .. attribute:: http_version

       The HTTP protocol version, represented as a byte string like
       ``b"1.1"``, normalized according to the HTTP version normalization
       rules.

    """

    __slots__ = ("method", "headers", "target", "http_version")

    method: bytes
    headers: Headers
    target: bytes
    http_version: bytes

    def __init__(
        self,
        *,
        method: Union[bytes, str],
        headers: Union[Headers, List[Tuple[bytes, bytes]], List[Tuple[str, str]]],
        target: Union[bytes, str],
        http_version: Union[bytes, str] = b"1.1",
        _parsed: bool = False,
    ) -> None:
        super().__init__()
        # The dataclass is frozen, so fields are written via
        # object.__setattr__.
        store = object.__setattr__

        if isinstance(headers, Headers):
            store(self, "headers", headers)
        else:
            store(self, "headers", normalize_and_validate(headers, _parsed=_parsed))

        # Values coming from the parser are stored as given; user-supplied
        # values are converted to bytes first.
        for field_name, raw in (
            ("method", method),
            ("target", target),
            ("http_version", http_version),
        ):
            store(self, field_name, raw if _parsed else bytesify(raw))

        # "A server MUST respond with a 400 (Bad Request) status code to any
        # HTTP/1.1 request message that lacks a Host header field and to any
        # request message that contains more than one Host header field or a
        # Host header field with an invalid field-value."
        # -- https://tools.ietf.org/html/rfc7230#section-5.4
        host_count = sum(1 for name, _value in self.headers if name == b"host")
        if self.http_version == b"1.1" and host_count == 0:
            raise LocalProtocolError("Missing mandatory Host: header")
        if host_count > 1:
            raise LocalProtocolError("Found multiple Host: headers")

        validate(method_re, self.method, "Illegal method characters")
        validate(request_target_re, self.target, "Illegal target characters")

    # This is an unhashable type.
    __hash__ = None  # type: ignore
@dataclass(init=False, frozen=True)
class Response(_ResponseBase):
    """The beginning of an HTTP response.

    Fields:

    .. attribute:: status_code

       The status code of this response, as an integer. For a
       :class:`Response`, this is always in the range [200, 1000).

    .. attribute:: headers

       Response headers, represented as a list of (name, value) pairs,
       normalized according to the header normalization rules.

    .. attribute:: http_version

       The HTTP protocol version, represented as a byte string like
       ``b"1.1"``, normalized according to the HTTP version normalization
       rules.

    .. attribute:: reason

       The reason phrase of this response, as a byte string. For example:
       ``b"OK"``, or ``b"Not Found"``.

    """

    def __post_init__(self) -> None:
        # Anything below 200 belongs to InformationalResponse instead.
        if 200 <= self.status_code < 1000:
            return
        raise LocalProtocolError(
            "Response status_code should be in range [200, 1000), not {}".format(
                self.status_code
            )
        )

    # This is an unhashable type.
    __hash__ = None  # type: ignore
# XX FIXME: "A recipient MUST ignore (or consider as an error) any fields that
# are forbidden to be sent in a trailer, since processing them as if they were
# present in the header section might bypass external security filters."
# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#chunked.trailer.part
# Unfortunately, the list of forbidden fields is long and vague :-/
@dataclass(init=False, frozen=True)
class EndOfMessage(Event):
    """The end of an HTTP message.

    Fields:

    .. attribute:: headers

       Default value: ``[]``

       Any trailing headers attached to this message, represented as a list
       of (name, value) pairs, normalized according to the header
       normalization rules.

       Must be empty unless ``Transfer-Encoding: chunked`` is in use.

    """

    __slots__ = ("headers",)

    headers: Headers

    def __init__(
        self,
        *,
        headers: Union[
            Headers, List[Tuple[bytes, bytes]], List[Tuple[str, str]], None
        ] = None,
        _parsed: bool = False,
    ) -> None:
        super().__init__()
        if isinstance(headers, Headers):
            # Already normalized; store as-is.
            trailers = headers
        elif headers is None:
            trailers = Headers([])
        else:
            trailers = normalize_and_validate(headers, _parsed=_parsed)

        # Frozen dataclass: bypass the blocked __setattr__.
        object.__setattr__(self, "headers", trailers)

    # This is an unhashable type.
    __hash__ = None  # type: ignore
In practice, most HTTP header field values use only a +# subset of the US-ASCII charset [USASCII]. Newly defined header fields SHOULD +# limit their field values to US-ASCII octets. A recipient SHOULD treat other +# octets in field content (obs-text) as opaque data." +# And it deprecates all non-ascii values +# +# Leading/trailing whitespace in header names is forbidden +# +# Values get leading/trailing whitespace stripped +# +# Content-Disposition actually needs to contain unicode semantically; to +# accomplish this it has a terrifically weird way of encoding the filename +# itself as ascii (and even this still has lots of cross-browser +# incompatibilities) +# +# Order is important: +# "a proxy MUST NOT change the order of these field values when forwarding a +# message" +# (and there are several headers where the order indicates a preference) +# +# Multiple occurrences of the same header: +# "A sender MUST NOT generate multiple header fields with the same field name +# in a message unless either the entire field value for that header field is +# defined as a comma-separated list [or the header is Set-Cookie which gets a +# special exception]" - RFC 7230. (cookies are in RFC 6265) +# +# So every header aside from Set-Cookie can be merged by b", ".join if it +# occurs repeatedly. But, of course, they can't necessarily be split by +# .split(b","), because quoting. +# +# Given all this mess (case insensitive, duplicates allowed, order is +# important, ...), there doesn't appear to be any standard way to handle +# headers in Python -- they're almost like dicts, but... actually just +# aren't. For now we punt and just use a super simple representation: headers +# are a list of pairs +# +# [(name1, value1), (name2, value2), ...] +# +# where all entries are bytestrings, names are lowercase and have no +# leading/trailing whitespace, and values are bytestrings with no +# leading/trailing whitespace. Searching and updating are done via naive O(n) +# methods.
class Headers(Sequence[Tuple[bytes, bytes]]):
    """
    A list-like interface that allows iterating over headers as byte-pairs
    of (lowercased-name, value).

    Internally we actually store the representation as three-tuples,
    including both the raw original casing, in order to preserve casing
    over-the-wire, and the lowercased name, for case-insensitive comparisons.

    r = Request(
        method="GET",
        target="/",
        headers=[("Host", "example.org"), ("Connection", "keep-alive")],
        http_version="1.1",
    )
    assert r.headers == [
        (b"host", b"example.org"),
        (b"connection", b"keep-alive")
    ]
    assert r.headers.raw_items() == [
        (b"Host", b"example.org"),
        (b"Connection", b"keep-alive")
    ]
    """

    __slots__ = "_full_items"

    def __init__(self, full_items: List[Tuple[bytes, bytes, bytes]]) -> None:
        # Each entry is (raw_name, lowercased_name, value).
        self._full_items = full_items

    def __bool__(self) -> bool:
        return bool(self._full_items)

    def __eq__(self, other: object) -> bool:
        """Compare as a list of (lowercased-name, value) pairs.

        Returns ``NotImplemented`` for operands that cannot be iterated, so
        that ``headers == 5`` evaluates to ``False`` instead of raising
        ``TypeError``.
        """
        try:
            other_items = list(other)  # type: ignore[call-overload]
        except TypeError:
            return NotImplemented
        return list(self) == other_items

    def __len__(self) -> int:
        return len(self._full_items)

    def __repr__(self) -> str:
        # The format string needs a placeholder: ``"" % value`` raises
        # TypeError ("not all arguments converted").
        return "<Headers(%s)>" % repr(list(self))

    def __getitem__(self, idx: int) -> Tuple[bytes, bytes]:  # type: ignore[override]
        """Return the (lowercased-name, value) pair at position ``idx``."""
        _, name, value = self._full_items[idx]
        return (name, value)

    def raw_items(self) -> List[Tuple[bytes, bytes]]:
        """Return (name, value) pairs using the original header-name casing."""
        return [(raw_name, value) for raw_name, _, value in self._full_items]
@overload
def normalize_and_validate(
    headers: Union[Headers, HeaderTypes], _parsed: bool = False
) -> Headers:
    ...


def normalize_and_validate(
    headers: Union[Headers, HeaderTypes], _parsed: bool = False
) -> Headers:
    """Build a :class:`Headers` object from (name, value) pairs.

    Args:
        headers: An iterable of (name, value) pairs.
        _parsed: ``True`` when the pairs come straight from the parser; the
            bytes conversion and the name/value regex checks are skipped.

    Returns:
        A :class:`Headers` holding (raw name, lowercased name, value)
        triples. Repeated Content-Length headers with the same value are
        collapsed into the first one.

    Raises:
        LocalProtocolError: On conflicting Content-Length values, more than
            one Transfer-Encoding header, or a Transfer-Encoding other than
            ``chunked`` (the last two carry ``error_status_hint=501``).
    """
    normalized: List[Tuple[bytes, bytes, bytes]] = []
    content_length = None
    transfer_encoding_seen = False
    for name, value in headers:
        # For headers coming out of the parser, we can safely skip some steps,
        # because it always returns bytes and has already run these regexes
        # over the data:
        if not _parsed:
            name = bytesify(name)
            value = bytesify(value)
            validate(_field_name_re, name, "Illegal header name {!r}", name)
            validate(_field_value_re, value, "Illegal header value {!r}", value)
        assert isinstance(name, bytes)
        assert isinstance(value, bytes)

        lowered = name.lower()
        if lowered == b"content-length":
            # A single header may carry a comma-separated list; every entry
            # has to agree.
            distinct = {part.strip() for part in value.split(b",")}
            if len(distinct) != 1:
                raise LocalProtocolError("conflicting Content-Length headers")
            value = distinct.pop()
            validate(_content_length_re, value, "bad Content-Length")
            if content_length is not None:
                if content_length != value:
                    raise LocalProtocolError("conflicting Content-Length headers")
                # Same value as the first Content-Length: drop the duplicate.
                continue
            content_length = value
        elif lowered == b"transfer-encoding":
            # "A server that receives a request message with a transfer coding
            # it does not understand SHOULD respond with 501 (Not
            # Implemented)."
            # https://tools.ietf.org/html/rfc7230#section-3.3.1
            if transfer_encoding_seen:
                raise LocalProtocolError(
                    "multiple Transfer-Encoding headers", error_status_hint=501
                )
            # "All transfer-coding names are case-insensitive"
            # -- https://tools.ietf.org/html/rfc7230#section-4
            value = value.lower()
            if value != b"chunked":
                raise LocalProtocolError(
                    "Only Transfer-Encoding: chunked is supported",
                    error_status_hint=501,
                )
            transfer_encoding_seen = True
        normalized.append((name, lowered, value))
    return Headers(normalized)
def set_comma_header(headers: Headers, name: bytes, new_values: List[bytes]) -> Headers:
    """Return new headers with every ``name`` entry replaced by ``new_values``.

    Args:
        headers: The existing headers; not modified.
        name: The header to replace, expected to be lower-case bytes.
        new_values: One new header line is appended per value. An empty list
            removes the header entirely.

    Returns:
        The result of running the rebuilt (name, value) list through
        :func:`normalize_and_validate`.
    """
    # Keep everything except the header being replaced, with its original
    # casing.
    kept: List[Tuple[bytes, bytes]] = [
        (raw_name, raw_value)
        for raw_name, lowered_name, raw_value in headers._full_items
        if lowered_name != name
    ]
    # New entries are stored with title casing to match the conventional HTTP
    # header style. `.title()` is a blunt approach, but it is correct for the
    # headers this is used with: Connection, Content-Length,
    # Transfer-Encoding.
    appended: List[Tuple[bytes, bytes]] = [
        (name.title(), new_value) for new_value in new_values
    ]
    return normalize_and_validate(kept + appended)
+ if request.http_version < b"1.1": + return False + expect = get_comma_header(request.headers, b"expect") + return b"100-continue" in expect diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/h11/_readers.py b/URSA/.venv_ursa/lib/python3.12/site-packages/h11/_readers.py new file mode 100644 index 0000000000000000000000000000000000000000..08a9574da4a89d82dfb71b3087b14c8644102dd6 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/h11/_readers.py @@ -0,0 +1,247 @@ +# Code to read HTTP data +# +# Strategy: each reader is a callable which takes a ReceiveBuffer object, and +# either: +# 1) consumes some of it and returns an Event +# 2) raises a LocalProtocolError (for consistency -- e.g. we call validate() +# and it might raise a LocalProtocolError, so simpler just to always use +# this) +# 3) returns None, meaning "I need more data" +# +# If they have a .read_eof attribute, then this will be called if an EOF is +# received -- but this is optional. Either way, the actual ConnectionClosed +# event will be generated afterwards. +# +# READERS is a dict describing how to pick a reader. 
def _obsolete_line_fold(lines: Iterable[bytes]) -> Iterable[bytes]:
    """Join continuation lines onto the header line they belong to.

    A line that ``obs_fold_re`` matches at its start (leading spaces or
    tabs) continues the previous line: the matched whitespace is replaced
    by a single space and the remainder is appended.

    Args:
        lines: The raw header lines.

    Yields:
        One logical header line per header, in order.

    Raises:
        LocalProtocolError: If the very first line is a continuation line.
    """
    pending: Optional[bytes] = None
    for raw in lines:
        fold = obs_fold_re.match(raw)
        if not fold:
            # A fresh header line: flush the one collected so far.
            if pending is not None:
                yield pending
            pending = raw
            continue

        if pending is None:
            raise LocalProtocolError("continuation line at start of headers")
        if not isinstance(pending, bytearray):
            # Cast to a mutable type, avoiding copy on append to ensure O(n) time
            pending = bytearray(pending)
        pending += b" "
        pending += raw[fold.end() :]
    if pending is not None:
        yield pending
"illegal request line: {!r}", lines[0] + ) + return Request( + headers=list(_decode_header_lines(lines[1:])), _parsed=True, **matches + ) + + +status_line_re = re.compile(status_line.encode("ascii")) + + +def maybe_read_from_SEND_RESPONSE_server( + buf: ReceiveBuffer, +) -> Union[InformationalResponse, Response, None]: + lines = buf.maybe_extract_lines() + if lines is None: + if buf.is_next_line_obviously_invalid_request_line(): + raise LocalProtocolError("illegal request line") + return None + if not lines: + raise LocalProtocolError("no response line received") + matches = validate(status_line_re, lines[0], "illegal status line: {!r}", lines[0]) + http_version = ( + b"1.1" if matches["http_version"] is None else matches["http_version"] + ) + reason = b"" if matches["reason"] is None else matches["reason"] + status_code = int(matches["status_code"]) + class_: Union[Type[InformationalResponse], Type[Response]] = ( + InformationalResponse if status_code < 200 else Response + ) + return class_( + headers=list(_decode_header_lines(lines[1:])), + _parsed=True, + status_code=status_code, + reason=reason, + http_version=http_version, + ) + + +class ContentLengthReader: + def __init__(self, length: int) -> None: + self._length = length + self._remaining = length + + def __call__(self, buf: ReceiveBuffer) -> Union[Data, EndOfMessage, None]: + if self._remaining == 0: + return EndOfMessage() + data = buf.maybe_extract_at_most(self._remaining) + if data is None: + return None + self._remaining -= len(data) + return Data(data=data) + + def read_eof(self) -> NoReturn: + raise RemoteProtocolError( + "peer closed connection without sending complete message body " + "(received {} bytes, expected {})".format( + self._length - self._remaining, self._length + ) + ) + + +chunk_header_re = re.compile(chunk_header.encode("ascii")) + + +class ChunkedReader: + def __init__(self) -> None: + self._bytes_in_chunk = 0 + # After reading a chunk, we have to throw away the trailing \r\n; if + # 
this is >0 then we discard that many bytes before resuming regular + # de-chunkification. + self._bytes_to_discard = 0 + self._reading_trailer = False + + def __call__(self, buf: ReceiveBuffer) -> Union[Data, EndOfMessage, None]: + if self._reading_trailer: + lines = buf.maybe_extract_lines() + if lines is None: + return None + return EndOfMessage(headers=list(_decode_header_lines(lines))) + if self._bytes_to_discard > 0: + data = buf.maybe_extract_at_most(self._bytes_to_discard) + if data is None: + return None + self._bytes_to_discard -= len(data) + if self._bytes_to_discard > 0: + return None + # else, fall through and read some more + assert self._bytes_to_discard == 0 + if self._bytes_in_chunk == 0: + # We need to refill our chunk count + chunk_header = buf.maybe_extract_next_line() + if chunk_header is None: + return None + matches = validate( + chunk_header_re, + chunk_header, + "illegal chunk header: {!r}", + chunk_header, + ) + # XX FIXME: we discard chunk extensions. Does anyone care? 
+ self._bytes_in_chunk = int(matches["chunk_size"], base=16) + if self._bytes_in_chunk == 0: + self._reading_trailer = True + return self(buf) + chunk_start = True + else: + chunk_start = False + assert self._bytes_in_chunk > 0 + data = buf.maybe_extract_at_most(self._bytes_in_chunk) + if data is None: + return None + self._bytes_in_chunk -= len(data) + if self._bytes_in_chunk == 0: + self._bytes_to_discard = 2 + chunk_end = True + else: + chunk_end = False + return Data(data=data, chunk_start=chunk_start, chunk_end=chunk_end) + + def read_eof(self) -> NoReturn: + raise RemoteProtocolError( + "peer closed connection without sending complete message body " + "(incomplete chunked read)" + ) + + +class Http10Reader: + def __call__(self, buf: ReceiveBuffer) -> Optional[Data]: + data = buf.maybe_extract_at_most(999999999) + if data is None: + return None + return Data(data=data) + + def read_eof(self) -> EndOfMessage: + return EndOfMessage() + + +def expect_nothing(buf: ReceiveBuffer) -> None: + if buf: + raise LocalProtocolError("Got data when expecting EOF") + return None + + +ReadersType = Dict[ + Union[Type[Sentinel], Tuple[Type[Sentinel], Type[Sentinel]]], + Union[Callable[..., Any], Dict[str, Callable[..., Any]]], +] + +READERS: ReadersType = { + (CLIENT, IDLE): maybe_read_from_IDLE_client, + (SERVER, IDLE): maybe_read_from_SEND_RESPONSE_server, + (SERVER, SEND_RESPONSE): maybe_read_from_SEND_RESPONSE_server, + (CLIENT, DONE): expect_nothing, + (CLIENT, MUST_CLOSE): expect_nothing, + (CLIENT, CLOSED): expect_nothing, + (SERVER, DONE): expect_nothing, + (SERVER, MUST_CLOSE): expect_nothing, + (SERVER, CLOSED): expect_nothing, + SEND_BODY: { + "chunked": ChunkedReader, + "content-length": ContentLengthReader, + "http/1.0": Http10Reader, + }, +} diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/h11/_receivebuffer.py b/URSA/.venv_ursa/lib/python3.12/site-packages/h11/_receivebuffer.py new file mode 100644 index 
0000000000000000000000000000000000000000..e5c4e08a56f5081e87103f38b4add6ce1b730204 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/h11/_receivebuffer.py @@ -0,0 +1,153 @@ +import re +import sys +from typing import List, Optional, Union + +__all__ = ["ReceiveBuffer"] + + +# Operations we want to support: +# - find next \r\n or \r\n\r\n (\n or \n\n are also acceptable), +# or wait until there is one +# - read at-most-N bytes +# Goals: +# - on average, do this fast +# - worst case, do this in O(n) where n is the number of bytes processed +# Plan: +# - store bytearray, offset, how far we've searched for a separator token +# - use the how-far-we've-searched data to avoid rescanning +# - while doing a stream of uninterrupted processing, advance offset instead +# of constantly copying +# WARNING: +# - I haven't benchmarked or profiled any of this yet. +# +# Note that starting in Python 3.4, deleting the initial n bytes from a +# bytearray is amortized O(n), thanks to some excellent work by Antoine +# Martin: +# +# https://bugs.python.org/issue19087 +# +# This means that if we only supported 3.4+, we could get rid of the code here +# involving self._start and self.compress, because it's doing exactly the same +# thing that bytearray now does internally. +# +# BUT unfortunately, we still support 2.7, and reading short segments out of a +# long buffer MUST be O(bytes read) to avoid DoS issues, so we can't actually +# delete this code. Yet: +# +# https://pythonclock.org/ +# +# (Two things to double-check first though: make sure PyPy also has the +# optimization, and benchmark to make sure it's a win, since we do have a +# slightly clever thing where we delay calling compress() until we've +# processed a whole event, which could in theory be slightly more efficient +# than the internal bytearray support.) 
blank_line_regex = re.compile(b"\n\r?\n", re.MULTILINE)


class ReceiveBuffer:
    """Byte buffer with helpers for pulling lines and byte runs off the front.

    Two search cursors remember how far previous, unsuccessful separator
    searches got, so that repeated calls while data trickles in do not rescan
    the same bytes. Both cursors are reset whenever data is removed.
    """

    def __init__(self) -> None:
        self._data = bytearray()
        # Buffer length at the time of the last failed b"\r\n" search.
        self._next_line_search = 0
        # Offset from which the next blank-line search should start.
        self._multiple_lines_search = 0

    def __iadd__(self, byteslike: Union[bytes, bytearray]) -> "ReceiveBuffer":
        self._data += byteslike
        return self

    def __bool__(self) -> bool:
        return bool(self._data)

    def __len__(self) -> int:
        return len(self._data)

    # for @property unprocessed_data
    def __bytes__(self) -> bytes:
        return bytes(self._data)

    def _extract(self, count: int) -> bytearray:
        """Remove and return the first ``count`` bytes, resetting the cursors."""
        out = self._data[:count]
        del self._data[:count]

        self._next_line_search = 0
        self._multiple_lines_search = 0

        return out

    def maybe_extract_at_most(self, count: int) -> Optional[bytearray]:
        """Remove and return up to ``count`` bytes from the front.

        Returns ``None`` when that slice is empty (empty buffer, or a zero
        ``count``).
        """
        out = self._data[:count]
        if not out:
            return None

        # Reuse the slice we already made instead of going through
        # _extract(), which would copy the same bytes a second time.
        del self._data[:count]
        self._next_line_search = 0
        self._multiple_lines_search = 0

        return out

    def maybe_extract_next_line(self) -> Optional[bytearray]:
        """Remove and return the first line, including its ``\\r\\n``.

        Returns ``None`` if no complete ``\\r\\n``-terminated line is buffered.
        """
        # Only search in buffer space that we've not already looked at. Back
        # up one byte because a b"\r" may have been the last byte last time.
        search_start_index = max(0, self._next_line_search - 1)
        partial_idx = self._data.find(b"\r\n", search_start_index)

        if partial_idx == -1:
            self._next_line_search = len(self._data)
            return None

        # + 2 is to compensate len(b"\r\n")
        idx = partial_idx + 2

        return self._extract(idx)

    def maybe_extract_lines(self) -> Optional[List[bytearray]]:
        """Remove everything up to the first blank line; return it as lines.

        Line terminators are stripped and the blank line itself is dropped.
        Returns ``[]`` if the buffer starts with an empty line and ``None``
        if no blank line has been received yet.
        """
        # Handle the case where we have an immediate empty line.
        if self._data[:1] == b"\n":
            self._extract(1)
            return []

        if self._data[:2] == b"\r\n":
            self._extract(2)
            return []

        # Only search in buffer space that we've not already looked at.
        match = blank_line_regex.search(self._data, self._multiple_lines_search)
        if match is None:
            # Keep the last two bytes in range: they may be the first half of
            # a separator that is completed by the next chunk of data.
            self._multiple_lines_search = max(0, len(self._data) - 2)
            return None

        # Truncate the buffer and return it.
        idx = match.span(0)[-1]
        out = self._extract(idx)
        lines = out.split(b"\n")

        for line in lines:
            if line.endswith(b"\r"):
                del line[-1]

        # The blank-line separator always yields two empty trailing entries.
        assert lines[-2] == lines[-1] == b""

        del lines[-2:]

        return lines

    # In theory we should wait until `\r\n` before starting to validate
    # incoming data. However it's interesting to detect (very) invalid data
    # early given they might not even contain `\r\n` at all (hence only
    # timeout will get rid of them).
    # This is not a 100% effective detection but more of a cheap sanity check
    # allowing for early abort in some useful cases.
    # This is especially interesting when peer is messing up with HTTPS and
    # sent us a TLS stream where we were expecting plain HTTP given all
    # versions of TLS so far start handshake with a 0x16 message type code.
    def is_next_line_obviously_invalid_request_line(self) -> bool:
        """Return True if the first buffered byte is below 0x21."""
        try:
            # HTTP header line must not contain non-printable characters
            # and should not start with a space
            return self._data[0] < 0x21
        except IndexError:
            return False
As much as possible goes into the table-based +# representation, but for the bits that don't quite fit, the actual code and +# state must nonetheless live here. +# +# Rule 2: this file does not know about what role we're playing; it only knows +# about HTTP request/response cycles in the abstract. This ensures that we +# don't cheat and apply different rules to local and remote parties. +# +# +# Theory of operation +# =================== +# +# Possibly the simplest way to think about this is that we actually have 5 +# different state machines here. Yes, 5. These are: +# +# 1) The client state, with its complicated automaton (see the docs) +# 2) The server state, with its complicated automaton (see the docs) +# 3) The keep-alive state, with possible states {True, False} +# 4) The SWITCH_CONNECT state, with possible states {False, True} +# 5) The SWITCH_UPGRADE state, with possible states {False, True} +# +# For (3)-(5), the first state listed is the initial state. +# +# (1)-(3) are stored explicitly in member variables. The last +# two are stored implicitly in the pending_switch_proposals set as: +# (state of 4) == (_SWITCH_CONNECT in pending_switch_proposals) +# (state of 5) == (_SWITCH_UPGRADE in pending_switch_proposals) +# +# And each of these machines has two different kinds of transitions: +# +# a) Event-triggered +# b) State-triggered +# +# Event triggered is the obvious thing that you'd think it is: some event +# happens, and if it's the right event at the right time then a transition +# happens. But there are somewhat complicated rules for which machines can +# "see" which events. (As a rule of thumb, if a machine "sees" an event, this +# means two things: the event can affect the machine, and if the machine is +# not in a state where it expects that event then it's an error.) These rules +# are: +# +# 1) The client machine sees all h11.events objects emitted by the client. +# +# 2) The server machine sees all h11.events objects emitted by the server. 
+# +# It also sees the client's Request event. +# +# And sometimes, server events are annotated with a _SWITCH_* event. For +# example, we can have a (Response, _SWITCH_CONNECT) event, which is +# different from a regular Response event. +# +# 3) The keep-alive machine sees the process_keep_alive_disabled() event +# (which is derived from Request/Response events), and this event +# transitions it from True -> False, or from False -> False. There's no way +# to transition back. +# +# 4&5) The _SWITCH_* machines transition from False->True when we get a +# Request that proposes the relevant type of switch (via +# process_client_switch_proposals), and they go from True->False when we +# get a Response that has no _SWITCH_* annotation. +# +# So that's event-triggered transitions. +# +# State-triggered transitions are less standard. What they do here is couple +# the machines together. The way this works is, when certain *joint* +# configurations of states are achieved, then we automatically transition to a +# new *joint* state. So, for example, if we're ever in a joint state with +# +# client: DONE +# keep-alive: False +# +# then the client state immediately transitions to: +# +# client: MUST_CLOSE +# +# This is fundamentally different from an event-based transition, because it +# doesn't matter how we arrived at the {client: DONE, keep-alive: False} state +# -- maybe the client transitioned SEND_BODY -> DONE, or keep-alive +# transitioned True -> False. Either way, once this precondition is satisfied, +# this transition is immediately triggered. +# +# What if two conflicting state-based transitions get enabled at the same +# time? In practice there's only one case where this arises (client DONE -> +# MIGHT_SWITCH_PROTOCOL versus DONE -> MUST_CLOSE), and we resolve it by +# explicitly prioritizing the DONE -> MIGHT_SWITCH_PROTOCOL transition. 
+# +# Implementation +# -------------- +# +# The event-triggered transitions for the server and client machines are all +# stored explicitly in a table. Ditto for the state-triggered transitions that +# involve just the server and client state. +# +# The transitions for the other machines, and the state-triggered transitions +# that involve the other machines, are written out as explicit Python code. +# +# It'd be nice if there were some cleaner way to do all this. This isn't +# *too* terrible, but I feel like it could probably be better. +# +# WARNING +# ------- +# +# The script that generates the state machine diagrams for the docs knows how +# to read out the EVENT_TRIGGERED_TRANSITIONS and STATE_TRIGGERED_TRANSITIONS +# tables. But it can't automatically read the transitions that are written +# directly in Python code. So if you touch those, you need to also update the +# script to keep it in sync! +from typing import cast, Dict, Optional, Set, Tuple, Type, Union + +from ._events import * +from ._util import LocalProtocolError, Sentinel + +# Everything in __all__ gets re-exported as part of the h11 public API. 
+__all__ = [ + "CLIENT", + "SERVER", + "IDLE", + "SEND_RESPONSE", + "SEND_BODY", + "DONE", + "MUST_CLOSE", + "CLOSED", + "MIGHT_SWITCH_PROTOCOL", + "SWITCHED_PROTOCOL", + "ERROR", +] + + +class CLIENT(Sentinel, metaclass=Sentinel): + pass + + +class SERVER(Sentinel, metaclass=Sentinel): + pass + + +# States +class IDLE(Sentinel, metaclass=Sentinel): + pass + + +class SEND_RESPONSE(Sentinel, metaclass=Sentinel): + pass + + +class SEND_BODY(Sentinel, metaclass=Sentinel): + pass + + +class DONE(Sentinel, metaclass=Sentinel): + pass + + +class MUST_CLOSE(Sentinel, metaclass=Sentinel): + pass + + +class CLOSED(Sentinel, metaclass=Sentinel): + pass + + +class ERROR(Sentinel, metaclass=Sentinel): + pass + + +# Switch types +class MIGHT_SWITCH_PROTOCOL(Sentinel, metaclass=Sentinel): + pass + + +class SWITCHED_PROTOCOL(Sentinel, metaclass=Sentinel): + pass + + +class _SWITCH_UPGRADE(Sentinel, metaclass=Sentinel): + pass + + +class _SWITCH_CONNECT(Sentinel, metaclass=Sentinel): + pass + + +EventTransitionType = Dict[ + Type[Sentinel], + Dict[ + Type[Sentinel], + Dict[Union[Type[Event], Tuple[Type[Event], Type[Sentinel]]], Type[Sentinel]], + ], +] + +EVENT_TRIGGERED_TRANSITIONS: EventTransitionType = { + CLIENT: { + IDLE: {Request: SEND_BODY, ConnectionClosed: CLOSED}, + SEND_BODY: {Data: SEND_BODY, EndOfMessage: DONE}, + DONE: {ConnectionClosed: CLOSED}, + MUST_CLOSE: {ConnectionClosed: CLOSED}, + CLOSED: {ConnectionClosed: CLOSED}, + MIGHT_SWITCH_PROTOCOL: {}, + SWITCHED_PROTOCOL: {}, + ERROR: {}, + }, + SERVER: { + IDLE: { + ConnectionClosed: CLOSED, + Response: SEND_BODY, + # Special case: server sees client Request events, in this form + (Request, CLIENT): SEND_RESPONSE, + }, + SEND_RESPONSE: { + InformationalResponse: SEND_RESPONSE, + Response: SEND_BODY, + (InformationalResponse, _SWITCH_UPGRADE): SWITCHED_PROTOCOL, + (Response, _SWITCH_CONNECT): SWITCHED_PROTOCOL, + }, + SEND_BODY: {Data: SEND_BODY, EndOfMessage: DONE}, + DONE: {ConnectionClosed: CLOSED}, + 
class ConnectionState:
    """Joint client/server state machine for one HTTP connection.

    Holds the per-role states plus two extra pieces of state (keep-alive and
    pending protocol-switch proposals), applies event-triggered transitions
    from ``EVENT_TRIGGERED_TRANSITIONS`` and then re-applies the
    state-triggered rules until nothing changes any more.
    """

    def __init__(self) -> None:
        # Extra bits of state that don't quite fit into the state model.

        # If this is False then it enables the automatic DONE -> MUST_CLOSE
        # transition. Don't set this directly; call
        # .process_keep_alive_disabled()
        self.keep_alive = True

        # This is a subset of {UPGRADE, CONNECT}, containing the proposals
        # made by the client for switching protocols.
        self.pending_switch_proposals: Set[Type[Sentinel]] = set()

        self.states: Dict[Type[Sentinel], Type[Sentinel]] = {CLIENT: IDLE, SERVER: IDLE}

    def process_error(self, role: Type[Sentinel]) -> None:
        """Move ``role`` to ERROR and re-run the state-triggered rules."""
        self.states[role] = ERROR
        self._fire_state_triggered_transitions()

    def process_keep_alive_disabled(self) -> None:
        """Turn keep-alive off and re-run the state-triggered rules."""
        self.keep_alive = False
        self._fire_state_triggered_transitions()

    def process_client_switch_proposal(self, switch_event: Type[Sentinel]) -> None:
        """Record a client protocol-switch proposal and re-run the rules."""
        self.pending_switch_proposals.add(switch_event)
        self._fire_state_triggered_transitions()

    def process_event(
        self,
        role: Type[Sentinel],
        event_type: Type[Event],
        server_switch_event: Optional[Type[Sentinel]] = None,
    ) -> None:
        """Apply one event emitted by ``role`` to the state machine.

        ``server_switch_event`` marks a server event that accepts a protocol
        switch; it must match a pending client proposal.

        Raises ``LocalProtocolError`` if the switch was never proposed or if
        the event is not legal in the current state.
        """
        _event_type: Union[Type[Event], Tuple[Type[Event], Type[Sentinel]]] = event_type
        if server_switch_event is not None:
            assert role is SERVER
            if server_switch_event not in self.pending_switch_proposals:
                raise LocalProtocolError(
                    "Received server {} event without a pending proposal".format(
                        server_switch_event
                    )
                )
            _event_type = (event_type, server_switch_event)
        # A plain Response (no switch annotation) rejects every proposal.
        if server_switch_event is None and _event_type is Response:
            self.pending_switch_proposals = set()
        self._fire_event_triggered_transitions(role, _event_type)
        # Special case: the server state does get to see Request
        # events.
        if _event_type is Request:
            assert role is CLIENT
            self._fire_event_triggered_transitions(SERVER, (Request, CLIENT))
        self._fire_state_triggered_transitions()

    def _fire_event_triggered_transitions(
        self,
        role: Type[Sentinel],
        event_type: Union[Type[Event], Tuple[Type[Event], Type[Sentinel]]],
    ) -> None:
        """Look up and apply the table transition for ``role``.

        Raises ``LocalProtocolError`` when the table has no entry for this
        event in the role's current state.
        """
        state = self.states[role]
        try:
            new_state = EVENT_TRIGGERED_TRANSITIONS[role][state][event_type]
        except KeyError:
            # event_type is either an Event class or an (Event, Sentinel)
            # tuple. A tuple has no __name__, so build the label explicitly;
            # otherwise this error path would die with AttributeError instead
            # of reporting the protocol violation.
            if isinstance(event_type, tuple):
                event_name = "({})".format(
                    ", ".join(
                        getattr(part, "__name__", repr(part)) for part in event_type
                    )
                )
            else:
                event_name = event_type.__name__
            raise LocalProtocolError(
                "can't handle event type {} when role={} and state={}".format(
                    event_name, role, self.states[role]
                )
            ) from None
        self.states[role] = new_state

    def _fire_state_triggered_transitions(self) -> None:
        """Apply the state-triggered rules until the states stop changing."""
        # We apply these rules repeatedly until converging on a fixed point
        while True:
            start_states = dict(self.states)

            # It could happen that both these special-case transitions are
            # enabled at the same time:
            #
            #    DONE -> MIGHT_SWITCH_PROTOCOL
            #    DONE -> MUST_CLOSE
            #
            # For example, this will always be true of a HTTP/1.0 client
            # requesting CONNECT.  If this happens, the protocol switch takes
            # priority. From there the client will either go to
            # SWITCHED_PROTOCOL, in which case it's none of our business when
            # they close the connection, or else the server will deny the
            # request, in which case the client will go back to DONE and then
            # from there to MUST_CLOSE.
            if self.pending_switch_proposals:
                if self.states[CLIENT] is DONE:
                    self.states[CLIENT] = MIGHT_SWITCH_PROTOCOL

            if not self.pending_switch_proposals:
                if self.states[CLIENT] is MIGHT_SWITCH_PROTOCOL:
                    self.states[CLIENT] = DONE

            if not self.keep_alive:
                for role in (CLIENT, SERVER):
                    if self.states[role] is DONE:
                        self.states[role] = MUST_CLOSE

            # Tabular state-triggered transitions
            joint_state = (self.states[CLIENT], self.states[SERVER])
            changes = STATE_TRIGGERED_TRANSITIONS.get(joint_state, {})
            self.states.update(changes)

            if self.states == start_states:
                # Fixed point reached
                return

    def start_next_cycle(self) -> None:
        """Reset both roles to IDLE for the next request/response cycle.

        Raises ``LocalProtocolError`` unless both roles are currently DONE.
        """
        if self.states != {CLIENT: DONE, SERVER: DONE}:
            raise LocalProtocolError(
                "not in a reusable state. self.states={}".format(self.states)
            )
        # Can't reach DONE/DONE with any of these active, but still, let's be
        # sure.
        assert self.keep_alive
        assert not self.pending_switch_proposals
        self.states = {CLIENT: IDLE, SERVER: IDLE}
+ + In addition to the normal :exc:`Exception` features, it has one attribute: + + .. attribute:: error_status_hint + + This gives a suggestion as to what status code a server might use if + this error occurred as part of a request. + + For a :exc:`RemoteProtocolError`, this is useful as a suggestion for + how you might want to respond to a misbehaving peer, if you're + implementing a server. + + For a :exc:`LocalProtocolError`, this can be taken as a suggestion for + how your peer might have responded to *you* if h11 had allowed you to + continue. + + The default is 400 Bad Request, a generic catch-all for protocol + violations. + + """ + + def __init__(self, msg: str, error_status_hint: int = 400) -> None: + if type(self) is ProtocolError: + raise TypeError("tried to directly instantiate ProtocolError") + Exception.__init__(self, msg) + self.error_status_hint = error_status_hint + + +# Strategy: there are a number of public APIs where a LocalProtocolError can +# be raised (send(), all the different event constructors, ...), and only one +# public API where RemoteProtocolError can be raised +# (receive_data()). Therefore we always raise LocalProtocolError internally, +# and then receive_data will translate this into a RemoteProtocolError. +# +# Internally: +# LocalProtocolError is the generic "ProtocolError". +# Externally: +# LocalProtocolError is for local errors and RemoteProtocolError is for +# remote errors. +class LocalProtocolError(ProtocolError): + def _reraise_as_remote_protocol_error(self) -> NoReturn: + # After catching a LocalProtocolError, use this method to re-raise it + # as a RemoteProtocolError. This method must be called from inside an + # except: block. + # + # An easy way to get an equivalent RemoteProtocolError is just to + # modify 'self' in place. 
_T_Sentinel = TypeVar("_T_Sentinel", bound="Sentinel")


class Sentinel(type):
    """Metaclass for sentinel values that are their own type.

    A sentinel is declared as ``class NAME(Sentinel, metaclass=Sentinel)``.
    The resulting object compares and hashes by identity, prints as its bare
    name, and satisfies ``type(NAME) is NAME`` so it can be dispatched on in
    the same way as an event instance.
    """

    def __new__(
        cls: Type[_T_Sentinel],
        name: str,
        bases: Tuple[type, ...],
        namespace: Dict[str, Any],
        **kwds: Any
    ) -> _T_Sentinel:
        # Only the exact declaration form shown in the docstring is supported.
        assert bases == (Sentinel,)
        sentinel = type.__new__(cls, name, bases, namespace, **kwds)
        # Point the new class at itself so that type(sentinel) is sentinel.
        sentinel.__class__ = sentinel  # type: ignore
        return sentinel

    def __repr__(self) -> str:
        return self.__name__
def bytesify(s: Union[bytes, bytearray, memoryview, int, str]) -> bytes:
    """Coerce ``s`` to ``bytes``.

    Exact ``bytes`` objects are returned unchanged, ``str`` is encoded as
    ASCII, and any other bytes-like object is copied into a new ``bytes``.

    Raises ``TypeError`` for ``int`` (``bytes(n)`` would otherwise quietly
    produce ``n`` zero bytes) and ``UnicodeEncodeError`` for non-ASCII text.
    """
    # Common case first: nothing to convert.
    if type(s) is bytes:
        return s
    if isinstance(s, str):
        return s.encode("ascii")
    if isinstance(s, int):
        raise TypeError("expected bytes-like object, not int")
    return bytes(s)
def write_headers(headers: Headers, write: Writer) -> None:
    """Serialize ``headers`` through ``write``, followed by a blank line.

    Host fields are emitted before all other fields; within each group the
    original order and the original (raw) spelling of the names are kept.
    """
    # "Since the Host field-value is critical information for handling a
    # request, a user agent SHOULD generate Host as the first header field
    # following the request-line." - RFC 7230
    entries = headers._full_items
    # Two passes over the same items: first the Host fields, then the rest.
    for emit_host in (True, False):
        for raw_name, name, value in entries:
            if (name == b"host") is emit_host:
                write(b"%s: %s\r\n" % (raw_name, value))
    write(b"\r\n")
class ContentLengthWriter(BodyWriter):
    """Body writer for messages framed by a ``Content-Length`` header.

    Tracks how many bytes are still owed and refuses to send more, or to end
    the message with fewer, than the declared length. Only ``len(data)`` and
    ``write(data)`` are ever applied to the payload.
    """

    def __init__(self, length: int) -> None:
        # Bytes that still have to be written to honour the declared length.
        self._length = length

    def send_data(self, data: bytes, write: Writer) -> None:
        """Write ``data`` if it fits within the remaining declared length.

        Raises ``LocalProtocolError`` when ``data`` would exceed it.
        """
        remaining = self._length - len(data)
        # Validate before updating the counter, so a rejected write leaves
        # the writer consistent instead of stuck at a negative length.
        if remaining < 0:
            raise LocalProtocolError("Too much data for declared Content-Length")
        self._length = remaining
        write(data)

    def send_eom(self, headers: Headers, write: Writer) -> None:
        """Check that the body is complete; nothing is written.

        Raises ``LocalProtocolError`` if bytes are still owed or if trailer
        ``headers`` were supplied.
        """
        if self._length != 0:
            raise LocalProtocolError("Too little data for declared Content-Length")
        if headers:
            raise LocalProtocolError("Content-Length and trailers don't mix")
+ if not data: + return + write(b"%x\r\n" % len(data)) + write(data) + write(b"\r\n") + + def send_eom(self, headers: Headers, write: Writer) -> None: + write(b"0\r\n") + write_headers(headers, write) + + +class Http10Writer(BodyWriter): + def send_data(self, data: bytes, write: Writer) -> None: + write(data) + + def send_eom(self, headers: Headers, write: Writer) -> None: + if headers: + raise LocalProtocolError("can't send trailers to HTTP/1.0 client") + # no need to close the socket ourselves, that will be taken care of by + # Connection: close machinery + + +WritersType = Dict[ + Union[Tuple[Type[Sentinel], Type[Sentinel]], Type[Sentinel]], + Union[ + Dict[str, Type[BodyWriter]], + Callable[[Union[InformationalResponse, Response], Writer], None], + Callable[[Request, Writer], None], + ], +] + +WRITERS: WritersType = { + (CLIENT, IDLE): write_request, + (SERVER, IDLE): write_any_response, + (SERVER, SEND_RESPONSE): write_any_response, + SEND_BODY: { + "chunked": ChunkedWriter, + "content-length": ContentLengthWriter, + "http/1.0": Http10Writer, + }, +} diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/h11/py.typed b/URSA/.venv_ursa/lib/python3.12/site-packages/h11/py.typed new file mode 100644 index 0000000000000000000000000000000000000000..f5642f79f21d872f010979dcf6f0c4a415acc19d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/h11/py.typed @@ -0,0 +1 @@ +Marker diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/hf_xet-1.3.2.dist-info/INSTALLER b/URSA/.venv_ursa/lib/python3.12/site-packages/hf_xet-1.3.2.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/hf_xet-1.3.2.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/hf_xet-1.3.2.dist-info/METADATA b/URSA/.venv_ursa/lib/python3.12/site-packages/hf_xet-1.3.2.dist-info/METADATA new file mode 
100644 index 0000000000000000000000000000000000000000..eed417e382589ca226f658db5da489d93802d3a4 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/hf_xet-1.3.2.dist-info/METADATA @@ -0,0 +1,87 @@ +Metadata-Version: 2.4 +Name: hf-xet +Version: 1.3.2 +Classifier: Development Status :: 5 - Production/Stable +Classifier: License :: OSI Approved :: Apache Software License +Classifier: Programming Language :: Rust +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Programming Language :: Python :: Implementation :: PyPy +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3 :: Only +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: Programming Language :: Python :: 3.13 +Classifier: Programming Language :: Python :: 3.14 +Classifier: Programming Language :: Python :: Free Threading +Classifier: Programming Language :: Python :: Free Threading :: 2 - Beta +Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence +Requires-Dist: pytest ; extra == 'tests' +Provides-Extra: tests +License-File: LICENSE +Summary: Fast transfer of large files with the Hugging Face Hub. +Maintainer-email: Rajat Arya , Jared Sulzdorf , Di Xiao , Assaf Vayner , Hoyt Koepke +License-Expression: Apache-2.0 +Requires-Python: >=3.8 +Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM +Project-URL: Documentation, https://huggingface.co/docs/hub/xet/index +Project-URL: Homepage, https://github.com/huggingface/xet-core +Project-URL: Issues, https://github.com/huggingface/xet-core/issues +Project-URL: Repository, https://github.com/huggingface/xet-core.git + + +

+ License + GitHub release + Contributor Covenant +

+ +

+

🤗 hf-xet - xet client tech, used in huggingface_hub

+

+ +## Welcome + +`hf-xet` enables `huggingface_hub` to utilize xet storage for uploading and downloading to HF Hub. Xet storage provides chunk-based deduplication, efficient storage/retrieval with local disk caching, and backwards compatibility with Git LFS. This library is not meant to be used directly, and is instead intended to be used from [huggingface_hub](https://pypi.org/project/huggingface-hub). + +## Key features + +♻ **chunk-based deduplication implementation**: avoid transferring and storing chunks that are shared across binary files (models, datasets, etc). + +🤗 **Python bindings**: bindings for [huggingface_hub](https://github.com/huggingface/huggingface_hub/) package. + +↔ **network communications**: concurrent communication to HF Hub Xet backend services (CAS). + +🔖 **local disk caching**: chunk-based cache that sits alongside the existing [huggingface_hub disk cache](https://huggingface.co/docs/huggingface_hub/guides/manage-cache). + +## Installation + +Install the `hf_xet` package with [pip](https://pypi.org/project/hf-xet/): + +```bash +pip install hf_xet +``` + +## Quick Start + +`hf_xet` is not intended to be run independently as it is expected to be used from `huggingface_hub`, so to get started with `huggingface_hub` check out the documentation [here]("https://hf.co/docs/huggingface_hub"). + +## Contributions (feature requests, bugs, etc.) are encouraged & appreciated 💙💚💛💜🧡❤️ + +Please join us in making hf-xet better. We value everyone's contributions. Code is not the only way to help. Answering questions, helping each other, improving documentation, filing issues all help immensely. If you are interested in contributing (please do!), check out the [contribution guide](https://github.com/huggingface/xet-core/blob/main/CONTRIBUTING.md) for this repository. 
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/hf_xet-1.3.2.dist-info/RECORD b/URSA/.venv_ursa/lib/python3.12/site-packages/hf_xet-1.3.2.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..3943f0b7838d0a67f488c360d8eaa244896318bb --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/hf_xet-1.3.2.dist-info/RECORD @@ -0,0 +1,9 @@ +hf_xet-1.3.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +hf_xet-1.3.2.dist-info/METADATA,sha256=VdBkvkyMsWS6C51XipEai-iZGPIt9ydUqE6iReFQdVI,4882 +hf_xet-1.3.2.dist-info/RECORD,, +hf_xet-1.3.2.dist-info/WHEEL,sha256=ycZRBBQNh-_vBSCR_X0ck7NlL8NDEI7G-tWH-Cjb8uw,143 +hf_xet-1.3.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357 +hf_xet-1.3.2.dist-info/sboms/hf_xet.cyclonedx.json,sha256=Z8lgnfjLuQ1OS7SIhJmZNn9qdETBotCqRMS1kgQOnxE,394826 +hf_xet/__init__.py,sha256=E8UDdyQ8glZ_nve9hHEf22bPang8-RKx4VuApXYeQUo,107 +hf_xet/__pycache__/__init__.cpython-312.pyc,, +hf_xet/hf_xet.abi3.so,sha256=35NZSsDbLEwZbmB-1aRzdjHhZD_MwMLDqP-tmCeN2gE,10594520 diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/hf_xet-1.3.2.dist-info/WHEEL b/URSA/.venv_ursa/lib/python3.12/site-packages/hf_xet-1.3.2.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..e3c7eedd8e94fbe7277127c82058423fa8d80b38 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/hf_xet-1.3.2.dist-info/WHEEL @@ -0,0 +1,5 @@ +Wheel-Version: 1.0 +Generator: maturin (1.12.4) +Root-Is-Purelib: false +Tag: cp37-abi3-manylinux_2_17_x86_64 +Tag: cp37-abi3-manylinux2014_x86_64 diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpx-0.28.1.dist-info/INSTALLER b/URSA/.venv_ursa/lib/python3.12/site-packages/httpx-0.28.1.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ 
b/URSA/.venv_ursa/lib/python3.12/site-packages/httpx-0.28.1.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpx-0.28.1.dist-info/METADATA b/URSA/.venv_ursa/lib/python3.12/site-packages/httpx-0.28.1.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..b0d2b196385e98259971519793447c1fd7a9a643 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/httpx-0.28.1.dist-info/METADATA @@ -0,0 +1,203 @@ +Metadata-Version: 2.3 +Name: httpx +Version: 0.28.1 +Summary: The next generation HTTP client. +Project-URL: Changelog, https://github.com/encode/httpx/blob/master/CHANGELOG.md +Project-URL: Documentation, https://www.python-httpx.org +Project-URL: Homepage, https://github.com/encode/httpx +Project-URL: Source, https://github.com/encode/httpx +Author-email: Tom Christie +License: BSD-3-Clause +Classifier: Development Status :: 4 - Beta +Classifier: Environment :: Web Environment +Classifier: Framework :: AsyncIO +Classifier: Framework :: Trio +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: BSD License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3 :: Only +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: Topic :: Internet :: WWW/HTTP +Requires-Python: >=3.8 +Requires-Dist: anyio +Requires-Dist: certifi +Requires-Dist: httpcore==1.* +Requires-Dist: idna +Provides-Extra: brotli +Requires-Dist: brotli; (platform_python_implementation == 'CPython') and extra == 'brotli' +Requires-Dist: brotlicffi; (platform_python_implementation != 'CPython') and extra == 'brotli' +Provides-Extra: cli +Requires-Dist: click==8.*; extra == 'cli' 
+Requires-Dist: pygments==2.*; extra == 'cli' +Requires-Dist: rich<14,>=10; extra == 'cli' +Provides-Extra: http2 +Requires-Dist: h2<5,>=3; extra == 'http2' +Provides-Extra: socks +Requires-Dist: socksio==1.*; extra == 'socks' +Provides-Extra: zstd +Requires-Dist: zstandard>=0.18.0; extra == 'zstd' +Description-Content-Type: text/markdown + +

+ HTTPX +

+ +

HTTPX - A next-generation HTTP client for Python.

+ +

+ + Test Suite + + + Package version + +

+ +HTTPX is a fully featured HTTP client library for Python 3. It includes **an integrated command line client**, has support for both **HTTP/1.1 and HTTP/2**, and provides both **sync and async APIs**. + +--- + +Install HTTPX using pip: + +```shell +$ pip install httpx +``` + +Now, let's get started: + +```pycon +>>> import httpx +>>> r = httpx.get('https://www.example.org/') +>>> r + +>>> r.status_code +200 +>>> r.headers['content-type'] +'text/html; charset=UTF-8' +>>> r.text +'\n\n\nExample Domain...' +``` + +Or, using the command-line client. + +```shell +$ pip install 'httpx[cli]' # The command line client is an optional dependency. +``` + +Which now allows us to use HTTPX directly from the command-line... + +

+ httpx --help +

+ +Sending a request... + +

+ httpx http://httpbin.org/json +

+ +## Features + +HTTPX builds on the well-established usability of `requests`, and gives you: + +* A broadly [requests-compatible API](https://www.python-httpx.org/compatibility/). +* An integrated command-line client. +* HTTP/1.1 [and HTTP/2 support](https://www.python-httpx.org/http2/). +* Standard synchronous interface, but with [async support if you need it](https://www.python-httpx.org/async/). +* Ability to make requests directly to [WSGI applications](https://www.python-httpx.org/advanced/transports/#wsgi-transport) or [ASGI applications](https://www.python-httpx.org/advanced/transports/#asgi-transport). +* Strict timeouts everywhere. +* Fully type annotated. +* 100% test coverage. + +Plus all the standard features of `requests`... + +* International Domains and URLs +* Keep-Alive & Connection Pooling +* Sessions with Cookie Persistence +* Browser-style SSL Verification +* Basic/Digest Authentication +* Elegant Key/Value Cookies +* Automatic Decompression +* Automatic Content Decoding +* Unicode Response Bodies +* Multipart File Uploads +* HTTP(S) Proxy Support +* Connection Timeouts +* Streaming Downloads +* .netrc Support +* Chunked Requests + +## Installation + +Install with pip: + +```shell +$ pip install httpx +``` + +Or, to include the optional HTTP/2 support, use: + +```shell +$ pip install httpx[http2] +``` + +HTTPX requires Python 3.8+. + +## Documentation + +Project documentation is available at [https://www.python-httpx.org/](https://www.python-httpx.org/). + +For a run-through of all the basics, head over to the [QuickStart](https://www.python-httpx.org/quickstart/). + +For more advanced topics, see the [Advanced Usage](https://www.python-httpx.org/advanced/) section, the [async support](https://www.python-httpx.org/async/) section, or the [HTTP/2](https://www.python-httpx.org/http2/) section. + +The [Developer Interface](https://www.python-httpx.org/api/) provides a comprehensive API reference. 
+ +To find out about tools that integrate with HTTPX, see [Third Party Packages](https://www.python-httpx.org/third_party_packages/). + +## Contribute + +If you want to contribute with HTTPX check out the [Contributing Guide](https://www.python-httpx.org/contributing/) to learn how to start. + +## Dependencies + +The HTTPX project relies on these excellent libraries: + +* `httpcore` - The underlying transport implementation for `httpx`. + * `h11` - HTTP/1.1 support. +* `certifi` - SSL certificates. +* `idna` - Internationalized domain name support. +* `sniffio` - Async library autodetection. + +As well as these optional installs: + +* `h2` - HTTP/2 support. *(Optional, with `httpx[http2]`)* +* `socksio` - SOCKS proxy support. *(Optional, with `httpx[socks]`)* +* `rich` - Rich terminal support. *(Optional, with `httpx[cli]`)* +* `click` - Command line client support. *(Optional, with `httpx[cli]`)* +* `brotli` or `brotlicffi` - Decoding for "brotli" compressed responses. *(Optional, with `httpx[brotli]`)* +* `zstandard` - Decoding for "zstd" compressed responses. *(Optional, with `httpx[zstd]`)* + +A huge amount of credit is due to `requests` for the API layout that +much of this work follows, as well as to `urllib3` for plenty of design +inspiration around the lower-level networking details. + +--- + +

HTTPX is BSD licensed code.
Designed & crafted with care.

— 🦋 —

+ +## Release Information + +### Fixed + +* Reintroduced supposedly-private `URLTypes` shortcut. (#2673) + + +--- + +[Full changelog](https://github.com/encode/httpx/blob/master/CHANGELOG.md) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpx-0.28.1.dist-info/RECORD b/URSA/.venv_ursa/lib/python3.12/site-packages/httpx-0.28.1.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..1f24aad9e68bdc3e6ec2eb6e219abe784cac126e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/httpx-0.28.1.dist-info/RECORD @@ -0,0 +1,54 @@ +../../../bin/httpx,sha256=UgQdoALUTT7PdT7QEKgUsY0eOunKj-ijUOSHdNlrRS4,252 +httpx-0.28.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +httpx-0.28.1.dist-info/METADATA,sha256=_rubD48-gNV8gZnDBPNcQzboWB0dGNeYPJJ2a4J5OyU,7052 +httpx-0.28.1.dist-info/RECORD,, +httpx-0.28.1.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87 +httpx-0.28.1.dist-info/entry_points.txt,sha256=2lVkdQmxLA1pNMgSN2eV89o90HCZezhmNwsy6ryKDSA,37 +httpx-0.28.1.dist-info/licenses/LICENSE.md,sha256=TsWdVE8StfU5o6cW_TIaxYzNgDC0ZSIfLIgCAM3yjY0,1508 +httpx/__init__.py,sha256=CsaZe6yZj0rHg6322AWKWHGTMVr9txgEfD5P3_Rrz60,2171 +httpx/__pycache__/__init__.cpython-312.pyc,, +httpx/__pycache__/__version__.cpython-312.pyc,, +httpx/__pycache__/_api.cpython-312.pyc,, +httpx/__pycache__/_auth.cpython-312.pyc,, +httpx/__pycache__/_client.cpython-312.pyc,, +httpx/__pycache__/_config.cpython-312.pyc,, +httpx/__pycache__/_content.cpython-312.pyc,, +httpx/__pycache__/_decoders.cpython-312.pyc,, +httpx/__pycache__/_exceptions.cpython-312.pyc,, +httpx/__pycache__/_main.cpython-312.pyc,, +httpx/__pycache__/_models.cpython-312.pyc,, +httpx/__pycache__/_multipart.cpython-312.pyc,, +httpx/__pycache__/_status_codes.cpython-312.pyc,, +httpx/__pycache__/_types.cpython-312.pyc,, +httpx/__pycache__/_urlparse.cpython-312.pyc,, +httpx/__pycache__/_urls.cpython-312.pyc,, +httpx/__pycache__/_utils.cpython-312.pyc,, 
+httpx/__version__.py,sha256=LoUyYeOXTieGzuP_64UL0wxdtxjuu_QbOvE7NOg-IqU,108 +httpx/_api.py,sha256=r_Zgs4jIpcPJLqK5dbbSayqo_iVMKFaxZCd-oOHxLEs,11743 +httpx/_auth.py,sha256=Yr3QwaUSK17rGYx-7j-FdicFIzz4Y9FFV-1F4-7RXX4,11891 +httpx/_client.py,sha256=xD-UG67-WMkeltAAOeGGj-cZ2RRTAm19sWRxlFY7_40,65714 +httpx/_config.py,sha256=pPp2U-wicfcKsF-KYRE1LYdt3e6ERGeIoXZ8Gjo3LWc,8547 +httpx/_content.py,sha256=LGGzrJTR3OvN4Mb1GVVNLXkXJH-6oKlwAttO9p5w_yg,8161 +httpx/_decoders.py,sha256=p0dX8I0NEHexs3UGp4SsZutiMhsXrrWl6-GnqVb0iKM,12041 +httpx/_exceptions.py,sha256=bxW7fxzgVMAdNTbwT0Vnq04gJDW1_gI_GFiQPuMyjL0,8527 +httpx/_main.py,sha256=Cg9GMabiTT_swaDfUgIRitSwxLRMSwUDOm7LdSGqlA4,15626 +httpx/_models.py,sha256=4__Guyv1gLxuZChwim8kfQNiIOcJ9acreFOSurvZfms,44700 +httpx/_multipart.py,sha256=KOHEZZl6oohg9mPaKyyu345qq1rJLg35TUG3YAzXB3Y,9843 +httpx/_status_codes.py,sha256=DYn-2ufBgMeXy5s8x3_TB7wjAuAAMewTakPrm5rXEsc,5639 +httpx/_transports/__init__.py,sha256=GbUoBSAOp7z-l-9j5YhMhR3DMIcn6FVLhj072O3Nnno,275 +httpx/_transports/__pycache__/__init__.cpython-312.pyc,, +httpx/_transports/__pycache__/asgi.cpython-312.pyc,, +httpx/_transports/__pycache__/base.cpython-312.pyc,, +httpx/_transports/__pycache__/default.cpython-312.pyc,, +httpx/_transports/__pycache__/mock.cpython-312.pyc,, +httpx/_transports/__pycache__/wsgi.cpython-312.pyc,, +httpx/_transports/asgi.py,sha256=HRfiDYMPt4wQH2gFgHZg4c-i3sblo6bL5GTqcET-xz8,5501 +httpx/_transports/base.py,sha256=kZS_VMbViYfF570pogUCJ1bulz-ybfL51Pqs9yktebU,2523 +httpx/_transports/default.py,sha256=AzeaRUyVwCccTyyNJexDf0n1dFfzzydpdIQgvw7PLnk,13983 +httpx/_transports/mock.py,sha256=PTo0d567RITXxGrki6kN7_67wwAxfwiMDcuXJiZCjEo,1232 +httpx/_transports/wsgi.py,sha256=NcPX3Xap_EwCFZWO_OaSyQNuInCYx1QMNbO8GAei6jY,4825 +httpx/_types.py,sha256=Jyh41GQq7AOev8IOWKDAg7zCbvHAfufmW5g_PiTtErY,2965 +httpx/_urlparse.py,sha256=ZAmH47ONfkxrrj-PPYhGeiHjb6AjKCS-ANWIN4OL_KY,18546 +httpx/_urls.py,sha256=dX99VR1DSOHpgo9Aq7PzYO4FKdxqKjwyNp8grf8dHN0,21550 
+httpx/_utils.py,sha256=_TVeqAKvxJkKHdz7dFeb4s0LZqQXgeFkXSgfiHBK_1o,8285 +httpx/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpx-0.28.1.dist-info/WHEEL b/URSA/.venv_ursa/lib/python3.12/site-packages/httpx-0.28.1.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..21aaa72961a8af71c17d2cb3b76d5f7f567100e4 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/httpx-0.28.1.dist-info/WHEEL @@ -0,0 +1,4 @@ +Wheel-Version: 1.0 +Generator: hatchling 1.26.3 +Root-Is-Purelib: true +Tag: py3-none-any diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/httpx-0.28.1.dist-info/entry_points.txt b/URSA/.venv_ursa/lib/python3.12/site-packages/httpx-0.28.1.dist-info/entry_points.txt new file mode 100644 index 0000000000000000000000000000000000000000..8ae96007f7d725813fd02dc1d06d3834ee1939e4 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/httpx-0.28.1.dist-info/entry_points.txt @@ -0,0 +1,2 @@ +[console_scripts] +httpx = httpx:main diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..643ef9ac20f22f22c4d2d383de4840983849ba30 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/__init__.py @@ -0,0 +1,1554 @@ +# Copyright 2020 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# *********** +# `huggingface_hub` init has 2 modes: +# - Normal usage: +# If imported to use it, all modules and functions are lazy-loaded. This means +# they exist at top level in module but are imported only the first time they are +# used. This way, `from huggingface_hub import something` will import `something` +# quickly without the hassle of importing all the features from `huggingface_hub`. +# - Static check: +# If statically analyzed, all modules and functions are loaded normally. This way +# static typing check works properly as well as autocomplete in text editors and +# IDEs. +# +# The static model imports are done inside the `if TYPE_CHECKING:` statement at +# the bottom of this file. Since module/functions imports are duplicated, it is +# mandatory to make sure to add them twice when adding one. This is checked in the +# `make quality` command. +# +# To update the static imports, please run the following command and commit the changes. +# ``` +# # Use script +# python utils/check_static_imports.py --update-file +# +# # Or run style on codebase +# make style +# ``` +# +# *********** +# Lazy loader vendored from https://github.com/scientific-python/lazy_loader +import importlib +import os +import sys +from typing import TYPE_CHECKING + + +__version__ = "0.36.2" + +# Alphabetical order of definitions is ensured in tests +# WARNING: any comment added in this dictionary definition will be lost when +# re-generating the file ! 
+_SUBMOD_ATTRS = { + "_commit_scheduler": [ + "CommitScheduler", + ], + "_inference_endpoints": [ + "InferenceEndpoint", + "InferenceEndpointError", + "InferenceEndpointStatus", + "InferenceEndpointTimeoutError", + "InferenceEndpointType", + ], + "_jobs_api": [ + "JobInfo", + "JobOwner", + "JobStage", + "JobStatus", + ], + "_login": [ + "auth_list", + "auth_switch", + "interpreter_login", + "login", + "logout", + "notebook_login", + ], + "_oauth": [ + "OAuthInfo", + "OAuthOrgInfo", + "OAuthUserInfo", + "attach_huggingface_oauth", + "parse_huggingface_oauth", + ], + "_snapshot_download": [ + "snapshot_download", + ], + "_space_api": [ + "SpaceHardware", + "SpaceRuntime", + "SpaceStage", + "SpaceStorage", + "SpaceVariable", + ], + "_tensorboard_logger": [ + "HFSummaryWriter", + ], + "_webhooks_payload": [ + "WebhookPayload", + "WebhookPayloadComment", + "WebhookPayloadDiscussion", + "WebhookPayloadDiscussionChanges", + "WebhookPayloadEvent", + "WebhookPayloadMovedTo", + "WebhookPayloadRepo", + "WebhookPayloadUrl", + "WebhookPayloadWebhook", + ], + "_webhooks_server": [ + "WebhooksServer", + "webhook_endpoint", + ], + "community": [ + "Discussion", + "DiscussionComment", + "DiscussionCommit", + "DiscussionEvent", + "DiscussionStatusChange", + "DiscussionTitleChange", + "DiscussionWithDetails", + ], + "constants": [ + "CONFIG_NAME", + "FLAX_WEIGHTS_NAME", + "HUGGINGFACE_CO_URL_HOME", + "HUGGINGFACE_CO_URL_TEMPLATE", + "PYTORCH_WEIGHTS_NAME", + "REPO_TYPE_DATASET", + "REPO_TYPE_MODEL", + "REPO_TYPE_SPACE", + "TF2_WEIGHTS_NAME", + "TF_WEIGHTS_NAME", + ], + "fastai_utils": [ + "_save_pretrained_fastai", + "from_pretrained_fastai", + "push_to_hub_fastai", + ], + "file_download": [ + "HfFileMetadata", + "_CACHED_NO_EXIST", + "get_hf_file_metadata", + "hf_hub_download", + "hf_hub_url", + "try_to_load_from_cache", + ], + "hf_api": [ + "Collection", + "CollectionItem", + "CommitInfo", + "CommitOperation", + "CommitOperationAdd", + "CommitOperationCopy", + 
"CommitOperationDelete", + "DatasetInfo", + "GitCommitInfo", + "GitRefInfo", + "GitRefs", + "HfApi", + "ModelInfo", + "Organization", + "RepoUrl", + "SpaceInfo", + "User", + "UserLikes", + "WebhookInfo", + "WebhookWatchedItem", + "accept_access_request", + "add_collection_item", + "add_space_secret", + "add_space_variable", + "auth_check", + "cancel_access_request", + "cancel_job", + "change_discussion_status", + "comment_discussion", + "create_branch", + "create_collection", + "create_commit", + "create_discussion", + "create_inference_endpoint", + "create_inference_endpoint_from_catalog", + "create_pull_request", + "create_repo", + "create_scheduled_job", + "create_scheduled_uv_job", + "create_tag", + "create_webhook", + "dataset_info", + "delete_branch", + "delete_collection", + "delete_collection_item", + "delete_file", + "delete_folder", + "delete_inference_endpoint", + "delete_repo", + "delete_scheduled_job", + "delete_space_secret", + "delete_space_storage", + "delete_space_variable", + "delete_tag", + "delete_webhook", + "disable_webhook", + "duplicate_space", + "edit_discussion_comment", + "enable_webhook", + "fetch_job_logs", + "file_exists", + "get_collection", + "get_dataset_tags", + "get_discussion_details", + "get_full_repo_name", + "get_inference_endpoint", + "get_model_tags", + "get_organization_overview", + "get_paths_info", + "get_repo_discussions", + "get_safetensors_metadata", + "get_space_runtime", + "get_space_variables", + "get_token_permission", + "get_user_overview", + "get_webhook", + "grant_access", + "inspect_job", + "inspect_scheduled_job", + "list_accepted_access_requests", + "list_collections", + "list_datasets", + "list_inference_catalog", + "list_inference_endpoints", + "list_jobs", + "list_lfs_files", + "list_liked_repos", + "list_models", + "list_organization_members", + "list_papers", + "list_pending_access_requests", + "list_rejected_access_requests", + "list_repo_commits", + "list_repo_files", + "list_repo_likers", + 
"list_repo_refs", + "list_repo_tree", + "list_spaces", + "list_user_followers", + "list_user_following", + "list_webhooks", + "merge_pull_request", + "model_info", + "move_repo", + "paper_info", + "parse_safetensors_file_metadata", + "pause_inference_endpoint", + "pause_space", + "permanently_delete_lfs_files", + "preupload_lfs_files", + "reject_access_request", + "rename_discussion", + "repo_exists", + "repo_info", + "repo_type_and_id_from_hf_id", + "request_space_hardware", + "request_space_storage", + "restart_space", + "resume_inference_endpoint", + "resume_scheduled_job", + "revision_exists", + "run_as_future", + "run_job", + "run_uv_job", + "scale_to_zero_inference_endpoint", + "set_space_sleep_time", + "space_info", + "super_squash_history", + "suspend_scheduled_job", + "unlike", + "update_collection_item", + "update_collection_metadata", + "update_inference_endpoint", + "update_repo_settings", + "update_repo_visibility", + "update_webhook", + "upload_file", + "upload_folder", + "upload_large_folder", + "whoami", + ], + "hf_file_system": [ + "HfFileSystem", + "HfFileSystemFile", + "HfFileSystemResolvedPath", + "HfFileSystemStreamFile", + ], + "hub_mixin": [ + "ModelHubMixin", + "PyTorchModelHubMixin", + ], + "inference._client": [ + "InferenceClient", + "InferenceTimeoutError", + ], + "inference._generated._async_client": [ + "AsyncInferenceClient", + ], + "inference._generated.types": [ + "AudioClassificationInput", + "AudioClassificationOutputElement", + "AudioClassificationOutputTransform", + "AudioClassificationParameters", + "AudioToAudioInput", + "AudioToAudioOutputElement", + "AutomaticSpeechRecognitionEarlyStoppingEnum", + "AutomaticSpeechRecognitionGenerationParameters", + "AutomaticSpeechRecognitionInput", + "AutomaticSpeechRecognitionOutput", + "AutomaticSpeechRecognitionOutputChunk", + "AutomaticSpeechRecognitionParameters", + "ChatCompletionInput", + "ChatCompletionInputFunctionDefinition", + "ChatCompletionInputFunctionName", + 
"ChatCompletionInputGrammarType", + "ChatCompletionInputJSONSchema", + "ChatCompletionInputMessage", + "ChatCompletionInputMessageChunk", + "ChatCompletionInputMessageChunkType", + "ChatCompletionInputResponseFormatJSONObject", + "ChatCompletionInputResponseFormatJSONSchema", + "ChatCompletionInputResponseFormatText", + "ChatCompletionInputStreamOptions", + "ChatCompletionInputTool", + "ChatCompletionInputToolCall", + "ChatCompletionInputToolChoiceClass", + "ChatCompletionInputToolChoiceEnum", + "ChatCompletionInputURL", + "ChatCompletionOutput", + "ChatCompletionOutputComplete", + "ChatCompletionOutputFunctionDefinition", + "ChatCompletionOutputLogprob", + "ChatCompletionOutputLogprobs", + "ChatCompletionOutputMessage", + "ChatCompletionOutputToolCall", + "ChatCompletionOutputTopLogprob", + "ChatCompletionOutputUsage", + "ChatCompletionStreamOutput", + "ChatCompletionStreamOutputChoice", + "ChatCompletionStreamOutputDelta", + "ChatCompletionStreamOutputDeltaToolCall", + "ChatCompletionStreamOutputFunction", + "ChatCompletionStreamOutputLogprob", + "ChatCompletionStreamOutputLogprobs", + "ChatCompletionStreamOutputTopLogprob", + "ChatCompletionStreamOutputUsage", + "DepthEstimationInput", + "DepthEstimationOutput", + "DocumentQuestionAnsweringInput", + "DocumentQuestionAnsweringInputData", + "DocumentQuestionAnsweringOutputElement", + "DocumentQuestionAnsweringParameters", + "FeatureExtractionInput", + "FeatureExtractionInputTruncationDirection", + "FillMaskInput", + "FillMaskOutputElement", + "FillMaskParameters", + "ImageClassificationInput", + "ImageClassificationOutputElement", + "ImageClassificationOutputTransform", + "ImageClassificationParameters", + "ImageSegmentationInput", + "ImageSegmentationOutputElement", + "ImageSegmentationParameters", + "ImageSegmentationSubtask", + "ImageToImageInput", + "ImageToImageOutput", + "ImageToImageParameters", + "ImageToImageTargetSize", + "ImageToTextEarlyStoppingEnum", + "ImageToTextGenerationParameters", + 
"ImageToTextInput", + "ImageToTextOutput", + "ImageToTextParameters", + "ImageToVideoInput", + "ImageToVideoOutput", + "ImageToVideoParameters", + "ImageToVideoTargetSize", + "ObjectDetectionBoundingBox", + "ObjectDetectionInput", + "ObjectDetectionOutputElement", + "ObjectDetectionParameters", + "Padding", + "QuestionAnsweringInput", + "QuestionAnsweringInputData", + "QuestionAnsweringOutputElement", + "QuestionAnsweringParameters", + "SentenceSimilarityInput", + "SentenceSimilarityInputData", + "SummarizationInput", + "SummarizationOutput", + "SummarizationParameters", + "SummarizationTruncationStrategy", + "TableQuestionAnsweringInput", + "TableQuestionAnsweringInputData", + "TableQuestionAnsweringOutputElement", + "TableQuestionAnsweringParameters", + "Text2TextGenerationInput", + "Text2TextGenerationOutput", + "Text2TextGenerationParameters", + "Text2TextGenerationTruncationStrategy", + "TextClassificationInput", + "TextClassificationOutputElement", + "TextClassificationOutputTransform", + "TextClassificationParameters", + "TextGenerationInput", + "TextGenerationInputGenerateParameters", + "TextGenerationInputGrammarType", + "TextGenerationOutput", + "TextGenerationOutputBestOfSequence", + "TextGenerationOutputDetails", + "TextGenerationOutputFinishReason", + "TextGenerationOutputPrefillToken", + "TextGenerationOutputToken", + "TextGenerationStreamOutput", + "TextGenerationStreamOutputStreamDetails", + "TextGenerationStreamOutputToken", + "TextToAudioEarlyStoppingEnum", + "TextToAudioGenerationParameters", + "TextToAudioInput", + "TextToAudioOutput", + "TextToAudioParameters", + "TextToImageInput", + "TextToImageOutput", + "TextToImageParameters", + "TextToSpeechEarlyStoppingEnum", + "TextToSpeechGenerationParameters", + "TextToSpeechInput", + "TextToSpeechOutput", + "TextToSpeechParameters", + "TextToVideoInput", + "TextToVideoOutput", + "TextToVideoParameters", + "TokenClassificationAggregationStrategy", + "TokenClassificationInput", + 
"TokenClassificationOutputElement", + "TokenClassificationParameters", + "TranslationInput", + "TranslationOutput", + "TranslationParameters", + "TranslationTruncationStrategy", + "TypeEnum", + "VideoClassificationInput", + "VideoClassificationOutputElement", + "VideoClassificationOutputTransform", + "VideoClassificationParameters", + "VisualQuestionAnsweringInput", + "VisualQuestionAnsweringInputData", + "VisualQuestionAnsweringOutputElement", + "VisualQuestionAnsweringParameters", + "ZeroShotClassificationInput", + "ZeroShotClassificationOutputElement", + "ZeroShotClassificationParameters", + "ZeroShotImageClassificationInput", + "ZeroShotImageClassificationOutputElement", + "ZeroShotImageClassificationParameters", + "ZeroShotObjectDetectionBoundingBox", + "ZeroShotObjectDetectionInput", + "ZeroShotObjectDetectionOutputElement", + "ZeroShotObjectDetectionParameters", + ], + "inference._mcp.agent": [ + "Agent", + ], + "inference._mcp.mcp_client": [ + "MCPClient", + ], + "inference_api": [ + "InferenceApi", + ], + "keras_mixin": [ + "KerasModelHubMixin", + "from_pretrained_keras", + "push_to_hub_keras", + "save_pretrained_keras", + ], + "repocard": [ + "DatasetCard", + "ModelCard", + "RepoCard", + "SpaceCard", + "metadata_eval_result", + "metadata_load", + "metadata_save", + "metadata_update", + ], + "repocard_data": [ + "CardData", + "DatasetCardData", + "EvalResult", + "ModelCardData", + "SpaceCardData", + ], + "repository": [ + "Repository", + ], + "serialization": [ + "StateDictSplit", + "get_tf_storage_size", + "get_torch_storage_id", + "get_torch_storage_size", + "load_state_dict_from_file", + "load_torch_model", + "save_torch_model", + "save_torch_state_dict", + "split_state_dict_into_shards_factory", + "split_tf_state_dict_into_shards", + "split_torch_state_dict_into_shards", + ], + "serialization._dduf": [ + "DDUFEntry", + "export_entries_as_dduf", + "export_folder_as_dduf", + "read_dduf_file", + ], + "utils": [ + "CacheNotFound", + "CachedFileInfo", + 
"CachedRepoInfo", + "CachedRevisionInfo", + "CorruptedCacheException", + "DeleteCacheStrategy", + "HFCacheInfo", + "HfFolder", + "cached_assets_path", + "configure_http_backend", + "dump_environment_info", + "get_session", + "get_token", + "logging", + "scan_cache_dir", + ], +} + +# WARNING: __all__ is generated automatically, Any manual edit will be lost when re-generating this file ! +# +# To update the static imports, please run the following command and commit the changes. +# ``` +# # Use script +# python utils/check_all_variable.py --update +# +# # Or run style on codebase +# make style +# ``` + +__all__ = [ + "Agent", + "AsyncInferenceClient", + "AudioClassificationInput", + "AudioClassificationOutputElement", + "AudioClassificationOutputTransform", + "AudioClassificationParameters", + "AudioToAudioInput", + "AudioToAudioOutputElement", + "AutomaticSpeechRecognitionEarlyStoppingEnum", + "AutomaticSpeechRecognitionGenerationParameters", + "AutomaticSpeechRecognitionInput", + "AutomaticSpeechRecognitionOutput", + "AutomaticSpeechRecognitionOutputChunk", + "AutomaticSpeechRecognitionParameters", + "CONFIG_NAME", + "CacheNotFound", + "CachedFileInfo", + "CachedRepoInfo", + "CachedRevisionInfo", + "CardData", + "ChatCompletionInput", + "ChatCompletionInputFunctionDefinition", + "ChatCompletionInputFunctionName", + "ChatCompletionInputGrammarType", + "ChatCompletionInputJSONSchema", + "ChatCompletionInputMessage", + "ChatCompletionInputMessageChunk", + "ChatCompletionInputMessageChunkType", + "ChatCompletionInputResponseFormatJSONObject", + "ChatCompletionInputResponseFormatJSONSchema", + "ChatCompletionInputResponseFormatText", + "ChatCompletionInputStreamOptions", + "ChatCompletionInputTool", + "ChatCompletionInputToolCall", + "ChatCompletionInputToolChoiceClass", + "ChatCompletionInputToolChoiceEnum", + "ChatCompletionInputURL", + "ChatCompletionOutput", + "ChatCompletionOutputComplete", + "ChatCompletionOutputFunctionDefinition", + 
"ChatCompletionOutputLogprob", + "ChatCompletionOutputLogprobs", + "ChatCompletionOutputMessage", + "ChatCompletionOutputToolCall", + "ChatCompletionOutputTopLogprob", + "ChatCompletionOutputUsage", + "ChatCompletionStreamOutput", + "ChatCompletionStreamOutputChoice", + "ChatCompletionStreamOutputDelta", + "ChatCompletionStreamOutputDeltaToolCall", + "ChatCompletionStreamOutputFunction", + "ChatCompletionStreamOutputLogprob", + "ChatCompletionStreamOutputLogprobs", + "ChatCompletionStreamOutputTopLogprob", + "ChatCompletionStreamOutputUsage", + "Collection", + "CollectionItem", + "CommitInfo", + "CommitOperation", + "CommitOperationAdd", + "CommitOperationCopy", + "CommitOperationDelete", + "CommitScheduler", + "CorruptedCacheException", + "DDUFEntry", + "DatasetCard", + "DatasetCardData", + "DatasetInfo", + "DeleteCacheStrategy", + "DepthEstimationInput", + "DepthEstimationOutput", + "Discussion", + "DiscussionComment", + "DiscussionCommit", + "DiscussionEvent", + "DiscussionStatusChange", + "DiscussionTitleChange", + "DiscussionWithDetails", + "DocumentQuestionAnsweringInput", + "DocumentQuestionAnsweringInputData", + "DocumentQuestionAnsweringOutputElement", + "DocumentQuestionAnsweringParameters", + "EvalResult", + "FLAX_WEIGHTS_NAME", + "FeatureExtractionInput", + "FeatureExtractionInputTruncationDirection", + "FillMaskInput", + "FillMaskOutputElement", + "FillMaskParameters", + "GitCommitInfo", + "GitRefInfo", + "GitRefs", + "HFCacheInfo", + "HFSummaryWriter", + "HUGGINGFACE_CO_URL_HOME", + "HUGGINGFACE_CO_URL_TEMPLATE", + "HfApi", + "HfFileMetadata", + "HfFileSystem", + "HfFileSystemFile", + "HfFileSystemResolvedPath", + "HfFileSystemStreamFile", + "HfFolder", + "ImageClassificationInput", + "ImageClassificationOutputElement", + "ImageClassificationOutputTransform", + "ImageClassificationParameters", + "ImageSegmentationInput", + "ImageSegmentationOutputElement", + "ImageSegmentationParameters", + "ImageSegmentationSubtask", + "ImageToImageInput", + 
"ImageToImageOutput", + "ImageToImageParameters", + "ImageToImageTargetSize", + "ImageToTextEarlyStoppingEnum", + "ImageToTextGenerationParameters", + "ImageToTextInput", + "ImageToTextOutput", + "ImageToTextParameters", + "ImageToVideoInput", + "ImageToVideoOutput", + "ImageToVideoParameters", + "ImageToVideoTargetSize", + "InferenceApi", + "InferenceClient", + "InferenceEndpoint", + "InferenceEndpointError", + "InferenceEndpointStatus", + "InferenceEndpointTimeoutError", + "InferenceEndpointType", + "InferenceTimeoutError", + "JobInfo", + "JobOwner", + "JobStage", + "JobStatus", + "KerasModelHubMixin", + "MCPClient", + "ModelCard", + "ModelCardData", + "ModelHubMixin", + "ModelInfo", + "OAuthInfo", + "OAuthOrgInfo", + "OAuthUserInfo", + "ObjectDetectionBoundingBox", + "ObjectDetectionInput", + "ObjectDetectionOutputElement", + "ObjectDetectionParameters", + "Organization", + "PYTORCH_WEIGHTS_NAME", + "Padding", + "PyTorchModelHubMixin", + "QuestionAnsweringInput", + "QuestionAnsweringInputData", + "QuestionAnsweringOutputElement", + "QuestionAnsweringParameters", + "REPO_TYPE_DATASET", + "REPO_TYPE_MODEL", + "REPO_TYPE_SPACE", + "RepoCard", + "RepoUrl", + "Repository", + "SentenceSimilarityInput", + "SentenceSimilarityInputData", + "SpaceCard", + "SpaceCardData", + "SpaceHardware", + "SpaceInfo", + "SpaceRuntime", + "SpaceStage", + "SpaceStorage", + "SpaceVariable", + "StateDictSplit", + "SummarizationInput", + "SummarizationOutput", + "SummarizationParameters", + "SummarizationTruncationStrategy", + "TF2_WEIGHTS_NAME", + "TF_WEIGHTS_NAME", + "TableQuestionAnsweringInput", + "TableQuestionAnsweringInputData", + "TableQuestionAnsweringOutputElement", + "TableQuestionAnsweringParameters", + "Text2TextGenerationInput", + "Text2TextGenerationOutput", + "Text2TextGenerationParameters", + "Text2TextGenerationTruncationStrategy", + "TextClassificationInput", + "TextClassificationOutputElement", + "TextClassificationOutputTransform", + "TextClassificationParameters", + 
"TextGenerationInput", + "TextGenerationInputGenerateParameters", + "TextGenerationInputGrammarType", + "TextGenerationOutput", + "TextGenerationOutputBestOfSequence", + "TextGenerationOutputDetails", + "TextGenerationOutputFinishReason", + "TextGenerationOutputPrefillToken", + "TextGenerationOutputToken", + "TextGenerationStreamOutput", + "TextGenerationStreamOutputStreamDetails", + "TextGenerationStreamOutputToken", + "TextToAudioEarlyStoppingEnum", + "TextToAudioGenerationParameters", + "TextToAudioInput", + "TextToAudioOutput", + "TextToAudioParameters", + "TextToImageInput", + "TextToImageOutput", + "TextToImageParameters", + "TextToSpeechEarlyStoppingEnum", + "TextToSpeechGenerationParameters", + "TextToSpeechInput", + "TextToSpeechOutput", + "TextToSpeechParameters", + "TextToVideoInput", + "TextToVideoOutput", + "TextToVideoParameters", + "TokenClassificationAggregationStrategy", + "TokenClassificationInput", + "TokenClassificationOutputElement", + "TokenClassificationParameters", + "TranslationInput", + "TranslationOutput", + "TranslationParameters", + "TranslationTruncationStrategy", + "TypeEnum", + "User", + "UserLikes", + "VideoClassificationInput", + "VideoClassificationOutputElement", + "VideoClassificationOutputTransform", + "VideoClassificationParameters", + "VisualQuestionAnsweringInput", + "VisualQuestionAnsweringInputData", + "VisualQuestionAnsweringOutputElement", + "VisualQuestionAnsweringParameters", + "WebhookInfo", + "WebhookPayload", + "WebhookPayloadComment", + "WebhookPayloadDiscussion", + "WebhookPayloadDiscussionChanges", + "WebhookPayloadEvent", + "WebhookPayloadMovedTo", + "WebhookPayloadRepo", + "WebhookPayloadUrl", + "WebhookPayloadWebhook", + "WebhookWatchedItem", + "WebhooksServer", + "ZeroShotClassificationInput", + "ZeroShotClassificationOutputElement", + "ZeroShotClassificationParameters", + "ZeroShotImageClassificationInput", + "ZeroShotImageClassificationOutputElement", + "ZeroShotImageClassificationParameters", + 
"ZeroShotObjectDetectionBoundingBox", + "ZeroShotObjectDetectionInput", + "ZeroShotObjectDetectionOutputElement", + "ZeroShotObjectDetectionParameters", + "_CACHED_NO_EXIST", + "_save_pretrained_fastai", + "accept_access_request", + "add_collection_item", + "add_space_secret", + "add_space_variable", + "attach_huggingface_oauth", + "auth_check", + "auth_list", + "auth_switch", + "cached_assets_path", + "cancel_access_request", + "cancel_job", + "change_discussion_status", + "comment_discussion", + "configure_http_backend", + "create_branch", + "create_collection", + "create_commit", + "create_discussion", + "create_inference_endpoint", + "create_inference_endpoint_from_catalog", + "create_pull_request", + "create_repo", + "create_scheduled_job", + "create_scheduled_uv_job", + "create_tag", + "create_webhook", + "dataset_info", + "delete_branch", + "delete_collection", + "delete_collection_item", + "delete_file", + "delete_folder", + "delete_inference_endpoint", + "delete_repo", + "delete_scheduled_job", + "delete_space_secret", + "delete_space_storage", + "delete_space_variable", + "delete_tag", + "delete_webhook", + "disable_webhook", + "dump_environment_info", + "duplicate_space", + "edit_discussion_comment", + "enable_webhook", + "export_entries_as_dduf", + "export_folder_as_dduf", + "fetch_job_logs", + "file_exists", + "from_pretrained_fastai", + "from_pretrained_keras", + "get_collection", + "get_dataset_tags", + "get_discussion_details", + "get_full_repo_name", + "get_hf_file_metadata", + "get_inference_endpoint", + "get_model_tags", + "get_organization_overview", + "get_paths_info", + "get_repo_discussions", + "get_safetensors_metadata", + "get_session", + "get_space_runtime", + "get_space_variables", + "get_tf_storage_size", + "get_token", + "get_token_permission", + "get_torch_storage_id", + "get_torch_storage_size", + "get_user_overview", + "get_webhook", + "grant_access", + "hf_hub_download", + "hf_hub_url", + "inspect_job", + "inspect_scheduled_job", + 
"interpreter_login", + "list_accepted_access_requests", + "list_collections", + "list_datasets", + "list_inference_catalog", + "list_inference_endpoints", + "list_jobs", + "list_lfs_files", + "list_liked_repos", + "list_models", + "list_organization_members", + "list_papers", + "list_pending_access_requests", + "list_rejected_access_requests", + "list_repo_commits", + "list_repo_files", + "list_repo_likers", + "list_repo_refs", + "list_repo_tree", + "list_spaces", + "list_user_followers", + "list_user_following", + "list_webhooks", + "load_state_dict_from_file", + "load_torch_model", + "logging", + "login", + "logout", + "merge_pull_request", + "metadata_eval_result", + "metadata_load", + "metadata_save", + "metadata_update", + "model_info", + "move_repo", + "notebook_login", + "paper_info", + "parse_huggingface_oauth", + "parse_safetensors_file_metadata", + "pause_inference_endpoint", + "pause_space", + "permanently_delete_lfs_files", + "preupload_lfs_files", + "push_to_hub_fastai", + "push_to_hub_keras", + "read_dduf_file", + "reject_access_request", + "rename_discussion", + "repo_exists", + "repo_info", + "repo_type_and_id_from_hf_id", + "request_space_hardware", + "request_space_storage", + "restart_space", + "resume_inference_endpoint", + "resume_scheduled_job", + "revision_exists", + "run_as_future", + "run_job", + "run_uv_job", + "save_pretrained_keras", + "save_torch_model", + "save_torch_state_dict", + "scale_to_zero_inference_endpoint", + "scan_cache_dir", + "set_space_sleep_time", + "snapshot_download", + "space_info", + "split_state_dict_into_shards_factory", + "split_tf_state_dict_into_shards", + "split_torch_state_dict_into_shards", + "super_squash_history", + "suspend_scheduled_job", + "try_to_load_from_cache", + "unlike", + "update_collection_item", + "update_collection_metadata", + "update_inference_endpoint", + "update_repo_settings", + "update_repo_visibility", + "update_webhook", + "upload_file", + "upload_folder", + "upload_large_folder", + 
def _attach(package_name, submodules=None, submod_attrs=None):
    """Build lazy `__getattr__` / `__dir__` hooks for a package.

    Instead of importing submodules and their attributes eagerly at package import time,
    the returned `__getattr__` imports them with `importlib.import_module` the first time
    they are requested.

    Example:

    ```python
    __getattr__, __dir__ = _attach(
        __name__,
        ['mysubmodule', 'anothersubmodule'],
        {'foo': ['someattr']}
    )
    ```

    Args:
        package_name (`str`):
            Dotted name of the package the hooks are built for (typically `__name__`).
        submodules (iterable of `str`, *optional*):
            Names of submodules that can be requested directly. Converted to a `set`.
        submod_attrs (`dict`, *optional*):
            Mapping of submodule name -> iterable of attribute names provided by that
            submodule. If an attribute name is listed under several submodules, the last
            one iterated wins.

    Returns:
        A 2-tuple `(__getattr__, __dir__)`. `__dir__` returns the module-level `__all__`.

    Raises (from the returned `__getattr__`):
        `AttributeError` when the requested name is neither a known submodule nor a known
        attribute. Any exception raised while importing is printed and then re-raised.
    """
    lazy_submodules = set() if submodules is None else set(submodules)
    attr_sources = {} if submod_attrs is None else submod_attrs

    # Reverse lookup: attribute name -> name of the submodule that provides it.
    owner_of = {}
    for module_name, exported in attr_sources.items():
        for attr_name in exported:
            owner_of[attr_name] = module_name

    def __getattr__(name):
        # Case 1: the name is itself a lazily importable submodule.
        if name in lazy_submodules:
            try:
                return importlib.import_module(f"{package_name}.{name}")
            except Exception as e:
                print(f"Error importing {package_name}.{name}: {e}")
                raise

        # Case 2: unknown name.
        if name not in owner_of:
            raise AttributeError(f"No {package_name} attribute {name}")

        # Case 3: the name is an attribute living in a known submodule.
        owner = owner_of[name]
        submod_path = f"{package_name}.{owner}"
        try:
            submod = importlib.import_module(submod_path)
        except Exception as e:
            print(f"Error importing {submod_path}: {e}")
            raise
        attr = getattr(submod, name)

        # When the attribute has the same name as the submodule that defines it, store the
        # attribute on the package so that the attribute, not the module, is what the
        # package exposes under that name.
        if name == owner:
            sys.modules[package_name].__dict__[name] = attr

        return attr

    def __dir__():
        return __all__

    return __getattr__, __dir__
noqa: F401 + WebhookPayloadComment, # noqa: F401 + WebhookPayloadDiscussion, # noqa: F401 + WebhookPayloadDiscussionChanges, # noqa: F401 + WebhookPayloadEvent, # noqa: F401 + WebhookPayloadMovedTo, # noqa: F401 + WebhookPayloadRepo, # noqa: F401 + WebhookPayloadUrl, # noqa: F401 + WebhookPayloadWebhook, # noqa: F401 + ) + from ._webhooks_server import ( + WebhooksServer, # noqa: F401 + webhook_endpoint, # noqa: F401 + ) + from .community import ( + Discussion, # noqa: F401 + DiscussionComment, # noqa: F401 + DiscussionCommit, # noqa: F401 + DiscussionEvent, # noqa: F401 + DiscussionStatusChange, # noqa: F401 + DiscussionTitleChange, # noqa: F401 + DiscussionWithDetails, # noqa: F401 + ) + from .constants import ( + CONFIG_NAME, # noqa: F401 + FLAX_WEIGHTS_NAME, # noqa: F401 + HUGGINGFACE_CO_URL_HOME, # noqa: F401 + HUGGINGFACE_CO_URL_TEMPLATE, # noqa: F401 + PYTORCH_WEIGHTS_NAME, # noqa: F401 + REPO_TYPE_DATASET, # noqa: F401 + REPO_TYPE_MODEL, # noqa: F401 + REPO_TYPE_SPACE, # noqa: F401 + TF2_WEIGHTS_NAME, # noqa: F401 + TF_WEIGHTS_NAME, # noqa: F401 + ) + from .fastai_utils import ( + _save_pretrained_fastai, # noqa: F401 + from_pretrained_fastai, # noqa: F401 + push_to_hub_fastai, # noqa: F401 + ) + from .file_download import ( + _CACHED_NO_EXIST, # noqa: F401 + HfFileMetadata, # noqa: F401 + get_hf_file_metadata, # noqa: F401 + hf_hub_download, # noqa: F401 + hf_hub_url, # noqa: F401 + try_to_load_from_cache, # noqa: F401 + ) + from .hf_api import ( + Collection, # noqa: F401 + CollectionItem, # noqa: F401 + CommitInfo, # noqa: F401 + CommitOperation, # noqa: F401 + CommitOperationAdd, # noqa: F401 + CommitOperationCopy, # noqa: F401 + CommitOperationDelete, # noqa: F401 + DatasetInfo, # noqa: F401 + GitCommitInfo, # noqa: F401 + GitRefInfo, # noqa: F401 + GitRefs, # noqa: F401 + HfApi, # noqa: F401 + ModelInfo, # noqa: F401 + Organization, # noqa: F401 + RepoUrl, # noqa: F401 + SpaceInfo, # noqa: F401 + User, # noqa: F401 + UserLikes, # noqa: F401 + 
WebhookInfo, # noqa: F401 + WebhookWatchedItem, # noqa: F401 + accept_access_request, # noqa: F401 + add_collection_item, # noqa: F401 + add_space_secret, # noqa: F401 + add_space_variable, # noqa: F401 + auth_check, # noqa: F401 + cancel_access_request, # noqa: F401 + cancel_job, # noqa: F401 + change_discussion_status, # noqa: F401 + comment_discussion, # noqa: F401 + create_branch, # noqa: F401 + create_collection, # noqa: F401 + create_commit, # noqa: F401 + create_discussion, # noqa: F401 + create_inference_endpoint, # noqa: F401 + create_inference_endpoint_from_catalog, # noqa: F401 + create_pull_request, # noqa: F401 + create_repo, # noqa: F401 + create_scheduled_job, # noqa: F401 + create_scheduled_uv_job, # noqa: F401 + create_tag, # noqa: F401 + create_webhook, # noqa: F401 + dataset_info, # noqa: F401 + delete_branch, # noqa: F401 + delete_collection, # noqa: F401 + delete_collection_item, # noqa: F401 + delete_file, # noqa: F401 + delete_folder, # noqa: F401 + delete_inference_endpoint, # noqa: F401 + delete_repo, # noqa: F401 + delete_scheduled_job, # noqa: F401 + delete_space_secret, # noqa: F401 + delete_space_storage, # noqa: F401 + delete_space_variable, # noqa: F401 + delete_tag, # noqa: F401 + delete_webhook, # noqa: F401 + disable_webhook, # noqa: F401 + duplicate_space, # noqa: F401 + edit_discussion_comment, # noqa: F401 + enable_webhook, # noqa: F401 + fetch_job_logs, # noqa: F401 + file_exists, # noqa: F401 + get_collection, # noqa: F401 + get_dataset_tags, # noqa: F401 + get_discussion_details, # noqa: F401 + get_full_repo_name, # noqa: F401 + get_inference_endpoint, # noqa: F401 + get_model_tags, # noqa: F401 + get_organization_overview, # noqa: F401 + get_paths_info, # noqa: F401 + get_repo_discussions, # noqa: F401 + get_safetensors_metadata, # noqa: F401 + get_space_runtime, # noqa: F401 + get_space_variables, # noqa: F401 + get_token_permission, # noqa: F401 + get_user_overview, # noqa: F401 + get_webhook, # noqa: F401 + grant_access, 
# noqa: F401 + inspect_job, # noqa: F401 + inspect_scheduled_job, # noqa: F401 + list_accepted_access_requests, # noqa: F401 + list_collections, # noqa: F401 + list_datasets, # noqa: F401 + list_inference_catalog, # noqa: F401 + list_inference_endpoints, # noqa: F401 + list_jobs, # noqa: F401 + list_lfs_files, # noqa: F401 + list_liked_repos, # noqa: F401 + list_models, # noqa: F401 + list_organization_members, # noqa: F401 + list_papers, # noqa: F401 + list_pending_access_requests, # noqa: F401 + list_rejected_access_requests, # noqa: F401 + list_repo_commits, # noqa: F401 + list_repo_files, # noqa: F401 + list_repo_likers, # noqa: F401 + list_repo_refs, # noqa: F401 + list_repo_tree, # noqa: F401 + list_spaces, # noqa: F401 + list_user_followers, # noqa: F401 + list_user_following, # noqa: F401 + list_webhooks, # noqa: F401 + merge_pull_request, # noqa: F401 + model_info, # noqa: F401 + move_repo, # noqa: F401 + paper_info, # noqa: F401 + parse_safetensors_file_metadata, # noqa: F401 + pause_inference_endpoint, # noqa: F401 + pause_space, # noqa: F401 + permanently_delete_lfs_files, # noqa: F401 + preupload_lfs_files, # noqa: F401 + reject_access_request, # noqa: F401 + rename_discussion, # noqa: F401 + repo_exists, # noqa: F401 + repo_info, # noqa: F401 + repo_type_and_id_from_hf_id, # noqa: F401 + request_space_hardware, # noqa: F401 + request_space_storage, # noqa: F401 + restart_space, # noqa: F401 + resume_inference_endpoint, # noqa: F401 + resume_scheduled_job, # noqa: F401 + revision_exists, # noqa: F401 + run_as_future, # noqa: F401 + run_job, # noqa: F401 + run_uv_job, # noqa: F401 + scale_to_zero_inference_endpoint, # noqa: F401 + set_space_sleep_time, # noqa: F401 + space_info, # noqa: F401 + super_squash_history, # noqa: F401 + suspend_scheduled_job, # noqa: F401 + unlike, # noqa: F401 + update_collection_item, # noqa: F401 + update_collection_metadata, # noqa: F401 + update_inference_endpoint, # noqa: F401 + update_repo_settings, # noqa: F401 + 
update_repo_visibility, # noqa: F401 + update_webhook, # noqa: F401 + upload_file, # noqa: F401 + upload_folder, # noqa: F401 + upload_large_folder, # noqa: F401 + whoami, # noqa: F401 + ) + from .hf_file_system import ( + HfFileSystem, # noqa: F401 + HfFileSystemFile, # noqa: F401 + HfFileSystemResolvedPath, # noqa: F401 + HfFileSystemStreamFile, # noqa: F401 + ) + from .hub_mixin import ( + ModelHubMixin, # noqa: F401 + PyTorchModelHubMixin, # noqa: F401 + ) + from .inference._client import ( + InferenceClient, # noqa: F401 + InferenceTimeoutError, # noqa: F401 + ) + from .inference._generated._async_client import AsyncInferenceClient # noqa: F401 + from .inference._generated.types import ( + AudioClassificationInput, # noqa: F401 + AudioClassificationOutputElement, # noqa: F401 + AudioClassificationOutputTransform, # noqa: F401 + AudioClassificationParameters, # noqa: F401 + AudioToAudioInput, # noqa: F401 + AudioToAudioOutputElement, # noqa: F401 + AutomaticSpeechRecognitionEarlyStoppingEnum, # noqa: F401 + AutomaticSpeechRecognitionGenerationParameters, # noqa: F401 + AutomaticSpeechRecognitionInput, # noqa: F401 + AutomaticSpeechRecognitionOutput, # noqa: F401 + AutomaticSpeechRecognitionOutputChunk, # noqa: F401 + AutomaticSpeechRecognitionParameters, # noqa: F401 + ChatCompletionInput, # noqa: F401 + ChatCompletionInputFunctionDefinition, # noqa: F401 + ChatCompletionInputFunctionName, # noqa: F401 + ChatCompletionInputGrammarType, # noqa: F401 + ChatCompletionInputJSONSchema, # noqa: F401 + ChatCompletionInputMessage, # noqa: F401 + ChatCompletionInputMessageChunk, # noqa: F401 + ChatCompletionInputMessageChunkType, # noqa: F401 + ChatCompletionInputResponseFormatJSONObject, # noqa: F401 + ChatCompletionInputResponseFormatJSONSchema, # noqa: F401 + ChatCompletionInputResponseFormatText, # noqa: F401 + ChatCompletionInputStreamOptions, # noqa: F401 + ChatCompletionInputTool, # noqa: F401 + ChatCompletionInputToolCall, # noqa: F401 + 
ChatCompletionInputToolChoiceClass, # noqa: F401 + ChatCompletionInputToolChoiceEnum, # noqa: F401 + ChatCompletionInputURL, # noqa: F401 + ChatCompletionOutput, # noqa: F401 + ChatCompletionOutputComplete, # noqa: F401 + ChatCompletionOutputFunctionDefinition, # noqa: F401 + ChatCompletionOutputLogprob, # noqa: F401 + ChatCompletionOutputLogprobs, # noqa: F401 + ChatCompletionOutputMessage, # noqa: F401 + ChatCompletionOutputToolCall, # noqa: F401 + ChatCompletionOutputTopLogprob, # noqa: F401 + ChatCompletionOutputUsage, # noqa: F401 + ChatCompletionStreamOutput, # noqa: F401 + ChatCompletionStreamOutputChoice, # noqa: F401 + ChatCompletionStreamOutputDelta, # noqa: F401 + ChatCompletionStreamOutputDeltaToolCall, # noqa: F401 + ChatCompletionStreamOutputFunction, # noqa: F401 + ChatCompletionStreamOutputLogprob, # noqa: F401 + ChatCompletionStreamOutputLogprobs, # noqa: F401 + ChatCompletionStreamOutputTopLogprob, # noqa: F401 + ChatCompletionStreamOutputUsage, # noqa: F401 + DepthEstimationInput, # noqa: F401 + DepthEstimationOutput, # noqa: F401 + DocumentQuestionAnsweringInput, # noqa: F401 + DocumentQuestionAnsweringInputData, # noqa: F401 + DocumentQuestionAnsweringOutputElement, # noqa: F401 + DocumentQuestionAnsweringParameters, # noqa: F401 + FeatureExtractionInput, # noqa: F401 + FeatureExtractionInputTruncationDirection, # noqa: F401 + FillMaskInput, # noqa: F401 + FillMaskOutputElement, # noqa: F401 + FillMaskParameters, # noqa: F401 + ImageClassificationInput, # noqa: F401 + ImageClassificationOutputElement, # noqa: F401 + ImageClassificationOutputTransform, # noqa: F401 + ImageClassificationParameters, # noqa: F401 + ImageSegmentationInput, # noqa: F401 + ImageSegmentationOutputElement, # noqa: F401 + ImageSegmentationParameters, # noqa: F401 + ImageSegmentationSubtask, # noqa: F401 + ImageToImageInput, # noqa: F401 + ImageToImageOutput, # noqa: F401 + ImageToImageParameters, # noqa: F401 + ImageToImageTargetSize, # noqa: F401 + 
ImageToTextEarlyStoppingEnum, # noqa: F401 + ImageToTextGenerationParameters, # noqa: F401 + ImageToTextInput, # noqa: F401 + ImageToTextOutput, # noqa: F401 + ImageToTextParameters, # noqa: F401 + ImageToVideoInput, # noqa: F401 + ImageToVideoOutput, # noqa: F401 + ImageToVideoParameters, # noqa: F401 + ImageToVideoTargetSize, # noqa: F401 + ObjectDetectionBoundingBox, # noqa: F401 + ObjectDetectionInput, # noqa: F401 + ObjectDetectionOutputElement, # noqa: F401 + ObjectDetectionParameters, # noqa: F401 + Padding, # noqa: F401 + QuestionAnsweringInput, # noqa: F401 + QuestionAnsweringInputData, # noqa: F401 + QuestionAnsweringOutputElement, # noqa: F401 + QuestionAnsweringParameters, # noqa: F401 + SentenceSimilarityInput, # noqa: F401 + SentenceSimilarityInputData, # noqa: F401 + SummarizationInput, # noqa: F401 + SummarizationOutput, # noqa: F401 + SummarizationParameters, # noqa: F401 + SummarizationTruncationStrategy, # noqa: F401 + TableQuestionAnsweringInput, # noqa: F401 + TableQuestionAnsweringInputData, # noqa: F401 + TableQuestionAnsweringOutputElement, # noqa: F401 + TableQuestionAnsweringParameters, # noqa: F401 + Text2TextGenerationInput, # noqa: F401 + Text2TextGenerationOutput, # noqa: F401 + Text2TextGenerationParameters, # noqa: F401 + Text2TextGenerationTruncationStrategy, # noqa: F401 + TextClassificationInput, # noqa: F401 + TextClassificationOutputElement, # noqa: F401 + TextClassificationOutputTransform, # noqa: F401 + TextClassificationParameters, # noqa: F401 + TextGenerationInput, # noqa: F401 + TextGenerationInputGenerateParameters, # noqa: F401 + TextGenerationInputGrammarType, # noqa: F401 + TextGenerationOutput, # noqa: F401 + TextGenerationOutputBestOfSequence, # noqa: F401 + TextGenerationOutputDetails, # noqa: F401 + TextGenerationOutputFinishReason, # noqa: F401 + TextGenerationOutputPrefillToken, # noqa: F401 + TextGenerationOutputToken, # noqa: F401 + TextGenerationStreamOutput, # noqa: F401 + 
TextGenerationStreamOutputStreamDetails, # noqa: F401 + TextGenerationStreamOutputToken, # noqa: F401 + TextToAudioEarlyStoppingEnum, # noqa: F401 + TextToAudioGenerationParameters, # noqa: F401 + TextToAudioInput, # noqa: F401 + TextToAudioOutput, # noqa: F401 + TextToAudioParameters, # noqa: F401 + TextToImageInput, # noqa: F401 + TextToImageOutput, # noqa: F401 + TextToImageParameters, # noqa: F401 + TextToSpeechEarlyStoppingEnum, # noqa: F401 + TextToSpeechGenerationParameters, # noqa: F401 + TextToSpeechInput, # noqa: F401 + TextToSpeechOutput, # noqa: F401 + TextToSpeechParameters, # noqa: F401 + TextToVideoInput, # noqa: F401 + TextToVideoOutput, # noqa: F401 + TextToVideoParameters, # noqa: F401 + TokenClassificationAggregationStrategy, # noqa: F401 + TokenClassificationInput, # noqa: F401 + TokenClassificationOutputElement, # noqa: F401 + TokenClassificationParameters, # noqa: F401 + TranslationInput, # noqa: F401 + TranslationOutput, # noqa: F401 + TranslationParameters, # noqa: F401 + TranslationTruncationStrategy, # noqa: F401 + TypeEnum, # noqa: F401 + VideoClassificationInput, # noqa: F401 + VideoClassificationOutputElement, # noqa: F401 + VideoClassificationOutputTransform, # noqa: F401 + VideoClassificationParameters, # noqa: F401 + VisualQuestionAnsweringInput, # noqa: F401 + VisualQuestionAnsweringInputData, # noqa: F401 + VisualQuestionAnsweringOutputElement, # noqa: F401 + VisualQuestionAnsweringParameters, # noqa: F401 + ZeroShotClassificationInput, # noqa: F401 + ZeroShotClassificationOutputElement, # noqa: F401 + ZeroShotClassificationParameters, # noqa: F401 + ZeroShotImageClassificationInput, # noqa: F401 + ZeroShotImageClassificationOutputElement, # noqa: F401 + ZeroShotImageClassificationParameters, # noqa: F401 + ZeroShotObjectDetectionBoundingBox, # noqa: F401 + ZeroShotObjectDetectionInput, # noqa: F401 + ZeroShotObjectDetectionOutputElement, # noqa: F401 + ZeroShotObjectDetectionParameters, # noqa: F401 + ) + from 
.inference._mcp.agent import Agent # noqa: F401 + from .inference._mcp.mcp_client import MCPClient # noqa: F401 + from .inference_api import InferenceApi # noqa: F401 + from .keras_mixin import ( + KerasModelHubMixin, # noqa: F401 + from_pretrained_keras, # noqa: F401 + push_to_hub_keras, # noqa: F401 + save_pretrained_keras, # noqa: F401 + ) + from .repocard import ( + DatasetCard, # noqa: F401 + ModelCard, # noqa: F401 + RepoCard, # noqa: F401 + SpaceCard, # noqa: F401 + metadata_eval_result, # noqa: F401 + metadata_load, # noqa: F401 + metadata_save, # noqa: F401 + metadata_update, # noqa: F401 + ) + from .repocard_data import ( + CardData, # noqa: F401 + DatasetCardData, # noqa: F401 + EvalResult, # noqa: F401 + ModelCardData, # noqa: F401 + SpaceCardData, # noqa: F401 + ) + from .repository import Repository # noqa: F401 + from .serialization import ( + StateDictSplit, # noqa: F401 + get_tf_storage_size, # noqa: F401 + get_torch_storage_id, # noqa: F401 + get_torch_storage_size, # noqa: F401 + load_state_dict_from_file, # noqa: F401 + load_torch_model, # noqa: F401 + save_torch_model, # noqa: F401 + save_torch_state_dict, # noqa: F401 + split_state_dict_into_shards_factory, # noqa: F401 + split_tf_state_dict_into_shards, # noqa: F401 + split_torch_state_dict_into_shards, # noqa: F401 + ) + from .serialization._dduf import ( + DDUFEntry, # noqa: F401 + export_entries_as_dduf, # noqa: F401 + export_folder_as_dduf, # noqa: F401 + read_dduf_file, # noqa: F401 + ) + from .utils import ( + CachedFileInfo, # noqa: F401 + CachedRepoInfo, # noqa: F401 + CachedRevisionInfo, # noqa: F401 + CacheNotFound, # noqa: F401 + CorruptedCacheException, # noqa: F401 + DeleteCacheStrategy, # noqa: F401 + HFCacheInfo, # noqa: F401 + HfFolder, # noqa: F401 + cached_assets_path, # noqa: F401 + configure_http_backend, # noqa: F401 + dump_environment_info, # noqa: F401 + get_session, # noqa: F401 + get_token, # noqa: F401 + logging, # noqa: F401 + scan_cache_dir, # noqa: F401 + ) diff 
--git a/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/_commit_api.py b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/_commit_api.py new file mode 100644 index 0000000000000000000000000000000000000000..7ed64b0e5ed550c392f193239a2e00669cc3144a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/_commit_api.py @@ -0,0 +1,968 @@ +""" +Type definitions and utilities for the `create_commit` API +""" + +import base64 +import io +import os +import warnings +from collections import defaultdict +from contextlib import contextmanager +from dataclasses import dataclass, field +from itertools import groupby +from pathlib import Path, PurePosixPath +from typing import TYPE_CHECKING, Any, BinaryIO, Dict, Iterable, Iterator, List, Literal, Optional, Tuple, Union + +from tqdm.contrib.concurrent import thread_map + +from . import constants +from .errors import EntryNotFoundError, HfHubHTTPError, XetAuthorizationError, XetRefreshTokenError +from .file_download import hf_hub_url +from .lfs import UploadInfo, lfs_upload, post_lfs_batch_info +from .utils import ( + FORBIDDEN_FOLDERS, + XetTokenType, + are_progress_bars_disabled, + chunk_iterable, + fetch_xet_connection_info_from_repo_info, + get_session, + hf_raise_for_status, + logging, + sha, + tqdm_stream_file, + validate_hf_hub_args, +) +from .utils import tqdm as hf_tqdm +from .utils._runtime import is_xet_available + + +if TYPE_CHECKING: + from .hf_api import RepoFile + + +logger = logging.get_logger(__name__) + + +UploadMode = Literal["lfs", "regular"] + +# Max is 1,000 per request on the Hub for HfApi.get_paths_info +# Otherwise we get: +# HfHubHTTPError: 413 Client Error: Payload Too Large for url: https://huggingface.co/api/datasets/xxx (Request ID: xxx)\n\ntoo many parameters +# See https://github.com/huggingface/huggingface_hub/issues/1503 +FETCH_LFS_BATCH_SIZE = 500 + +UPLOAD_BATCH_MAX_NUM_FILES = 256 + + +@dataclass +class CommitOperationDelete: + """ + Data 
@dataclass
class CommitOperationDelete:
    """
    Describe the removal of one file, or of one whole folder, from a repository on the Hub.

    Args:
        path_in_repo (`str`):
            Path relative to the repo root, e.g. `"checkpoints/1fec34a/weights.bin"` for a file or
            `"checkpoints/1fec34a/"` for a folder.
        is_folder (`bool` or `Literal["auto"]`, *optional*):
            Whether the path designates a folder. With `"auto"` (the default) the kind is guessed from
            the path itself: a trailing `"/"` means folder, anything else means file. Pass `True` or
            `False` to bypass the guess.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If `is_folder` is neither a boolean nor `"auto"`.
    """

    path_in_repo: str
    is_folder: Union[bool, Literal["auto"]] = "auto"

    def __post_init__(self):
        # Normalise the path first: the folder guess below looks at the cleaned-up value.
        self.path_in_repo = _validate_path_in_repo(self.path_in_repo)

        requested = self.is_folder
        if requested == "auto":
            # No explicit choice was made: the trailing slash is the only hint available.
            self.is_folder = self.path_in_repo.endswith("/")
            return
        if isinstance(requested, bool):
            return
        raise ValueError(
            f"Wrong value for `is_folder`. Must be one of [`True`, `False`, `'auto'`]. Got '{requested}'."
        )
@dataclass
class CommitOperationCopy:
    """
    Describe the copy of one file to another location inside the same repository on the Hub.

    Limitations:
      - Only LFS files can be copied. To copy a regular file, you need to download it locally and re-upload it
      - Cross-repository copies are not supported.

    Note: a rename of an LFS file can be expressed by pairing a [`CommitOperationCopy`] with a
    [`CommitOperationDelete`] in the same commit.

    Args:
        src_path_in_repo (`str`):
            Repo-relative path of the file to copy, e.g. `"checkpoints/1fec34a/weights.bin"`.
        path_in_repo (`str`):
            Repo-relative path of the copy, e.g. `"checkpoints/1fec34a/weights_copy.bin"`.
        src_revision (`str`, *optional*):
            Git revision to read the source file from. Any valid git revision is accepted.
            Defaults to the revision the commit targets.
    """

    src_path_in_repo: str
    path_in_repo: str
    src_revision: Optional[str] = None
    # OID of the source file when it already exists on the Hub.
    # Lets the caller detect a commit that would end up empty.
    _src_oid: Optional[str] = None
    # OID of the file currently sitting at the destination path, if any.
    # Lets the caller detect a commit that would end up empty.
    _dest_oid: Optional[str] = None

    def __post_init__(self):
        # Source first, destination second: both go through the same normalisation.
        for attr_name in ("src_path_in_repo", "path_in_repo"):
            setattr(self, attr_name, _validate_path_in_repo(getattr(self, attr_name)))
@dataclass
class CommitOperationAdd:
    """
    Data structure holding necessary info to upload a file to a repository on the Hub.

    Args:
        path_in_repo (`str`):
            Relative filepath in the repo, for example: `"checkpoints/1fec34a/weights.bin"`
        path_or_fileobj (`str`, `Path`, `bytes`, or `BinaryIO`):
            Either:
            - a path to a local file (as `str` or `pathlib.Path`) to upload
            - a buffer of bytes (`bytes`) holding the content of the file to upload
            - a "file object" (subclass of `io.BufferedIOBase`), typically obtained
                with `open(path, "rb")`. It must support `seek()` and `tell()` methods.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If `path_or_fileobj` is not one of `str`, `Path`, `bytes` or `io.BufferedIOBase`.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If `path_or_fileobj` is a `str` or `Path` but not a path to an existing file.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If `path_or_fileobj` is a `io.BufferedIOBase` but it doesn't support both
            `seek()` and `tell()`.
    """

    path_in_repo: str
    path_or_fileobj: Union[str, Path, bytes, BinaryIO]
    upload_info: UploadInfo = field(init=False, repr=False)

    # Internal attributes

    # set to "lfs" or "regular" once known
    _upload_mode: Optional[UploadMode] = field(init=False, repr=False, default=None)

    # set to True if .gitignore rules prevent the file from being uploaded as LFS
    # (server-side check)
    _should_ignore: Optional[bool] = field(init=False, repr=False, default=None)

    # set to the remote OID of the file if it has already been uploaded
    # useful to determine if a commit will be empty or not
    _remote_oid: Optional[str] = field(init=False, repr=False, default=None)

    # set to True once the file has been uploaded as LFS
    _is_uploaded: bool = field(init=False, repr=False, default=False)

    # set to True once the file has been committed
    _is_committed: bool = field(init=False, repr=False, default=False)

    def __post_init__(self) -> None:
        """Validate `path_in_repo` and `path_or_fileobj`, then compute `upload_info`."""
        self.path_in_repo = _validate_path_in_repo(self.path_in_repo)

        # Validate `path_or_fileobj` value. `Path` objects are stored as plain strings.
        if isinstance(self.path_or_fileobj, Path):
            self.path_or_fileobj = str(self.path_or_fileobj)
        if isinstance(self.path_or_fileobj, str):
            # NOTE(review): the expanded/normalised path is only used for this existence check;
            # the attribute keeps the value exactly as the caller passed it.
            path_or_fileobj = os.path.normpath(os.path.expanduser(self.path_or_fileobj))
            if not os.path.isfile(path_or_fileobj):
                raise ValueError(f"Provided path: '{path_or_fileobj}' is not a file on the local file system")
        elif not isinstance(self.path_or_fileobj, (io.BufferedIOBase, bytes)):
            # ^^ Inspired from: https://stackoverflow.com/questions/44584829/how-to-determine-if-file-is-opened-in-binary-or-text-mode
            raise ValueError(
                "path_or_fileobj must be either an instance of str, bytes or"
                " io.BufferedIOBase. If you passed a file-like object, make sure it is"
                " in binary mode."
            )
        if isinstance(self.path_or_fileobj, io.BufferedIOBase):
            # Probe both methods now so that an unusable buffer is rejected at construction time.
            try:
                self.path_or_fileobj.tell()
                self.path_or_fileobj.seek(0, os.SEEK_CUR)
            except (OSError, AttributeError) as exc:
                raise ValueError(
                    "path_or_fileobj is a file-like object but does not implement seek() and tell()"
                ) from exc

        # Compute "upload_info" attribute
        if isinstance(self.path_or_fileobj, str):
            self.upload_info = UploadInfo.from_path(self.path_or_fileobj)
        elif isinstance(self.path_or_fileobj, bytes):
            self.upload_info = UploadInfo.from_bytes(self.path_or_fileobj)
        else:
            self.upload_info = UploadInfo.from_fileobj(self.path_or_fileobj)

    @contextmanager
    def as_file(self, with_tqdm: bool = False) -> Iterator[BinaryIO]:
        """
        A context manager that yields a file-like object allowing to read the underlying
        data behind `path_or_fileobj`.

        When `path_or_fileobj` is a file object, the object itself is yielded and its read
        position is put back where it was on exit, whether the `with` body completed or raised.

        Args:
            with_tqdm (`bool`, *optional*, defaults to `False`):
                If True, iterating over the file object will display a progress bar. Only
                works if the file-like object is a path to a file. Pure bytes and buffers
                are not supported.

        Example:

        ```python
        >>> operation = CommitOperationAdd(
        ...        path_in_repo="remote/dir/weights.h5",
        ...        path_or_fileobj="./local/weights.h5",
        ... )
        CommitOperationAdd(path_in_repo='remote/dir/weights.h5', path_or_fileobj='./local/weights.h5')

        >>> with operation.as_file() as file:
        ...     content = file.read()

        >>> with operation.as_file(with_tqdm=True) as file:
        ...     while True:
        ...         data = file.read(1024)
        ...         if not data:
        ...              break
        config.json: 100%|█████████████████████████| 8.19k/8.19k [00:02<00:00, 3.72kB/s]

        >>> with operation.as_file(with_tqdm=True) as file:
        ...     requests.put(..., data=file)
        config.json: 100%|█████████████████████████| 8.19k/8.19k [00:02<00:00, 3.72kB/s]
        ```
        """
        if isinstance(self.path_or_fileobj, (str, Path)):
            if with_tqdm:
                with tqdm_stream_file(self.path_or_fileobj) as file:
                    yield file
            else:
                with open(self.path_or_fileobj, "rb") as file:
                    yield file
        elif isinstance(self.path_or_fileobj, bytes):
            yield io.BytesIO(self.path_or_fileobj)
        elif isinstance(self.path_or_fileobj, io.BufferedIOBase):
            prev_pos = self.path_or_fileobj.tell()
            try:
                yield self.path_or_fileobj
            finally:
                # Without the `finally`, an exception raised inside the `with` body would skip
                # the rewind and leave the caller's buffer at an arbitrary offset.
                self.path_or_fileobj.seek(prev_pos, io.SEEK_SET)

    def b64content(self) -> bytes:
        """
        The base64-encoded content of `path_or_fileobj`

        Returns: `bytes`
        """
        with self.as_file() as file:
            return base64.b64encode(file.read())

    @property
    def _local_oid(self) -> Optional[str]:
        """Return the OID of the local file.

        This OID is then compared to `self._remote_oid` to check if the file has changed compared to the remote one.
        If the file did not change, we won't upload it again to prevent empty commits.

        For LFS files, the OID corresponds to the SHA256 of the file content (used a LFS ref).
        For regular files, the OID corresponds to the SHA1 of the file content.
        Note: this is slightly different to git OID computation since the oid of an LFS file is usually the git-SHA1 of the
              pointer file content (not the actual file content). However, using the SHA256 is enough to detect changes
              and more convenient client-side.

        Returns `None` while the upload mode is still unknown.
        """
        if self._upload_mode is None:
            return None
        elif self._upload_mode == "lfs":
            return self.upload_info.sha256.hex()
        else:
            # Regular file => compute sha1
            # => no need to read by chunk since the file is guaranteed to be <=5MB.
            with self.as_file() as file:
                return sha.git_hash(file.read())
def _validate_path_in_repo(path_in_repo: str) -> str:
    """
    Normalise a repo-relative path and reject values that would cause a server-side issue.

    One leading `"/"` and one leading `"./"` are dropped. The remaining path must not be `"."`,
    `".."`, or start with `"../"`, and none of its `/`-separated parts may be one of
    `FORBIDDEN_FOLDERS`.

    Args:
        path_in_repo (`str`):
            The path as provided by the caller.

    Returns:
        `str`: the normalised path.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If the path points at or above the repo root, or goes through a forbidden folder.
    """
    if path_in_repo.startswith("/"):
        path_in_repo = path_in_repo[1:]
    # Strip the "./" prefix BEFORE the traversal check: otherwise "./.." and "./../x" slip through,
    # because they only turn into ".." / "../x" once the prefix is gone.
    if path_in_repo.startswith("./"):
        path_in_repo = path_in_repo[2:]
    if path_in_repo in (".", "..") or path_in_repo.startswith("../"):
        raise ValueError(f"Invalid `path_in_repo` in CommitOperation: '{path_in_repo}'")

    # Split once; the parts are the same for every forbidden folder.
    parts = path_in_repo.split("/")
    for forbidden in FORBIDDEN_FOLDERS:
        if forbidden in parts:
            raise ValueError(
                f"Invalid `path_in_repo` in CommitOperation: cannot update files under a '{forbidden}/' folder (path:"
                f" '{path_in_repo}')."
            )
    return path_in_repo
CommitOperation = Union[CommitOperationAdd, CommitOperationCopy, CommitOperationDelete]


def _warn_on_overwriting_operations(operations: List[CommitOperation]) -> None:
    """
    Emit a warning for each operation that overrides an earlier one of the same commit.

    The operations are scanned in order:
    - a second `CommitOperationAdd` on a path that was already added triggers a warning;
    - a `CommitOperationDelete` on a file that was added earlier, or on a folder that contains such a
      file, triggers a warning;
    - a `CommitOperationDelete` followed by a `CommitOperationAdd` on the same path stays silent. It is
      usually pointless (no need to delete before uploading) but legitimate when a folder is wiped
      and then repopulated.
    """
    # Number of additions recorded so far for each file path and for each of its ancestor folders.
    additions_seen: Dict[str, int] = {}

    def _count(path: str) -> int:
        return additions_seen.get(path, 0)

    for op in operations:
        target = op.path_in_repo

        if isinstance(op, CommitOperationAdd):
            if _count(target) > 0:
                warnings.warn(
                    "About to update multiple times the same file in the same commit:"
                    f" '{target}'. This can cause undesired inconsistencies in"
                    " your repo."
                )
            # Record the file itself, then every ancestor folder, so that a later folder deletion
            # can tell that it is about to wipe freshly added content.
            for touched in (target, *map(str, PurePosixPath(target).parents)):
                additions_seen[touched] = _count(touched) + 1

        if isinstance(op, CommitOperationDelete) and _count(str(PurePosixPath(target))) > 0:
            if op.is_folder:
                warnings.warn(
                    "About to delete a folder containing files that have just been"
                    f" updated within the same commit: '{target}'. This can"
                    " cause undesired inconsistencies in your repo."
                )
            else:
                warnings.warn(
                    "About to delete a file that have just been updated within the"
                    f" same commit: '{target}'. This can cause undesired"
                    " inconsistencies in your repo."
                )
@validate_hf_hub_args
def _upload_files(
    *,
    additions: List[CommitOperationAdd],
    repo_type: str,
    repo_id: str,
    headers: Dict[str, str],
    endpoint: Optional[str] = None,
    num_threads: int = 5,
    revision: Optional[str] = None,
    create_pr: Optional[bool] = None,
):
    """
    Negotiate the transfer protocol (LFS vs Xet) batch by batch, then upload everything.

    `additions` is processed in batches of `UPLOAD_BATCH_MAX_NUM_FILES`. For each batch the LFS batch
    endpoint is asked which transfer to use; "xet" is only offered when `hf_xet` is available and the
    batch holds no binary IO buffer. Batches answered with "xet" are queued for `_upload_xet_files`,
    all others for `_upload_lfs_files`. The uploads themselves start once every batch is negotiated.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If the LFS batch endpoint reports errors for some files.
    """
    queued_for_xet: List[CommitOperationAdd] = []
    queued_lfs_actions: List[Dict] = []
    addop_by_oid: Dict[str, CommitOperationAdd] = {}

    for raw_batch in chunk_iterable(additions, chunk_size=UPLOAD_BATCH_MAX_NUM_FILES):
        batch = list(raw_batch)

        offered: List[str] = ["basic", "multipart"]
        contains_buffer = any(isinstance(op.path_or_fileobj, io.BufferedIOBase) for op in batch)
        if is_xet_available():
            if contains_buffer:
                logger.warning(
                    "Uploading files as a binary IO buffer is not supported by Xet Storage. "
                    "Falling back to HTTP upload."
                )
            else:
                offered.append("xet")

        batch_actions, batch_errors, chosen_transfer = post_lfs_batch_info(
            upload_infos=[op.upload_info for op in batch],
            repo_id=repo_id,
            repo_type=repo_type,
            revision=revision,
            endpoint=endpoint,
            headers=headers,
            token=None,  # already passed in 'headers'
            transfers=offered,
        )
        if batch_errors:
            details = "\n".join(
                f"Encountered error for file with OID {err.get('oid')}: `{err.get('error', {}).get('message')}"
                for err in batch_errors
            )
            raise ValueError(f"LFS batch API returned errors:\n{details}")

        # Only honour "xet" when it was actually offered (the server could answer "xet" for a
        # batch uploaded from BytesIO); anything else goes through LFS for this batch.
        if chosen_transfer == "xet" and "xet" in offered:
            queued_for_xet.extend(batch)
            continue
        queued_lfs_actions.extend(batch_actions)
        addop_by_oid.update({op.upload_info.sha256.hex(): op for op in batch})

    if queued_lfs_actions:
        _upload_lfs_files(
            actions=queued_lfs_actions,
            oid2addop=addop_by_oid,
            headers=headers,
            endpoint=endpoint,
            num_threads=num_threads,
        )

    if queued_for_xet:
        _upload_xet_files(
            additions=queued_for_xet,
            repo_type=repo_type,
            repo_id=repo_id,
            headers=headers,
            endpoint=endpoint,
            revision=revision,
            create_pr=create_pr,
        )
@validate_hf_hub_args
def _upload_lfs_files(
    *,
    actions: List[Dict],
    oid2addop: Dict[str, CommitOperationAdd],
    headers: Dict[str, str],
    endpoint: Optional[str] = None,
    num_threads: int = 5,
):
    """
    Uploads the content of the files described by `actions` to the Hub using the large file storage protocol.

    Relevant external documentation:
        - LFS Batch API: https://github.com/git-lfs/git-lfs/blob/main/docs/api/batch.md

    Args:
        actions (`List[Dict]`):
            LFS batch actions returned by the server. Entries without an `"actions"` key are
            treated as already present upstream and skipped.
        oid2addop (`Dict[str, CommitOperationAdd]`):
            A dictionary mapping the OID of the file to the corresponding `CommitOperationAdd` object.
        headers (`Dict[str, str]`):
            Headers to use for the request, including authorization headers and user agent.
        endpoint (`str`, *optional*):
            The endpoint to use for the request. Defaults to `constants.ENDPOINT`.
        num_threads (`int`, *optional*):
            The number of concurrent threads to use when uploading. Defaults to 5.

    Raises:
        [`RuntimeError`](https://docs.python.org/3/library/exceptions.html#RuntimeError)
            If the upload of a file failed for any reason. The original error is chained as `__cause__`.
        [`KeyError`](https://docs.python.org/3/library/exceptions.html#KeyError)
            If the server returned an action whose OID is not in `oid2addop`.
    """
    # Filter out files already present upstream
    filtered_actions = []
    for action in actions:
        if action.get("actions") is None:
            logger.debug(
                f"Content of file {oid2addop[action['oid']].path_in_repo} is already present upstream - skipping upload."
            )
        else:
            filtered_actions.append(action)

    # Upload according to server-provided actions
    def _wrapped_lfs_upload(batch_action) -> None:
        # Resolve the operation BEFORE entering the `try`: the error message below needs it, and a
        # failed lookup inside the `try` used to surface as an unrelated `UnboundLocalError`.
        operation = oid2addop[batch_action["oid"]]
        try:
            lfs_upload(operation=operation, lfs_batch_action=batch_action, headers=headers, endpoint=endpoint)
        except Exception as exc:
            raise RuntimeError(f"Error while uploading '{operation.path_in_repo}' to the Hub.") from exc

    if constants.HF_HUB_ENABLE_HF_TRANSFER:
        # With `hf_transfer` enabled, files are uploaded one after the other.
        logger.debug(f"Uploading {len(filtered_actions)} LFS files to the Hub using `hf_transfer`.")
        for action in hf_tqdm(filtered_actions, name="huggingface_hub.lfs_upload"):
            _wrapped_lfs_upload(action)
    elif len(filtered_actions) == 1:
        logger.debug("Uploading 1 LFS file to the Hub")
        _wrapped_lfs_upload(filtered_actions[0])
    else:
        logger.debug(
            f"Uploading {len(filtered_actions)} LFS files to the Hub using up to {num_threads} threads concurrently"
        )
        thread_map(
            _wrapped_lfs_upload,
            filtered_actions,
            desc=f"Upload {len(filtered_actions)} LFS files",
            max_workers=num_threads,
            tqdm_class=hf_tqdm,
        )
@validate_hf_hub_args
def _upload_xet_files(
    *,
    additions: List[CommitOperationAdd],
    repo_type: str,
    repo_id: str,
    headers: Dict[str, str],
    endpoint: Optional[str] = None,
    revision: Optional[str] = None,
    create_pr: Optional[bool] = None,
):
    """
    Upload the content of `additions` to the Hub through the xet storage protocol.

    The function requests a write token for the repo, then delegates the transfer to `hf_xet`:
    additions backed by a local path go through `hf_xet.upload_files`, additions holding raw `bytes`
    go through `hf_xet.upload_bytes`. Both calls receive the xet endpoint, the current token with its
    expiration, a callback able to fetch a fresh token, and (unless progress bars are disabled) a
    progress callback. Splitting files into chunks and deduplicating them is the job of `hf_xet`.

    Args:
        additions (`List` of `CommitOperationAdd`):
            The files to be uploaded. Nothing happens when the list is empty.
        repo_type (`str`):
            Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        headers (`Dict[str, str]`):
            Headers to use for the request, including authorization headers and user agent.
        endpoint: (`str`, *optional*):
            The endpoint used to request the xet connection info.
        revision (`str`, *optional*):
            The git revision to upload to.
        create_pr (`bool`, *optional*):
            Whether or not to create a Pull Request with that commit.

    Raises:
        [`XetAuthorizationError`]
            If the Hub answers 401 when the write token is requested.
        [`XetRefreshTokenError`]
            If refreshing the token yields no connection info.
        [`~utils.HfHubHTTPError`]
            If requesting the connection info fails with any other HTTP error.
    """
    if not additions:
        return

    # at this point, we know that hf_xet is installed
    from hf_xet import upload_bytes, upload_files

    from .utils._xet_progress_reporting import XetProgressReporter

    def _request_connection_info():
        # Single place describing how a write token is requested: used for the initial token
        # and again each time hf_xet asks for a refresh.
        return fetch_xet_connection_info_from_repo_info(
            token_type=XetTokenType.WRITE,
            repo_id=repo_id,
            repo_type=repo_type,
            revision=revision,
            headers=headers,
            endpoint=endpoint,
            params={"create_pr": "1"} if create_pr else None,
        )

    try:
        xet_connection_info = _request_connection_info()
    except HfHubHTTPError as e:
        if e.response.status_code == 401:
            raise XetAuthorizationError(
                f"You are unauthorized to upload to xet storage for {repo_type}/{repo_id}. "
                f"Please check that you have configured your access token with write access to the repo."
            ) from e
        raise

    xet_endpoint = xet_connection_info.endpoint
    access_token_info = (xet_connection_info.access_token, xet_connection_info.expiration_unix_epoch)

    def token_refresher() -> Tuple[str, int]:
        refreshed = _request_connection_info()
        if refreshed is None:
            raise XetRefreshTokenError("Failed to refresh xet token")
        return refreshed.access_token, refreshed.expiration_unix_epoch

    if are_progress_bars_disabled():
        progress, progress_callback = None, None
    else:
        progress = XetProgressReporter()
        progress_callback = progress.update_progress

    # Arguments shared by both hf_xet entry points, in the positional order they expect.
    shared_args = (xet_endpoint, access_token_info, token_refresher, progress_callback, repo_type)

    try:
        bytes_ops = [op for op in additions if isinstance(op.path_or_fileobj, bytes)]
        path_ops = [op for op in additions if isinstance(op.path_or_fileobj, (str, Path))]

        if path_ops:
            upload_files([str(op.path_or_fileobj) for op in path_ops], *shared_args)

        if bytes_ops:
            upload_bytes([op.path_or_fileobj for op in bytes_ops], *shared_args)
    finally:
        if progress is not None:
            progress.close(False)
def _validate_preupload_info(preupload_info: dict):
    """
    Check that a decoded "preupload" response has the structure the commit helpers rely on.

    The payload must be a dict whose `"files"` entry is a list of dicts, each carrying a string
    `"path"` and an `"uploadMode"` equal to `"lfs"` or `"regular"`.

    Args:
        preupload_info (`dict`):
            The decoded JSON body returned by the preupload endpoint.

    Returns:
        `dict`: `preupload_info` itself, unchanged, so the call can be used inline.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If the payload does not have the expected structure.
    """
    # A JSON body can decode to a list, a string, None...: report that as a formatting problem
    # instead of failing with an AttributeError on `.get`.
    if not isinstance(preupload_info, dict):
        raise ValueError("preupload_info is improperly formatted")
    files = preupload_info.get("files")
    if not isinstance(files, list):
        raise ValueError("preupload_info is improperly formatted")
    for file_info in files:
        if not (
            isinstance(file_info, dict)
            and isinstance(file_info.get("path"), str)
            and isinstance(file_info.get("uploadMode"), str)
            and (file_info["uploadMode"] in ("lfs", "regular"))
        ):
            raise ValueError("preupload_info is improperly formatted: invalid entry in 'files'")
    return preupload_info
@validate_hf_hub_args
def _fetch_upload_modes(
    additions: Iterable[CommitOperationAdd],
    repo_type: str,
    repo_id: str,
    headers: Dict[str, str],
    revision: str,
    endpoint: Optional[str] = None,
    create_pr: bool = False,
    gitignore_content: Optional[str] = None,
) -> None:
    """
    Requests the Hub "preupload" endpoint to determine whether each input file should be uploaded as a regular git blob
    or as a git LFS blob. Input `additions` are mutated in-place with the upload mode, the "should ignore" flag and
    the remote OID (if any) returned by the Hub.

    Args:
        additions (`Iterable` of :class:`CommitOperationAdd`):
            Iterable of :class:`CommitOperationAdd` describing the files to
            upload to the Hub.
        repo_type (`str`):
            Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        headers (`Dict[str, str]`):
            Headers to use for the request, including authorization headers and user agent.
        revision (`str`):
            The git revision to upload the files to. Can be any valid git revision.
        endpoint (`str`, *optional*):
            The endpoint to send the request to. Defaults to `constants.ENDPOINT`.
        create_pr (`bool`, *optional*):
            If truthy, `create_pr=1` is sent as a query parameter of the preupload request.
        gitignore_content (`str`, *optional*):
            The content of the `.gitignore` file to know which files should be ignored. The order of priority
            is to first check if `gitignore_content` is passed, then check if the `.gitignore` file is present
            in the list of files to commit and finally default to the `.gitignore` file already hosted on the Hub
            (if any).
    Raises:
        [`~utils.HfHubHTTPError`]
            If the Hub API returned an error.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If the Hub API response is improperly formatted.
    """
    endpoint = endpoint if endpoint is not None else constants.ENDPOINT

    # `additions` is traversed more than once below (batched requests, then in-place update).
    # Materialise it so that a one-shot iterator is not silently exhausted by the first pass.
    additions = list(additions)

    # Fetch upload mode (LFS or regular) chunk by chunk.
    upload_modes: Dict[str, UploadMode] = {}
    should_ignore_info: Dict[str, bool] = {}
    oid_info: Dict[str, Optional[str]] = {}

    for chunk in chunk_iterable(additions, 256):
        payload: Dict = {
            "files": [
                {
                    "path": op.path_in_repo,
                    "sample": base64.b64encode(op.upload_info.sample).decode("ascii"),
                    "size": op.upload_info.size,
                }
                for op in chunk
            ]
        }
        if gitignore_content is not None:
            payload["gitIgnore"] = gitignore_content

        resp = get_session().post(
            f"{endpoint}/api/{repo_type}s/{repo_id}/preupload/{revision}",
            json=payload,
            headers=headers,
            params={"create_pr": "1"} if create_pr else None,
        )
        hf_raise_for_status(resp)
        preupload_info = _validate_preupload_info(resp.json())
        for file in preupload_info["files"]:
            path = file["path"]
            upload_modes[path] = file["uploadMode"]
            should_ignore_info[path] = file["shouldIgnore"]
            oid_info[path] = file.get("oid")  # absent when the file does not exist upstream yet

    # Set upload mode for each addition operation
    for addition in additions:
        addition._upload_mode = upload_modes[addition.path_in_repo]
        addition._should_ignore = should_ignore_info[addition.path_in_repo]
        addition._remote_oid = oid_info[addition.path_in_repo]

        # Empty files cannot be uploaded as LFS (S3 would fail with a 501 Not Implemented)
        # => empty files are uploaded as "regular" to still allow users to commit them.
        if addition.upload_info.size == 0:
            addition._upload_mode = "regular"
@validate_hf_hub_args
def _fetch_files_to_copy(
    copies: Iterable[CommitOperationCopy],
    repo_type: str,
    repo_id: str,
    headers: Dict[str, str],
    revision: str,
    endpoint: Optional[str] = None,
) -> Dict[Tuple[str, Optional[str]], Union["RepoFile", bytes]]:
    """
    Fetch information about the files to copy.

    For LFS files, we only need their metadata (file size and sha256) while for regular files
    we need to download the raw content from the Hub. Each operation in `copies` is also updated
    in-place with the OID of its source file and of the file currently at its destination (if any).

    Args:
        copies (`Iterable` of :class:`CommitOperationCopy`):
            Iterable of :class:`CommitOperationCopy` describing the files to
            copy on the Hub.
        repo_type (`str`):
            Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        headers (`Dict[str, str]`):
            Headers to use for the request, including authorization headers and user agent.
        revision (`str`):
            The git revision to upload the files to. Can be any valid git revision.
        endpoint (`str`, *optional*):
            The endpoint passed to `HfApi` and used to build the download URLs.

    Returns: `Dict[Tuple[str, Optional[str]], Union[RepoFile, bytes]]]`
        Key is the file path and revision of the file to copy.
        Value is the raw content as bytes (for regular files) or the file information as a RepoFile (for LFS files).

    Raises:
        [`~utils.HfHubHTTPError`]
            If the Hub API returned an error.
        [`~utils.EntryNotFoundError`]
            If a file to copy does not exist at the requested revision.
        [`NotImplementedError`](https://docs.python.org/3/library/exceptions.html#NotImplementedError)
            If a source path designates a folder.
    """
    from .hf_api import HfApi, RepoFolder

    hf_api = HfApi(endpoint=endpoint, headers=headers)

    # `copies` is traversed twice (destination pass, then source pass): materialise it so that a
    # one-shot iterator is not silently exhausted by the first pass.
    copies = list(copies)

    files_to_copy: Dict[Tuple[str, Optional[str]], Union["RepoFile", bytes]] = {}
    # Store (path, revision) -> oid mapping
    oid_info: Dict[Tuple[str, Optional[str]], Optional[str]] = {}

    # 1. Fetch OIDs for destination paths in batches.
    dest_paths = [op.path_in_repo for op in copies]
    for offset in range(0, len(dest_paths), FETCH_LFS_BATCH_SIZE):
        dest_repo_files = hf_api.get_paths_info(
            repo_id=repo_id,
            paths=dest_paths[offset : offset + FETCH_LFS_BATCH_SIZE],
            revision=revision,
            repo_type=repo_type,
        )
        for file in dest_repo_files:
            if not isinstance(file, RepoFolder):
                oid_info[(file.path, revision)] = file.blob_id

    # 2. Group by source revision and fetch source file info in batches.
    # A dict is used rather than `itertools.groupby`: `groupby` only merges *consecutive* items, so
    # interleaved revisions would produce several groups (and extra requests) for the same revision.
    ops_by_src_revision: Dict[Optional[str], List[CommitOperationCopy]] = defaultdict(list)
    for op in copies:
        ops_by_src_revision[op.src_revision].append(op)

    for src_revision, operations in ops_by_src_revision.items():
        src_paths = [op.src_path_in_repo for op in operations]
        for offset in range(0, len(src_paths), FETCH_LFS_BATCH_SIZE):
            src_repo_files = hf_api.get_paths_info(
                repo_id=repo_id,
                paths=src_paths[offset : offset + FETCH_LFS_BATCH_SIZE],
                revision=src_revision or revision,
                repo_type=repo_type,
            )

            for src_repo_file in src_repo_files:
                if isinstance(src_repo_file, RepoFolder):
                    raise NotImplementedError("Copying a folder is not implemented.")
                oid_info[(src_repo_file.path, src_revision)] = src_repo_file.blob_id
                # If it's an LFS file, store the RepoFile object. Otherwise, download raw bytes.
                if src_repo_file.lfs:
                    files_to_copy[(src_repo_file.path, src_revision)] = src_repo_file
                else:
                    # TODO: (optimization) download regular files to copy concurrently
                    url = hf_hub_url(
                        endpoint=endpoint,
                        repo_type=repo_type,
                        repo_id=repo_id,
                        revision=src_revision or revision,
                        filename=src_repo_file.path,
                    )
                    response = get_session().get(url, headers=headers)
                    hf_raise_for_status(response)
                    files_to_copy[(src_repo_file.path, src_revision)] = response.content

        # 3. Ensure all operations found a corresponding file in the Hub
        #  and track src/dest OIDs for each operation.
        for operation in operations:
            if (operation.src_path_in_repo, src_revision) not in files_to_copy:
                raise EntryNotFoundError(
                    f"Cannot copy {operation.src_path_in_repo} at revision "
                    f"{src_revision or revision}: file is missing on repo."
                )
            operation._src_oid = oid_info.get((operation.src_path_in_repo, operation.src_revision))
            operation._dest_oid = oid_info.get((operation.path_in_repo, revision))
    return files_to_copy
def _prepare_commit_payload(
    operations: Iterable[CommitOperation],
    files_to_copy: Dict[Tuple[str, Optional[str]], Union["RepoFile", bytes]],
    commit_message: str,
    commit_description: Optional[str] = None,
    parent_commit: Optional[str] = None,
) -> Iterable[Dict[str, Any]]:
    """
    Generate, item by item, the payload to POST to the `/commit` API of the Hub.

    The first item is a header carrying the commit metadata; one item per operation follows. Items
    are produced lazily so that the request body can be streamed as ndjson.

    For more information, see:
        - https://github.com/huggingface/huggingface_hub/issues/1085#issuecomment-1265208073
        - http://ndjson.org/

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If an operation cannot be converted to a payload item: unknown operation type, addition
            whose upload mode is neither `"regular"` nor `"lfs"`, or copy whose entry in
            `files_to_copy` is neither raw bytes nor a file with LFS info.
    """
    # 1. Send a header item with the commit metadata
    header: Dict[str, Any] = {
        "summary": commit_message,
        "description": "" if commit_description is None else commit_description,
    }
    if parent_commit is not None:
        header["parentCommit"] = parent_commit
    yield {"key": "header", "value": header}

    skipped = 0

    # 2. Send operations, one per line
    for op in operations:
        is_addition = isinstance(op, CommitOperationAdd)

        # Additions flagged by the gitignore rules never reach the payload.
        if is_addition and op._should_ignore:
            logger.debug(f"Skipping file '{op.path_in_repo}' in commit (ignored by gitignore file).")
            skipped += 1
            continue

        if is_addition and op._upload_mode == "regular":
            # 2.a. Regular file: the content travels inline, base64-encoded
            item = {
                "key": "file",
                "value": {
                    "content": op.b64content().decode(),
                    "path": op.path_in_repo,
                    "encoding": "base64",
                },
            }
        elif is_addition and op._upload_mode == "lfs":
            # 2.b. LFS file: only the pointer information is sent
            item = {
                "key": "lfsFile",
                "value": {
                    "path": op.path_in_repo,
                    "algo": "sha256",
                    "oid": op.upload_info.sha256.hex(),
                    "size": op.upload_info.size,
                },
            }
        elif isinstance(op, CommitOperationDelete):
            # 2.c. Deletion of a file or of a folder
            item = {
                "key": "deletedFolder" if op.is_folder else "deletedFile",
                "value": {"path": op.path_in_repo},
            }
        elif isinstance(op, CommitOperationCopy):
            # 2.d. Copy: raw bytes are re-sent inline, LFS files are referenced by their sha256
            source = files_to_copy[(op.src_path_in_repo, op.src_revision)]
            if isinstance(source, bytes):
                item = {
                    "key": "file",
                    "value": {
                        "content": base64.b64encode(source).decode(),
                        "path": op.path_in_repo,
                        "encoding": "base64",
                    },
                }
            elif source.lfs:
                item = {
                    "key": "lfsFile",
                    "value": {
                        "path": op.path_in_repo,
                        "algo": "sha256",
                        "oid": source.lfs.sha256,
                    },
                }
            else:
                raise ValueError(
                    "Malformed files_to_copy (should be raw file content as bytes or RepoFile objects with LFS info."
                )
        else:
            # 2.e. Never expected to happen
            raise ValueError(
                f"Unknown operation to commit. Operation: {op}. Upload mode:"
                f" {getattr(op, '_upload_mode', None)}"
            )

        yield item

    if skipped > 0:
        logger.info(f"Skipped {skipped} file(s) in commit (ignored by gitignore file).")
Upload mode:" + f" {getattr(operation, '_upload_mode', None)}" + ) + + if nb_ignored_files > 0: + logger.info(f"Skipped {nb_ignored_files} file(s) in commit (ignored by gitignore file).") diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/_commit_scheduler.py b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/_commit_scheduler.py new file mode 100644 index 0000000000000000000000000000000000000000..1bc8db6a8ade4d2253dd241a66c86def5dac2733 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/_commit_scheduler.py @@ -0,0 +1,350 @@ +import atexit +import logging +import os +import time +from concurrent.futures import Future +from dataclasses import dataclass +from io import SEEK_END, SEEK_SET, BytesIO +from pathlib import Path +from threading import Lock, Thread +from typing import Dict, List, Optional, Union + +from .hf_api import DEFAULT_IGNORE_PATTERNS, CommitInfo, CommitOperationAdd, HfApi +from .utils import filter_repo_objects + + +logger = logging.getLogger(__name__) + + +@dataclass(frozen=True) +class _FileToUpload: + """Temporary dataclass to store info about files to upload. Not meant to be used directly.""" + + local_path: Path + path_in_repo: str + size_limit: int + last_modified: float + + +class CommitScheduler: + """ + Scheduler to upload a local folder to the Hub at regular intervals (e.g. push to hub every 5 minutes). + + The recommended way to use the scheduler is to use it as a context manager. This ensures that the scheduler is + properly stopped and the last commit is triggered when the script ends. The scheduler can also be stopped manually + with the `stop` method. Checkout the [upload guide](https://huggingface.co/docs/huggingface_hub/guides/upload#scheduled-uploads) + to learn more about how to use it. + + Args: + repo_id (`str`): + The id of the repo to commit to. + folder_path (`str` or `Path`): + Path to the local folder to upload regularly. 
+ every (`int` or `float`, *optional*): + The number of minutes between each commit. Defaults to 5 minutes. + path_in_repo (`str`, *optional*): + Relative path of the directory in the repo, for example: `"checkpoints/"`. Defaults to the root folder + of the repository. + repo_type (`str`, *optional*): + The type of the repo to commit to. Defaults to `model`. + revision (`str`, *optional*): + The revision of the repo to commit to. Defaults to `main`. + private (`bool`, *optional*): + Whether to make the repo private. If `None` (default), the repo will be public unless the organization's default is private. This value is ignored if the repo already exists. + token (`str`, *optional*): + The token to use to commit to the repo. Defaults to the token saved on the machine. + allow_patterns (`List[str]` or `str`, *optional*): + If provided, only files matching at least one pattern are uploaded. + ignore_patterns (`List[str]` or `str`, *optional*): + If provided, files matching any of the patterns are not uploaded. + squash_history (`bool`, *optional*): + Whether to squash the history of the repo after each commit. Defaults to `False`. Squashing commits is + useful to avoid degraded performances on the repo when it grows too large. + hf_api (`HfApi`, *optional*): + The [`HfApi`] client to use to commit to the Hub. Can be set with custom settings (user agent, token,...). + + Example: + ```py + >>> from pathlib import Path + >>> from huggingface_hub import CommitScheduler + + # Scheduler uploads every 10 minutes + >>> csv_path = Path("watched_folder/data.csv") + >>> CommitScheduler(repo_id="test_scheduler", repo_type="dataset", folder_path=csv_path.parent, every=10) + + >>> with csv_path.open("a") as f: + ... f.write("first line") + + # Some time later (...) + >>> with csv_path.open("a") as f: + ... 
def __init__(
    self,
    *,
    repo_id: str,
    folder_path: Union[str, Path],
    every: Union[int, float] = 5,
    path_in_repo: Optional[str] = None,
    repo_type: Optional[str] = None,
    revision: Optional[str] = None,
    private: Optional[bool] = None,
    token: Optional[str] = None,
    allow_patterns: Optional[Union[List[str], str]] = None,
    ignore_patterns: Optional[Union[List[str], str]] = None,
    squash_history: bool = False,
    hf_api: Optional["HfApi"] = None,
) -> None:
    """Prepare the folder and the repo, then start the background scheduler.

    In order: the local folder is created if it does not exist, the repo is
    created with `exist_ok=True`, a daemon thread running `_run_scheduler` is
    started, and `_push_to_hub` is registered with `atexit`.

    Raises:
        ValueError: If `folder_path` is an existing file, or if `every` is not
            strictly greater than 0.
    """
    self.api = hf_api or HfApi(token=token)

    # Folder
    self.folder_path = Path(folder_path).expanduser().resolve()
    self.path_in_repo = path_in_repo or ""
    self.allow_patterns = allow_patterns

    # Normalize `ignore_patterns` to a list before appending the defaults
    if ignore_patterns is None:
        ignore_patterns = []
    elif isinstance(ignore_patterns, str):
        ignore_patterns = [ignore_patterns]
    self.ignore_patterns = ignore_patterns + DEFAULT_IGNORE_PATTERNS

    if self.folder_path.is_file():
        raise ValueError(f"'folder_path' must be a directory, not a file: '{self.folder_path}'.")
    self.folder_path.mkdir(parents=True, exist_ok=True)

    # Repository
    repo_url = self.api.create_repo(repo_id=repo_id, private=private, repo_type=repo_type, exist_ok=True)
    self.repo_id = repo_url.repo_id
    self.repo_type = repo_type
    self.revision = revision
    self.token = token

    # Keep track of already uploaded files
    self.last_uploaded: Dict[Path, float] = {}  # key is local path, value is timestamp

    # Scheduler
    if not every > 0:
        raise ValueError(f"'every' must be a positive integer, not '{every}'.")
    self.lock = Lock()
    self.every = every
    self.squash_history = squash_history

    # The stop flag must exist BEFORE the thread is started and the atexit hook
    # is registered: both end up in `_push_to_hub`, which reads it. Assigning it
    # last left a window where that read raised AttributeError.
    self.__stopped = False

    logger.info(f"Scheduled job to push '{self.folder_path}' to '{self.repo_id}' every {self.every} minutes.")
    self._scheduler_thread = Thread(target=self._run_scheduler, daemon=True)
    self._scheduler_thread.start()
    atexit.register(self._push_to_hub)
+ """ + return self.api.run_as_future(self._push_to_hub) + + def _push_to_hub(self) -> Optional[CommitInfo]: + if self.__stopped: # If stopped, already scheduled commits are ignored + return None + + logger.info("(Background) scheduled commit triggered.") + try: + value = self.push_to_hub() + if self.squash_history: + logger.info("(Background) squashing repo history.") + self.api.super_squash_history(repo_id=self.repo_id, repo_type=self.repo_type, branch=self.revision) + return value + except Exception as e: + logger.error(f"Error while pushing to Hub: {e}") # Depending on the setup, error might be silenced + raise + + def push_to_hub(self) -> Optional[CommitInfo]: + """ + Push folder to the Hub and return the commit info. + + > [!WARNING] + > This method is not meant to be called directly. It is run in the background by the scheduler, respecting a + > queue mechanism to avoid concurrent commits. Making a direct call to the method might lead to concurrency + > issues. + + The default behavior of `push_to_hub` is to assume an append-only folder. It lists all files in the folder and + uploads only changed files. If no changes are found, the method returns without committing anything. If you want + to change this behavior, you can inherit from [`CommitScheduler`] and override this method. This can be useful + for example to compress data together in a single file before committing. For more details and examples, check + out our [integration guide](https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#scheduled-uploads). 
+ """ + # Check files to upload (with lock) + with self.lock: + logger.debug("Listing files to upload for scheduled commit.") + + # List files from folder (taken from `_prepare_upload_folder_additions`) + relpath_to_abspath = { + path.relative_to(self.folder_path).as_posix(): path + for path in sorted(self.folder_path.glob("**/*")) # sorted to be deterministic + if path.is_file() + } + prefix = f"{self.path_in_repo.strip('/')}/" if self.path_in_repo else "" + + # Filter with pattern + filter out unchanged files + retrieve current file size + files_to_upload: List[_FileToUpload] = [] + for relpath in filter_repo_objects( + relpath_to_abspath.keys(), allow_patterns=self.allow_patterns, ignore_patterns=self.ignore_patterns + ): + local_path = relpath_to_abspath[relpath] + stat = local_path.stat() + if self.last_uploaded.get(local_path) is None or self.last_uploaded[local_path] != stat.st_mtime: + files_to_upload.append( + _FileToUpload( + local_path=local_path, + path_in_repo=prefix + relpath, + size_limit=stat.st_size, + last_modified=stat.st_mtime, + ) + ) + + # Return if nothing to upload + if len(files_to_upload) == 0: + logger.debug("Dropping schedule commit: no changed file to upload.") + return None + + # Convert `_FileToUpload` as `CommitOperationAdd` (=> compute file shas + limit to file size) + logger.debug("Removing unchanged files since previous scheduled commit.") + add_operations = [ + CommitOperationAdd( + # Cap the file to its current size, even if the user append data to it while a scheduled commit is happening + path_or_fileobj=PartialFileIO(file_to_upload.local_path, size_limit=file_to_upload.size_limit), + path_in_repo=file_to_upload.path_in_repo, + ) + for file_to_upload in files_to_upload + ] + + # Upload files (append mode expected - no need for lock) + logger.debug("Uploading files for scheduled commit.") + commit_info = self.api.create_commit( + repo_id=self.repo_id, + repo_type=self.repo_type, + operations=add_operations, + 
class PartialFileIO(BytesIO):
    """A file-like object that reads only the first part of a file.

    Useful to upload a file to the Hub when the user might still be appending data to it. Only the first part of the
    file is uploaded (i.e. the part that was available when the filesystem was first scanned).

    In practice, only used internally by the CommitScheduler to regularly push a folder to the Hub with minimal
    disturbance for the user. The object is passed to `CommitOperationAdd`.

    Only supports `read`, `tell` and `seek` methods. Accessing any other public attribute raises
    `NotImplementedError`.

    Args:
        file_path (`str` or `Path`):
            Path to the file to read.
        size_limit (`int`):
            The maximum number of bytes to read from the file. If the file is larger than this, only the first part
            will be read (and uploaded).
    """

    def __init__(self, file_path: Union[str, Path], size_limit: int) -> None:
        self._file_path = Path(file_path)
        self._file = self._file_path.open("rb")
        # Never advertise more bytes than the file actually holds right now
        self._size_limit = min(size_limit, os.fstat(self._file.fileno()).st_size)

    def __del__(self) -> None:
        # `_file` is missing when `open()` failed in `__init__`; nothing to close then
        file = getattr(self, "_file", None)
        if file is not None:
            file.close()
        return super().__del__()

    def __repr__(self) -> str:
        return f"<PartialFileIO file_path={self._file_path} size_limit={self._size_limit}>"

    def __len__(self) -> int:
        return self._size_limit

    def __getattribute__(self, name: str):
        if name.startswith("_") or name in ("read", "tell", "seek"):  # only 3 public methods supported
            return super().__getattribute__(name)
        raise NotImplementedError(f"PartialFileIO does not support '{name}'.")

    def tell(self) -> int:
        """Return the current file position."""
        return self._file.tell()

    def seek(self, __offset: int, __whence: int = SEEK_SET) -> int:
        """Change the stream position to the given offset.

        Behavior is the same as a regular file, except that the position is capped to the size limit.
        """
        if __whence == SEEK_END:
            # SEEK_END => set from the truncated end
            __offset = len(self) + __offset
            __whence = SEEK_SET

        pos = self._file.seek(__offset, __whence)
        if pos > self._size_limit:
            return self._file.seek(self._size_limit)
        return pos

    def read(self, __size: Optional[int] = -1) -> bytes:
        """Read at most `__size` bytes from the file.

        Behavior is the same as a regular file, except that it is capped to the size limit.
        """
        # Clamp to 0: a negative size would make the underlying file read up to
        # its real end, i.e. past the size limit.
        remaining = max(0, self._size_limit - self._file.tell())
        if __size is None or __size < 0:
            # Read until file limit
            truncated_size = remaining
        else:
            # Read until file limit or __size
            truncated_size = min(__size, remaining)
        return self._file.read(truncated_size)
"failed" + SCALED_TO_ZERO = "scaledToZero" + + +class InferenceEndpointType(str, Enum): + PUBlIC = "public" + PROTECTED = "protected" + PRIVATE = "private" + + +@dataclass +class InferenceEndpoint: + """ + Contains information about a deployed Inference Endpoint. + + Args: + name (`str`): + The unique name of the Inference Endpoint. + namespace (`str`): + The namespace where the Inference Endpoint is located. + repository (`str`): + The name of the model repository deployed on this Inference Endpoint. + status ([`InferenceEndpointStatus`]): + The current status of the Inference Endpoint. + url (`str`, *optional*): + The URL of the Inference Endpoint, if available. Only a deployed Inference Endpoint will have a URL. + framework (`str`): + The machine learning framework used for the model. + revision (`str`): + The specific model revision deployed on the Inference Endpoint. + task (`str`): + The task associated with the deployed model. + created_at (`datetime.datetime`): + The timestamp when the Inference Endpoint was created. + updated_at (`datetime.datetime`): + The timestamp of the last update of the Inference Endpoint. + type ([`InferenceEndpointType`]): + The type of the Inference Endpoint (public, protected, private). + raw (`Dict`): + The raw dictionary data returned from the API. + token (`str` or `bool`, *optional*): + Authentication token for the Inference Endpoint, if set when requesting the API. Will default to the + locally saved token if not provided. Pass `token=False` if you don't want to send your token to the server. + + Example: + ```python + >>> from huggingface_hub import get_inference_endpoint + >>> endpoint = get_inference_endpoint("my-text-to-image") + >>> endpoint + InferenceEndpoint(name='my-text-to-image', ...) + + # Get status + >>> endpoint.status + 'running' + >>> endpoint.url + 'https://my-text-to-image.region.vendor.endpoints.huggingface.cloud' + + # Run inference + >>> endpoint.client.text_to_image(...) 
+ + # Pause endpoint to save $$$ + >>> endpoint.pause() + + # ... + # Resume and wait for deployment + >>> endpoint.resume() + >>> endpoint.wait() + >>> endpoint.client.text_to_image(...) + ``` + """ + + # Field in __repr__ + name: str = field(init=False) + namespace: str + repository: str = field(init=False) + status: InferenceEndpointStatus = field(init=False) + health_route: str = field(init=False) + url: Optional[str] = field(init=False) + + # Other fields + framework: str = field(repr=False, init=False) + revision: str = field(repr=False, init=False) + task: str = field(repr=False, init=False) + created_at: datetime = field(repr=False, init=False) + updated_at: datetime = field(repr=False, init=False) + type: InferenceEndpointType = field(repr=False, init=False) + + # Raw dict from the API + raw: Dict = field(repr=False) + + # Internal fields + _token: Union[str, bool, None] = field(repr=False, compare=False) + _api: "HfApi" = field(repr=False, compare=False) + + @classmethod + def from_raw( + cls, raw: Dict, namespace: str, token: Union[str, bool, None] = None, api: Optional["HfApi"] = None + ) -> "InferenceEndpoint": + """Initialize object from raw dictionary.""" + if api is None: + from .hf_api import HfApi + + api = HfApi() + if token is None: + token = api.token + + # All other fields are populated in __post_init__ + return cls(raw=raw, namespace=namespace, _token=token, _api=api) + + def __post_init__(self) -> None: + """Populate fields from raw dictionary.""" + self._populate_from_raw() + + @property + def client(self) -> "InferenceClient": + """Returns a client to make predictions on this Inference Endpoint. + + Returns: + [`InferenceClient`]: an inference client pointing to the deployed endpoint. + + Raises: + [`InferenceEndpointError`]: If the Inference Endpoint is not yet deployed. + """ + if self.url is None: + raise InferenceEndpointError( + "Cannot create a client for this Inference Endpoint as it is not yet deployed. 
" + "Please wait for the Inference Endpoint to be deployed using `endpoint.wait()` and try again." + ) + from .inference._client import InferenceClient + + return InferenceClient( + model=self.url, + token=self._token, # type: ignore[arg-type] # boolean token shouldn't be possible. In practice it's ok. + ) + + @property + def async_client(self) -> "AsyncInferenceClient": + """Returns a client to make predictions on this Inference Endpoint. + + Returns: + [`AsyncInferenceClient`]: an asyncio-compatible inference client pointing to the deployed endpoint. + + Raises: + [`InferenceEndpointError`]: If the Inference Endpoint is not yet deployed. + """ + if self.url is None: + raise InferenceEndpointError( + "Cannot create a client for this Inference Endpoint as it is not yet deployed. " + "Please wait for the Inference Endpoint to be deployed using `endpoint.wait()` and try again." + ) + from .inference._generated._async_client import AsyncInferenceClient + + return AsyncInferenceClient( + model=self.url, + token=self._token, # type: ignore[arg-type] # boolean token shouldn't be possible. In practice it's ok. + ) + + def wait(self, timeout: Optional[int] = None, refresh_every: int = 5) -> "InferenceEndpoint": + """Wait for the Inference Endpoint to be deployed. + + Information from the server will be fetched every 1s. If the Inference Endpoint is not deployed after `timeout` + seconds, a [`InferenceEndpointTimeoutError`] will be raised. The [`InferenceEndpoint`] will be mutated in place with the latest + data. + + Args: + timeout (`int`, *optional*): + The maximum time to wait for the Inference Endpoint to be deployed, in seconds. If `None`, will wait + indefinitely. + refresh_every (`int`, *optional*): + The time to wait between each fetch of the Inference Endpoint status, in seconds. Defaults to 5s. + + Returns: + [`InferenceEndpoint`]: the same Inference Endpoint, mutated in place with the latest data. 
+ + Raises: + [`InferenceEndpointError`] + If the Inference Endpoint ended up in a failed state. + [`InferenceEndpointTimeoutError`] + If the Inference Endpoint is not deployed after `timeout` seconds. + """ + if timeout is not None and timeout < 0: + raise ValueError("`timeout` cannot be negative.") + if refresh_every <= 0: + raise ValueError("`refresh_every` must be positive.") + + start = time.time() + while True: + if self.status == InferenceEndpointStatus.FAILED: + raise InferenceEndpointError( + f"Inference Endpoint {self.name} failed to deploy. Please check the logs for more information." + ) + if self.status == InferenceEndpointStatus.UPDATE_FAILED: + raise InferenceEndpointError( + f"Inference Endpoint {self.name} failed to update. Please check the logs for more information." + ) + if self.status == InferenceEndpointStatus.RUNNING and self.url is not None: + # Verify the endpoint is actually reachable + _health_url = f"{self.url.rstrip('/')}/{self.health_route.lstrip('/')}" + response = get_session().get(_health_url, headers=self._api._build_hf_headers(token=self._token)) + if response.status_code == 200: + logger.info("Inference Endpoint is ready to be used.") + return self + + if timeout is not None: + if time.time() - start > timeout: + raise InferenceEndpointTimeoutError("Timeout while waiting for Inference Endpoint to be deployed.") + logger.info(f"Inference Endpoint is not deployed yet ({self.status}). Waiting {refresh_every}s...") + time.sleep(refresh_every) + self.fetch() + + def fetch(self) -> "InferenceEndpoint": + """Fetch latest information about the Inference Endpoint. + + Returns: + [`InferenceEndpoint`]: the same Inference Endpoint, mutated in place with the latest data. 
+ """ + obj = self._api.get_inference_endpoint(name=self.name, namespace=self.namespace, token=self._token) # type: ignore [arg-type] + self.raw = obj.raw + self._populate_from_raw() + return self + + def update( + self, + *, + # Compute update + accelerator: Optional[str] = None, + instance_size: Optional[str] = None, + instance_type: Optional[str] = None, + min_replica: Optional[int] = None, + max_replica: Optional[int] = None, + scale_to_zero_timeout: Optional[int] = None, + # Model update + repository: Optional[str] = None, + framework: Optional[str] = None, + revision: Optional[str] = None, + task: Optional[str] = None, + custom_image: Optional[Dict] = None, + secrets: Optional[Dict[str, str]] = None, + ) -> "InferenceEndpoint": + """Update the Inference Endpoint. + + This method allows the update of either the compute configuration, the deployed model, or both. All arguments are + optional but at least one must be provided. + + This is an alias for [`HfApi.update_inference_endpoint`]. The current object is mutated in place with the + latest data from the server. + + Args: + accelerator (`str`, *optional*): + The hardware accelerator to be used for inference (e.g. `"cpu"`). + instance_size (`str`, *optional*): + The size or type of the instance to be used for hosting the model (e.g. `"x4"`). + instance_type (`str`, *optional*): + The cloud instance type where the Inference Endpoint will be deployed (e.g. `"intel-icl"`). + min_replica (`int`, *optional*): + The minimum number of replicas (instances) to keep running for the Inference Endpoint. + max_replica (`int`, *optional*): + The maximum number of replicas (instances) to scale to for the Inference Endpoint. + scale_to_zero_timeout (`int`, *optional*): + The duration in minutes before an inactive endpoint is scaled to zero. + + repository (`str`, *optional*): + The name of the model repository associated with the Inference Endpoint (e.g. `"gpt2"`). 
+ framework (`str`, *optional*): + The machine learning framework used for the model (e.g. `"custom"`). + revision (`str`, *optional*): + The specific model revision to deploy on the Inference Endpoint (e.g. `"6c0e6080953db56375760c0471a8c5f2929baf11"`). + task (`str`, *optional*): + The task on which to deploy the model (e.g. `"text-classification"`). + custom_image (`Dict`, *optional*): + A custom Docker image to use for the Inference Endpoint. This is useful if you want to deploy an + Inference Endpoint running on the `text-generation-inference` (TGI) framework (see examples). + secrets (`Dict[str, str]`, *optional*): + Secret values to inject in the container environment. + Returns: + [`InferenceEndpoint`]: the same Inference Endpoint, mutated in place with the latest data. + """ + # Make API call + obj = self._api.update_inference_endpoint( + name=self.name, + namespace=self.namespace, + accelerator=accelerator, + instance_size=instance_size, + instance_type=instance_type, + min_replica=min_replica, + max_replica=max_replica, + scale_to_zero_timeout=scale_to_zero_timeout, + repository=repository, + framework=framework, + revision=revision, + task=task, + custom_image=custom_image, + secrets=secrets, + token=self._token, # type: ignore [arg-type] + ) + + # Mutate current object + self.raw = obj.raw + self._populate_from_raw() + return self + + def pause(self) -> "InferenceEndpoint": + """Pause the Inference Endpoint. + + A paused Inference Endpoint will not be charged. It can be resumed at any time using [`InferenceEndpoint.resume`]. + This is different than scaling the Inference Endpoint to zero with [`InferenceEndpoint.scale_to_zero`], which + would be automatically restarted when a request is made to it. + + This is an alias for [`HfApi.pause_inference_endpoint`]. The current object is mutated in place with the + latest data from the server. + + Returns: + [`InferenceEndpoint`]: the same Inference Endpoint, mutated in place with the latest data. 
+ """ + obj = self._api.pause_inference_endpoint(name=self.name, namespace=self.namespace, token=self._token) # type: ignore [arg-type] + self.raw = obj.raw + self._populate_from_raw() + return self + + def resume(self, running_ok: bool = True) -> "InferenceEndpoint": + """Resume the Inference Endpoint. + + This is an alias for [`HfApi.resume_inference_endpoint`]. The current object is mutated in place with the + latest data from the server. + + Args: + running_ok (`bool`, *optional*): + If `True`, the method will not raise an error if the Inference Endpoint is already running. Defaults to + `True`. + + Returns: + [`InferenceEndpoint`]: the same Inference Endpoint, mutated in place with the latest data. + """ + obj = self._api.resume_inference_endpoint( + name=self.name, namespace=self.namespace, running_ok=running_ok, token=self._token + ) # type: ignore [arg-type] + self.raw = obj.raw + self._populate_from_raw() + return self + + def scale_to_zero(self) -> "InferenceEndpoint": + """Scale Inference Endpoint to zero. + + An Inference Endpoint scaled to zero will not be charged. It will be resume on the next request to it, with a + cold start delay. This is different than pausing the Inference Endpoint with [`InferenceEndpoint.pause`], which + would require a manual resume with [`InferenceEndpoint.resume`]. + + This is an alias for [`HfApi.scale_to_zero_inference_endpoint`]. The current object is mutated in place with the + latest data from the server. + + Returns: + [`InferenceEndpoint`]: the same Inference Endpoint, mutated in place with the latest data. + """ + obj = self._api.scale_to_zero_inference_endpoint(name=self.name, namespace=self.namespace, token=self._token) # type: ignore [arg-type] + self.raw = obj.raw + self._populate_from_raw() + return self + + def delete(self) -> None: + """Delete the Inference Endpoint. + + This operation is not reversible. 
def _populate_from_raw(self) -> None:
    """Copy the values held in `self.raw` onto the instance attributes.

    Called in __post_init__ + each time the Inference Endpoint is updated.
    Only `url` is read with `.get()`; every other key is accessed directly,
    so a missing key raises `KeyError`.
    """
    raw = self.raw

    # Repr fields
    self.name = raw["name"]
    model = raw["model"]
    self.repository = model["repository"]
    status = raw["status"]
    self.status = status["state"]
    self.url = status.get("url")
    self.health_route = raw["healthRoute"]

    # Other fields
    self.framework = model["framework"]
    self.revision = model["revision"]
    self.task = model["task"]
    self.created_at = parse_datetime(status["createdAt"])
    self.updated_at = parse_datetime(status["updatedAt"])
    self.type = raw["type"]
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from dataclasses import dataclass +from datetime import datetime +from enum import Enum +from typing import Any, Dict, List, Optional, Union + +from huggingface_hub import constants +from huggingface_hub._space_api import SpaceHardware +from huggingface_hub.utils._datetime import parse_datetime + + +class JobStage(str, Enum): + """ + Enumeration of possible stage of a Job on the Hub. + + Value can be compared to a string: + ```py + assert JobStage.COMPLETED == "COMPLETED" + ``` + Possible values are: `COMPLETED`, `CANCELED`, `ERROR`, `DELETED`, `RUNNING`. + Taken from https://github.com/huggingface/moon-landing/blob/main/server/job_types/JobInfo.ts#L61 (private url). + """ + + # Copied from moon-landing > server > lib > Job.ts + COMPLETED = "COMPLETED" + CANCELED = "CANCELED" + ERROR = "ERROR" + DELETED = "DELETED" + RUNNING = "RUNNING" + + +@dataclass +class JobStatus: + stage: JobStage + message: Optional[str] + + +@dataclass +class JobOwner: + id: str + name: str + type: str + + +@dataclass +class JobInfo: + """ + Contains information about a Job. + + Args: + id (`str`): + Job ID. + created_at (`datetime` or `None`): + When the Job was created. + docker_image (`str` or `None`): + The Docker image from Docker Hub used for the Job. + Can be None if space_id is present instead. + space_id (`str` or `None`): + The Docker image from Hugging Face Spaces used for the Job. + Can be None if docker_image is present instead. + command (`List[str]` or `None`): + Command of the Job, e.g. 
`["python", "-c", "print('hello world')"]` + arguments (`List[str]` or `None`): + Arguments passed to the command + environment (`Dict[str]` or `None`): + Environment variables of the Job as a dictionary. + secrets (`Dict[str]` or `None`): + Secret environment variables of the Job (encrypted). + flavor (`str` or `None`): + Flavor for the hardware, as in Hugging Face Spaces. See [`SpaceHardware`] for possible values. + E.g. `"cpu-basic"`. + status: (`JobStatus` or `None`): + Status of the Job, e.g. `JobStatus(stage="RUNNING", message=None)` + See [`JobStage`] for possible stage values. + owner: (`JobOwner` or `None`): + Owner of the Job, e.g. `JobOwner(id="5e9ecfc04957053f60648a3e", name="lhoestq", type="user")` + + Example: + + ```python + >>> from huggingface_hub import run_job + >>> job = run_job( + ... image="python:3.12", + ... command=["python", "-c", "print('Hello from the cloud!')"] + ... ) + >>> job + JobInfo(id='687fb701029421ae5549d998', created_at=datetime.datetime(2025, 7, 22, 16, 6, 25, 79000, tzinfo=datetime.timezone.utc), docker_image='python:3.12', space_id=None, command=['python', '-c', "print('Hello from the cloud!')"], arguments=[], environment={}, secrets={}, flavor='cpu-basic', status=JobStatus(stage='RUNNING', message=None), owner=JobOwner(id='5e9ecfc04957053f60648a3e', name='lhoestq', type='user'), endpoint='https://huggingface.co', url='https://huggingface.co/jobs/lhoestq/687fb701029421ae5549d998') + >>> job.id + '687fb701029421ae5549d998' + >>> job.url + 'https://huggingface.co/jobs/lhoestq/687fb701029421ae5549d998' + >>> job.status.stage + 'RUNNING' + ``` + """ + + id: str + created_at: Optional[datetime] + docker_image: Optional[str] + space_id: Optional[str] + command: Optional[List[str]] + arguments: Optional[List[str]] + environment: Optional[Dict[str, Any]] + secrets: Optional[Dict[str, Any]] + flavor: Optional[SpaceHardware] + status: JobStatus + owner: JobOwner + + # Inferred fields + endpoint: str + url: str + + def __init__(self, 
@dataclass
class JobSpec:
    """Specification of a Job, built from keyword arguments.

    The constructor accepts arbitrary keyword arguments and ignores unknown keys.
    `docker_image` and `space_id` may be given either in camelCase (`dockerImage`,
    `spaceId`) or in snake_case; the camelCase value wins when it is truthy.
    Every other attribute is read from the key of the same name and defaults to
    `None` when the key is absent.
    """

    docker_image: Optional[str]
    space_id: Optional[str]
    command: Optional[List[str]]
    arguments: Optional[List[str]]
    environment: Optional[Dict[str, Any]]
    secrets: Optional[Dict[str, Any]]
    flavor: Optional["SpaceHardware"]
    timeout: Optional[int]
    tags: Optional[List[str]]
    arch: Optional[str]

    def __init__(self, **kwargs) -> None:
        # Keys that exist under two spellings: camelCase first, snake_case as fallback.
        self.docker_image = kwargs.get("dockerImage") or kwargs.get("docker_image")
        self.space_id = kwargs.get("spaceId") or kwargs.get("space_id")

        # Keys whose name is identical to the attribute name.
        same_name_keys = (
            "command",
            "arguments",
            "environment",
            "secrets",
            "flavor",
            "timeout",
            "tags",
            "arch",
        )
        for key in same_name_keys:
            setattr(self, key, kwargs.get(key))
next_job_run_at: Optional[datetime] + + def __init__(self, **kwargs) -> None: + last_job = kwargs.get("lastJob") or kwargs.get("last_job") + self.last_job = LastJobInfo(**last_job) if last_job else None + next_job_run_at = kwargs.get("nextJobRunAt") or kwargs.get("next_job_run_at") + self.next_job_run_at = parse_datetime(str(next_job_run_at)) if next_job_run_at else None + + +@dataclass +class ScheduledJobInfo: + """ + Contains information about a Job. + + Args: + id (`str`): + Scheduled Job ID. + created_at (`datetime` or `None`): + When the scheduled Job was created. + tags (`List[str]` or `None`): + The tags of the scheduled Job. + schedule (`str` or `None`): + One of "@annually", "@yearly", "@monthly", "@weekly", "@daily", "@hourly", or a + CRON schedule expression (e.g., '0 9 * * 1' for 9 AM every Monday). + suspend (`bool` or `None`): + Whether the scheduled job is suspended (paused). + concurrency (`bool` or `None`): + Whether multiple instances of this Job can run concurrently. + status (`ScheduledJobStatus` or `None`): + Status of the scheduled Job. + owner: (`JobOwner` or `None`): + Owner of the scheduled Job, e.g. `JobOwner(id="5e9ecfc04957053f60648a3e", name="lhoestq", type="user")` + job_spec: (`JobSpec` or `None`): + Specifications of the Job. + + Example: + + ```python + >>> from huggingface_hub import run_job + >>> scheduled_job = create_scheduled_job( + ... image="python:3.12", + ... command=["python", "-c", "print('Hello from the cloud!')"], + ... schedule="@hourly", + ... 
) + >>> scheduled_job.id + '687fb701029421ae5549d999' + >>> scheduled_job.status.next_job_run_at + datetime.datetime(2025, 7, 22, 17, 6, 25, 79000, tzinfo=datetime.timezone.utc) + ``` + """ + + id: str + created_at: Optional[datetime] + job_spec: JobSpec + schedule: Optional[str] + suspend: Optional[bool] + concurrency: Optional[bool] + status: ScheduledJobStatus + owner: JobOwner + + def __init__(self, **kwargs) -> None: + self.id = kwargs["id"] + created_at = kwargs.get("createdAt") or kwargs.get("created_at") + self.created_at = parse_datetime(created_at) if created_at else None + self.job_spec = JobSpec(**(kwargs.get("job_spec") or kwargs.get("jobSpec", {}))) + self.schedule = kwargs.get("schedule") + self.suspend = kwargs.get("suspend") + self.concurrency = kwargs.get("concurrency") + status = kwargs.get("status", {}) + self.status = ScheduledJobStatus( + last_job=status.get("last_job") or status.get("lastJob"), + next_job_run_at=status.get("next_job_run_at") or status.get("nextJobRunAt"), + ) + owner = kwargs.get("owner", {}) + self.owner = JobOwner(id=owner["id"], name=owner["name"], type=owner["type"]) + + +def _create_job_spec( + *, + image: str, + command: List[str], + env: Optional[Dict[str, Any]], + secrets: Optional[Dict[str, Any]], + flavor: Optional[SpaceHardware], + timeout: Optional[Union[int, float, str]], +) -> Dict[str, Any]: + # prepare job spec to send to HF Jobs API + job_spec: Dict[str, Any] = { + "command": command, + "arguments": [], + "environment": env or {}, + "flavor": flavor or SpaceHardware.CPU_BASIC, + } + # secrets are optional + if secrets: + job_spec["secrets"] = secrets + # timeout is optional + if timeout: + time_units_factors = {"s": 1, "m": 60, "h": 3600, "d": 3600 * 24} + if isinstance(timeout, str) and timeout[-1] in time_units_factors: + job_spec["timeoutSeconds"] = int(float(timeout[:-1]) * time_units_factors[timeout[-1]]) + else: + job_spec["timeoutSeconds"] = int(timeout) + # input is either from docker hub or from HF 
spaces + for prefix in ( + "https://huggingface.co/spaces/", + "https://hf.co/spaces/", + "huggingface.co/spaces/", + "hf.co/spaces/", + ): + if image.startswith(prefix): + job_spec["spaceId"] = image[len(prefix) :] + break + else: + job_spec["dockerImage"] = image + return job_spec diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/_local_folder.py b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/_local_folder.py new file mode 100644 index 0000000000000000000000000000000000000000..37f6c32a760ecf03794c129735fe2e15516952d1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/_local_folder.py @@ -0,0 +1,447 @@ +# coding=utf-8 +# Copyright 2024-present, the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Contains utilities to handle the `../.cache/huggingface` folder in local directories. + +First discussed in https://github.com/huggingface/huggingface_hub/issues/1738 to store +download metadata when downloading files from the hub to a local directory (without +using the cache). 
+ +./.cache/huggingface folder structure: +[4.0K] data +├── [4.0K] .cache +│ └── [4.0K] huggingface +│ └── [4.0K] download +│ ├── [ 16] file.parquet.metadata +│ ├── [ 16] file.txt.metadata +│ └── [4.0K] folder +│ └── [ 16] file.parquet.metadata +│ +├── [6.5G] file.parquet +├── [1.5K] file.txt +└── [4.0K] folder + └── [ 16] file.parquet + + +Download metadata file structure: +``` +# file.txt.metadata +11c5a3d5811f50298f278a704980280950aedb10 +a16a55fda99d2f2e7b69cce5cf93ff4ad3049930 +1712656091.123 + +# file.parquet.metadata +11c5a3d5811f50298f278a704980280950aedb10 +7c5d3f4b8b76583b422fcb9189ad6c89d5d97a094541ce8932dce3ecabde1421 +1712656091.123 +} +``` +""" + +import base64 +import hashlib +import logging +import os +import time +from dataclasses import dataclass +from pathlib import Path +from typing import Optional + +from .utils import WeakFileLock + + +logger = logging.getLogger(__name__) + + +@dataclass +class LocalDownloadFilePaths: + """ + Paths to the files related to a download process in a local dir. + + Returned by [`get_local_download_paths`]. + + Attributes: + file_path (`Path`): + Path where the file will be saved. + lock_path (`Path`): + Path to the lock file used to ensure atomicity when reading/writing metadata. + metadata_path (`Path`): + Path to the metadata file. + """ + + file_path: Path + lock_path: Path + metadata_path: Path + + def incomplete_path(self, etag: str) -> Path: + """Return the path where a file will be temporarily downloaded before being moved to `file_path`.""" + path = self.metadata_path.parent / f"{_short_hash(self.metadata_path.name)}.{etag}.incomplete" + resolved_path = str(path.resolve()) + # Some Windows versions do not allow for paths longer than 255 characters. + # In this case, we must specify it as an extended path by using the "\\?\" prefix. 
@dataclass
class LocalUploadFileMetadata:
    """
    Metadata about a file in the local directory related to an upload process.

    Only `size` is mandatory. Every other field defaults to a value meaning
    "we don't know yet".
    """

    size: int

    # Default values correspond to "we don't know yet"
    timestamp: Optional[float] = None
    should_ignore: Optional[bool] = None
    sha256: Optional[str] = None
    upload_mode: Optional[str] = None
    remote_oid: Optional[str] = None
    is_uploaded: bool = False
    is_committed: bool = False

    def save(self, paths: "LocalUploadFilePaths") -> None:
        """Write the metadata to `paths.metadata_path` and refresh `self.timestamp`.

        The file holds one value per line, in a fixed order: timestamp, size,
        should_ignore, sha256, upload_mode, remote_oid, is_uploaded, is_committed.
        An unknown (`None`) optional value is written as an empty line so that the
        position of the following values is preserved. Booleans are written as 0/1.
        The write happens while holding the lock at `paths.lock_path`.
        """
        with WeakFileLock(paths.lock_path):
            with paths.metadata_path.open("w") as f:
                new_timestamp = time.time()
                record = [
                    str(new_timestamp),
                    str(self.size),  # never None
                    "" if self.should_ignore is None else str(int(self.should_ignore)),
                    "" if self.sha256 is None else self.sha256,
                    "" if self.upload_mode is None else self.upload_mode,
                    "" if self.remote_oid is None else self.remote_oid,
                    str(int(self.is_uploaded)),
                    str(int(self.is_committed)),
                ]
                f.write("".join(value + "\n" for value in record))

            # Only updated once the file has been written.
            self.timestamp = new_timestamp
def get_local_upload_paths(local_dir: Path, filename: str) -> LocalUploadFilePaths:
    """Compute the paths involved in uploading one file from a local directory.

    The parent folders of the file path and of the metadata path are created if
    they do not exist yet.

    Args:
        local_dir (`Path`):
            Path to the local directory that is uploaded.
        filename (`str`):
            Path of the file in the repo.

    Return:
        [`LocalUploadFilePaths`]: the paths to the files (file_path, lock_path, metadata_path),
        along with `path_in_repo` set to `filename`.

    Raises:
        `ValueError`: on Windows, if the converted filename starts with `..\\` or
        contains `\\..\\`.
    """
    # `filename` is '/'-separated (path in the Hub repository): convert it to the
    # separator of the current platform.
    local_relpath = os.path.join(*filename.split("/"))
    on_windows = os.name == "nt"
    if on_windows and (local_relpath.startswith("..\\") or "\\..\\" in local_relpath):
        raise ValueError(
            f"Invalid filename: cannot handle filename '{local_relpath}' on Windows. Please ask the repository"
            " owner to rename this file."
        )

    file_path = local_dir / local_relpath
    metadata_path = _huggingface_dir(local_dir) / "upload" / f"{local_relpath}.metadata"
    lock_path = metadata_path.with_suffix(".lock")

    # Some Windows versions do not allow for paths longer than 255 characters.
    # In this case, we must specify it as an extended path by using the "\\?\" prefix
    extended_prefix = "\\\\?\\"
    if on_windows and not str(local_dir).startswith(extended_prefix) and len(os.path.abspath(lock_path)) > 255:
        file_path = Path(extended_prefix + os.path.abspath(file_path))
        lock_path = Path(extended_prefix + os.path.abspath(lock_path))
        metadata_path = Path(extended_prefix + os.path.abspath(metadata_path))

    for folder in (file_path.parent, metadata_path.parent):
        folder.mkdir(parents=True, exist_ok=True)

    return LocalUploadFilePaths(
        path_in_repo=filename,
        file_path=file_path,
        lock_path=lock_path,
        metadata_path=metadata_path,
    )
def read_upload_metadata(local_dir: Path, filename: str) -> LocalUploadFileMetadata:
    """Read metadata about a file in the local directory related to an upload process.

    TODO: factorize logic with `read_download_metadata`.

    Args:
        local_dir (`Path`):
            Path to the local directory that is uploaded.
        filename (`str`):
            Path of the file in the repo.

    Return:
        [`LocalUploadFileMetadata`]: the stored metadata if it exists, is readable and is not
        older than the last modification of the file. Otherwise a fresh metadata object that
        only carries the current size of the file. This function never returns `None`.

    Raises:
        `FileNotFoundError`: if no valid metadata is available and the file itself does not
        exist (its size cannot be read).
    """
    paths = get_local_upload_paths(local_dir, filename)
    with WeakFileLock(paths.lock_path):
        if paths.metadata_path.exists():
            # Stays `None` when the metadata file cannot be parsed, so that the checks
            # below are skipped instead of failing on an unbound variable.
            metadata = None
            try:
                with paths.metadata_path.open() as f:
                    # One value per line, in the order written by `LocalUploadFileMetadata.save`.
                    # An empty line stands for "unknown" (None) on optional values.
                    timestamp = float(f.readline().strip())

                    size = int(f.readline().strip())  # never None

                    _should_ignore = f.readline().strip()
                    should_ignore = None if _should_ignore == "" else bool(int(_should_ignore))

                    _sha256 = f.readline().strip()
                    sha256 = None if _sha256 == "" else _sha256

                    _upload_mode = f.readline().strip()
                    upload_mode = None if _upload_mode == "" else _upload_mode
                    if upload_mode not in (None, "regular", "lfs"):
                        raise ValueError(f"Invalid upload mode in metadata {paths.path_in_repo}: {upload_mode}")

                    _remote_oid = f.readline().strip()
                    remote_oid = None if _remote_oid == "" else _remote_oid

                    is_uploaded = bool(int(f.readline().strip()))
                    is_committed = bool(int(f.readline().strip()))

                    metadata = LocalUploadFileMetadata(
                        timestamp=timestamp,
                        size=size,
                        should_ignore=should_ignore,
                        sha256=sha256,
                        upload_mode=upload_mode,
                        remote_oid=remote_oid,
                        is_uploaded=is_uploaded,
                        is_committed=is_committed,
                    )
            except Exception as e:
                # remove the metadata file if it is corrupted / not the right format
                logger.warning(
                    f"Invalid metadata file {paths.metadata_path}: {e}. Removing it from disk and continue."
                )
                try:
                    paths.metadata_path.unlink()
                except Exception as unlink_error:
                    logger.warning(
                        f"Could not remove corrupted metadata file {paths.metadata_path}: {unlink_error}"
                    )

            if metadata is not None:
                # TODO: can we do better?
                if (
                    metadata.timestamp is not None
                    and metadata.is_uploaded  # file was uploaded
                    and not metadata.is_committed  # but not committed
                    and time.time() - metadata.timestamp > 20 * 3600  # and it's been more than 20 hours
                ):  # => we consider it as garbage-collected by S3
                    metadata.is_uploaded = False

                # check if the file exists and hasn't been modified since the metadata was saved
                try:
                    if metadata.timestamp is not None and paths.file_path.stat().st_mtime <= metadata.timestamp:
                        return metadata
                    logger.info(f"Ignored metadata for '{filename}' (outdated). Will re-compute hash.")
                except FileNotFoundError:
                    # file does not exist => metadata is outdated
                    pass

    # empty metadata => we don't know anything except its size
    return LocalUploadFileMetadata(size=paths.file_path.stat().st_size)
+ gitignore = path / ".gitignore" + gitignore_lock = path / ".gitignore.lock" + if not gitignore.exists(): + try: + with WeakFileLock(gitignore_lock, timeout=0.1): + gitignore.write_text("*") + except IndexError: + pass + except OSError: # TimeoutError, FileNotFoundError, PermissionError, etc. + pass + try: + gitignore_lock.unlink() + except OSError: + pass + return path + + +def _short_hash(filename: str) -> str: + return base64.urlsafe_b64encode(hashlib.sha1(filename.encode()).digest()).decode() diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/_login.py b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/_login.py new file mode 100644 index 0000000000000000000000000000000000000000..8f721b68348fc3abeb2f90b6a756cb125ce19571 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/_login.py @@ -0,0 +1,514 @@ +# Copyright 2020 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Contains methods to log in to the Hub.""" + +import os +import subprocess +from getpass import getpass +from pathlib import Path +from typing import Optional + +from . 
@_deprecate_arguments(
    version="1.0",
    deprecated_args="write_permission",
    custom_message="Fine-grained tokens added complexity to the permissions, making it irrelevant to check if a token has 'write' access.",
)
@_deprecate_positional_args(version="1.0")
def login(
    token: Optional[str] = None,
    *,
    add_to_git_credential: bool = False,
    new_session: bool = True,
    write_permission: bool = False,
) -> None:
    """Login the machine to access the Hub.

    When `token` is provided, it is handed to the private `_login` helper together
    with `add_to_git_credential`. When `token` is `None`, the user is prompted:
    through [`notebook_login`] if `is_notebook()` reports a notebook, through
    [`interpreter_login`] otherwise. In both prompting cases only `new_session` is
    forwarded.

    Args:
        token (`str`, *optional*):
            User access token to generate from https://huggingface.co/settings/token.
        add_to_git_credential (`bool`, defaults to `False`):
            Forwarded to `_login` when `token` is given. If `token` is `None`, this
            value is not used by this function.
        new_session (`bool`, defaults to `True`):
            Forwarded to [`notebook_login`] / [`interpreter_login`] when `token` is `None`.
        write_permission (`bool`):
            Ignored and deprecated argument.
    """
    if token is None:
        # No token given: prompt the user with the UI matching the environment.
        prompt = notebook_login if is_notebook() else interpreter_login
        prompt(new_session=new_session)
        return

    if not add_to_git_credential:
        logger.info(
            "The token has not been saved to the git credentials helper. Pass "
            "`add_to_git_credential=True` in this function directly or "
            "`--add-to-git-credential` if using via `hf`CLI if "
            "you want to set the git credential as well."
        )
    _login(token, add_to_git_credential=add_to_git_credential)
def auth_switch(token_name: str, add_to_git_credential: bool = False) -> None:
    """Switch to a different access token.

    Args:
        token_name (`str`):
            Name of the access token to switch to.
        add_to_git_credential (`bool`, defaults to `False`):
            Forwarded to `_set_active_token` along with `token_name`.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError):
            If the access token name is not found.
    """
    selected_token = _get_token_by_name(token_name)
    if not selected_token:
        raise ValueError(f"Access token {token_name} not found in {constants.HF_STORED_TOKENS_PATH}")

    # Write token to HF_TOKEN_PATH
    _set_active_token(token_name, add_to_git_credential)
    logger.info(f"The current active token is: {token_name}")

    # Nothing more to report unless the environment provides a different token.
    env_token = _get_token_from_environment()
    if env_token is None or env_token == selected_token:
        return
    logger.warning(
        "The environment variable `HF_TOKEN` is set and will override the access token you've just switched to."
    )
+ ) + + +### +# Interpreter-based login (text) +### + + +@_deprecate_arguments( + version="1.0", + deprecated_args="write_permission", + custom_message="Fine-grained tokens added complexity to the permissions, making it irrelevant to check if a token has 'write' access.", +) +@_deprecate_positional_args(version="1.0") +def interpreter_login(*, new_session: bool = True, write_permission: bool = False) -> None: + """ + Displays a prompt to log in to the HF website and store the token. + + This is equivalent to [`login`] without passing a token when not run in a notebook. + [`interpreter_login`] is useful if you want to force the use of the terminal prompt + instead of a notebook widget. + + For more details, see [`login`]. + + Args: + new_session (`bool`, defaults to `True`): + If `True`, will request a token even if one is already saved on the machine. + write_permission (`bool`): + Ignored and deprecated argument. + """ + if not new_session and get_token() is not None: + logger.info("User is already logged in.") + return + + from .commands.delete_cache import _ask_for_confirmation_no_tui + + print(_HF_LOGO_ASCII) + if get_token() is not None: + logger.info( + " A token is already saved on your machine. Run `hf auth whoami`" + " to get more information or `hf auth logout` if you want" + " to log out." + ) + logger.info(" Setting a new token will erase the existing one.") + + logger.info( + " To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens ." + ) + if os.name == "nt": + logger.info("Token can be pasted using 'Right-Click'.") + token = getpass("Enter your token (input will not be visible): ") + add_to_git_credential = _ask_for_confirmation_no_tui("Add token as git credential?") + + _login(token=token, add_to_git_credential=add_to_git_credential) + + +### +# Notebook-based login (widget) +### + +NOTEBOOK_LOGIN_PASSWORD_HTML = """

Immediately click login after typing your password or +it might be stored in plain text in this notebook file.
""" + + +NOTEBOOK_LOGIN_TOKEN_HTML_START = """

Copy a token from your Hugging Face +tokens page and paste it below.
Immediately click login after copying +your token or it might be stored in plain text in this notebook file.
""" + + +NOTEBOOK_LOGIN_TOKEN_HTML_END = """ +Pro Tip: If you don't already have one, you can create a dedicated +'notebooks' token with 'write' access, that you can then easily reuse for all +notebooks. """ + + +@_deprecate_arguments( + version="1.0", + deprecated_args="write_permission", + custom_message="Fine-grained tokens added complexity to the permissions, making it irrelevant to check if a token has 'write' access.", +) +@_deprecate_positional_args(version="1.0") +def notebook_login(*, new_session: bool = True, write_permission: bool = False) -> None: + """ + Displays a widget to log in to the HF website and store the token. + + This is equivalent to [`login`] without passing a token when run in a notebook. + [`notebook_login`] is useful if you want to force the use of the notebook widget + instead of a prompt in the terminal. + + For more details, see [`login`]. + + Args: + new_session (`bool`, defaults to `True`): + If `True`, will request a token even if one is already saved on the machine. + write_permission (`bool`): + Ignored and deprecated argument. + """ + try: + import ipywidgets.widgets as widgets # type: ignore + from IPython.display import display # type: ignore + except ImportError: + raise ImportError( + "The `notebook_login` function can only be used in a notebook (Jupyter or" + " Colab) and you need the `ipywidgets` module: `pip install ipywidgets`." 
+ ) + if not new_session and get_token() is not None: + logger.info("User is already logged in.") + return + + box_layout = widgets.Layout(display="flex", flex_flow="column", align_items="center", width="50%") + + token_widget = widgets.Password(description="Token:") + git_checkbox_widget = widgets.Checkbox(value=True, description="Add token as git credential?") + token_finish_button = widgets.Button(description="Login") + + login_token_widget = widgets.VBox( + [ + widgets.HTML(NOTEBOOK_LOGIN_TOKEN_HTML_START), + token_widget, + git_checkbox_widget, + token_finish_button, + widgets.HTML(NOTEBOOK_LOGIN_TOKEN_HTML_END), + ], + layout=box_layout, + ) + display(login_token_widget) + + # On click events + def login_token_event(t): + """Event handler for the login button.""" + token = token_widget.value + add_to_git_credential = git_checkbox_widget.value + # Erase token and clear value to make sure it's not saved in the notebook. + token_widget.value = "" + # Hide inputs + login_token_widget.children = [widgets.Label("Connecting...")] + try: + with capture_output() as captured: + _login(token, add_to_git_credential=add_to_git_credential) + message = captured.getvalue() + except Exception as error: + message = str(error) + # Print result (success message or error) + login_token_widget.children = [widgets.Label(line) for line in message.split("\n") if line.strip()] + + token_finish_button.on_click(login_token_event) + + +### +# Login private helpers +### + + +def _login( + token: str, + add_to_git_credential: bool, +) -> None: + from .hf_api import whoami # avoid circular import + + if token.startswith("api_org"): + raise ValueError("You must use your personal account token, not an organization token.") + + token_info = whoami(token) + permission = token_info["auth"]["accessToken"]["role"] + logger.info(f"Token is valid (permission: {permission}).") + + token_name = token_info["auth"]["accessToken"]["displayName"] + # Store token locally + _save_token(token=token, 
token_name=token_name) + # Set active token + _set_active_token(token_name=token_name, add_to_git_credential=add_to_git_credential) + logger.info("Login successful.") + if _get_token_from_environment(): + logger.warning( + "Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured." + ) + else: + logger.info(f"The current active token is: `{token_name}`") + + +def _logout_from_token(token_name: str) -> None: + """Logout from a specific access token. + + Args: + token_name (`str`): + The name of the access token to logout from. + Raises: + [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError): + If the access token name is not found. + """ + stored_tokens = get_stored_tokens() + # If there is no access tokens saved or the access token name is not found, do nothing + if not stored_tokens or token_name not in stored_tokens: + return + + token = stored_tokens.pop(token_name) + _save_stored_tokens(stored_tokens) + + if token == _get_token_from_file(): + logger.warning(f"Active token '{token_name}' has been deleted.") + Path(constants.HF_TOKEN_PATH).unlink(missing_ok=True) + + +def _set_active_token( + token_name: str, + add_to_git_credential: bool, +) -> None: + """Set the active access token. + + Args: + token_name (`str`): + The name of the token to set as active. + """ + token = _get_token_by_name(token_name) + if not token: + raise ValueError(f"Token {token_name} not found in {constants.HF_STORED_TOKENS_PATH}") + if add_to_git_credential: + if _is_git_credential_helper_configured(): + set_git_credential(token) + logger.info( + "Your token has been saved in your configured git credential helpers" + + f" ({','.join(list_credential_helpers())})." 
+ ) + else: + logger.warning("Token has not been saved to git credential helper.") + # Write token to HF_TOKEN_PATH + path = Path(constants.HF_TOKEN_PATH) + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(token) + logger.info(f"Your token has been saved to {constants.HF_TOKEN_PATH}") + + +def _is_git_credential_helper_configured() -> bool: + """Check if a git credential helper is configured. + + Warns user if not the case (except for Google Colab where "store" is set by default + by `huggingface_hub`). + """ + helpers = list_credential_helpers() + if len(helpers) > 0: + return True # Do not warn: at least 1 helper is set + + # Only in Google Colab to avoid the warning message + # See https://github.com/huggingface/huggingface_hub/issues/1043#issuecomment-1247010710 + if is_google_colab(): + _set_store_as_git_credential_helper_globally() + return True # Do not warn: "store" is used by default in Google Colab + + # Otherwise, warn user + print( + ANSI.red( + "Cannot authenticate through git-credential as no helper is defined on your" + " machine.\nYou might have to re-authenticate when pushing to the Hugging" + " Face Hub.\nRun the following command in your terminal in case you want to" + " set the 'store' credential helper as default.\n\ngit config --global" + " credential.helper store\n\nRead" + " https://git-scm.com/book/en/v2/Git-Tools-Credential-Storage for more" + " details." + ) + ) + return False + + +def _set_store_as_git_credential_helper_globally() -> None: + """Set globally the credential.helper to `store`. + + To be used only in Google Colab as we assume the user doesn't care about the git + credential config. It is the only particular case where we don't want to display the + warning message in [`notebook_login()`]. 
+ + Related: + - https://github.com/huggingface/huggingface_hub/issues/1043 + - https://github.com/huggingface/huggingface_hub/issues/1051 + - https://git-scm.com/docs/git-credential-store + """ + try: + run_subprocess("git config --global credential.helper store") + except subprocess.CalledProcessError as exc: + raise EnvironmentError(exc.stderr) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/_oauth.py b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/_oauth.py new file mode 100644 index 0000000000000000000000000000000000000000..9f8eb607962bc18fec348fed18ce269524983e23 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/_oauth.py @@ -0,0 +1,460 @@ +import datetime +import hashlib +import logging +import os +import time +import urllib.parse +import warnings +from dataclasses import dataclass +from typing import TYPE_CHECKING, Dict, List, Literal, Optional, Tuple, Union + +from . import constants +from .hf_api import whoami +from .utils import experimental, get_token + + +logger = logging.getLogger(__name__) + +if TYPE_CHECKING: + import fastapi + + +@dataclass +class OAuthOrgInfo: + """ + Information about an organization linked to a user logged in with OAuth. + + Attributes: + sub (`str`): + Unique identifier for the org. OpenID Connect field. + name (`str`): + The org's full name. OpenID Connect field. + preferred_username (`str`): + The org's username. OpenID Connect field. + picture (`str`): + The org's profile picture URL. OpenID Connect field. + is_enterprise (`bool`): + Whether the org is an enterprise org. Hugging Face field. + can_pay (`Optional[bool]`, *optional*): + Whether the org has a payment method set up. Hugging Face field. + role_in_org (`Optional[str]`, *optional*): + The user's role in the org. Hugging Face field. 
+ security_restrictions (`Optional[List[Literal["ip", "token-policy", "mfa", "sso"]]]`, *optional*): + Array of security restrictions that the user hasn't completed for this org. Possible values: "ip", "token-policy", "mfa", "sso". Hugging Face field. + """ + + sub: str + name: str + preferred_username: str + picture: str + is_enterprise: bool + can_pay: Optional[bool] = None + role_in_org: Optional[str] = None + security_restrictions: Optional[List[Literal["ip", "token-policy", "mfa", "sso"]]] = None + + +@dataclass +class OAuthUserInfo: + """ + Information about a user logged in with OAuth. + + Attributes: + sub (`str`): + Unique identifier for the user, even in case of rename. OpenID Connect field. + name (`str`): + The user's full name. OpenID Connect field. + preferred_username (`str`): + The user's username. OpenID Connect field. + email_verified (`Optional[bool]`, *optional*): + Indicates if the user's email is verified. OpenID Connect field. + email (`Optional[str]`, *optional*): + The user's email address. OpenID Connect field. + picture (`str`): + The user's profile picture URL. OpenID Connect field. + profile (`str`): + The user's profile URL. OpenID Connect field. + website (`Optional[str]`, *optional*): + The user's website URL. OpenID Connect field. + is_pro (`bool`): + Whether the user is a pro user. Hugging Face field. + can_pay (`Optional[bool]`, *optional*): + Whether the user has a payment method set up. Hugging Face field. + orgs (`Optional[List[OrgInfo]]`, *optional*): + List of organizations the user is part of. Hugging Face field. + """ + + sub: str + name: str + preferred_username: str + email_verified: Optional[bool] + email: Optional[str] + picture: str + profile: str + website: Optional[str] + is_pro: bool + can_pay: Optional[bool] + orgs: Optional[List[OAuthOrgInfo]] + + +@dataclass +class OAuthInfo: + """ + Information about the OAuth login. + + Attributes: + access_token (`str`): + The access token. 
+ access_token_expires_at (`datetime.datetime`): + The expiration date of the access token. + user_info ([`OAuthUserInfo`]): + The user information. + state (`str`, *optional*): + State passed to the OAuth provider in the original request to the OAuth provider. + scope (`str`): + Granted scope. + """ + + access_token: str + access_token_expires_at: datetime.datetime + user_info: OAuthUserInfo + state: Optional[str] + scope: str + + +@experimental +def attach_huggingface_oauth(app: "fastapi.FastAPI", route_prefix: str = "/"): + """ + Add OAuth endpoints to a FastAPI app to enable OAuth login with Hugging Face. + + How to use: + - Call this method on your FastAPI app to add the OAuth endpoints. + - Inside your route handlers, call `parse_huggingface_oauth(request)` to retrieve the OAuth info. + - If user is logged in, an [`OAuthInfo`] object is returned with the user's info. If not, `None` is returned. + - In your app, make sure to add links to `/oauth/huggingface/login` and `/oauth/huggingface/logout` for the user to log in and out. + + Example: + ```py + from huggingface_hub import attach_huggingface_oauth, parse_huggingface_oauth + + # Create a FastAPI app + app = FastAPI() + + # Add OAuth endpoints to the FastAPI app + attach_huggingface_oauth(app) + + # Add a route that greets the user if they are logged in + @app.get("/") + def greet_json(request: Request): + # Retrieve the OAuth info from the request + oauth_info = parse_huggingface_oauth(request) # e.g. OAuthInfo dataclass + if oauth_info is None: + return {"msg": "Not logged in!"} + return {"msg": f"Hello, {oauth_info.user_info.preferred_username}!"} + ``` + """ + # TODO: handle generic case (handling OAuth in a non-Space environment with custom dev values) (low priority) + + # Add SessionMiddleware to the FastAPI app to store the OAuth info in the session. + # Session Middleware requires a secret key to sign the cookies. 
Let's use a hash + # of the OAuth secret key to make it unique to the Space + updated in case OAuth + # config gets updated. When ran locally, we use an empty string as a secret key. + try: + from starlette.middleware.sessions import SessionMiddleware + except ImportError as e: + raise ImportError( + "Cannot initialize OAuth to due a missing library. Please run `pip install huggingface_hub[oauth]` or add " + "`huggingface_hub[oauth]` to your requirements.txt file in order to install the required dependencies." + ) from e + session_secret = (constants.OAUTH_CLIENT_SECRET or "") + "-v1" + app.add_middleware( + SessionMiddleware, # type: ignore[arg-type] + secret_key=hashlib.sha256(session_secret.encode()).hexdigest(), + same_site="none", + https_only=True, + ) # type: ignore + + # Add OAuth endpoints to the FastAPI app: + # - {route_prefix}/oauth/huggingface/login + # - {route_prefix}/oauth/huggingface/callback + # - {route_prefix}/oauth/huggingface/logout + # If the app is running in a Space, OAuth is enabled normally. + # Otherwise, we mock the endpoints to make the user log in with a fake user profile - without any calls to hf.co. + route_prefix = route_prefix.strip("/") + if os.getenv("SPACE_ID") is not None: + logger.info("OAuth is enabled in the Space. Adding OAuth routes.") + _add_oauth_routes(app, route_prefix=route_prefix) + else: + logger.info("App is not running in a Space. Adding mocked OAuth routes.") + _add_mocked_oauth_routes(app, route_prefix=route_prefix) + + +def parse_huggingface_oauth(request: "fastapi.Request") -> Optional[OAuthInfo]: + """ + Returns the information from a logged in user as a [`OAuthInfo`] object. + + For flexibility and future-proofing, this method is very lax in its parsing and does not raise errors. + Missing fields are set to `None` without a warning. + + Return `None`, if the user is not logged in (no info in session cookie). + + See [`attach_huggingface_oauth`] for an example on how to use this method. 
+ """ + if "oauth_info" not in request.session: + logger.debug("No OAuth info in session.") + return None + + logger.debug("Parsing OAuth info from session.") + oauth_data = request.session["oauth_info"] + user_data = oauth_data.get("userinfo", {}) + orgs_data = user_data.get("orgs", []) + + orgs = ( + [ + OAuthOrgInfo( + sub=org.get("sub"), + name=org.get("name"), + preferred_username=org.get("preferred_username"), + picture=org.get("picture"), + is_enterprise=org.get("isEnterprise"), + can_pay=org.get("canPay"), + role_in_org=org.get("roleInOrg"), + security_restrictions=org.get("securityRestrictions"), + ) + for org in orgs_data + ] + if orgs_data + else None + ) + + user_info = OAuthUserInfo( + sub=user_data.get("sub"), + name=user_data.get("name"), + preferred_username=user_data.get("preferred_username"), + email_verified=user_data.get("email_verified"), + email=user_data.get("email"), + picture=user_data.get("picture"), + profile=user_data.get("profile"), + website=user_data.get("website"), + is_pro=user_data.get("isPro"), + can_pay=user_data.get("canPay"), + orgs=orgs, + ) + + return OAuthInfo( + access_token=oauth_data.get("access_token"), + access_token_expires_at=datetime.datetime.fromtimestamp(oauth_data.get("expires_at")), + user_info=user_info, + state=oauth_data.get("state"), + scope=oauth_data.get("scope"), + ) + + +def _add_oauth_routes(app: "fastapi.FastAPI", route_prefix: str) -> None: + """Add OAuth routes to the FastAPI app (login, callback handler and logout).""" + try: + import fastapi + from authlib.integrations.base_client.errors import MismatchingStateError + from authlib.integrations.starlette_client import OAuth + from fastapi.responses import RedirectResponse + except ImportError as e: + raise ImportError( + "Cannot initialize OAuth to due a missing library. Please run `pip install huggingface_hub[oauth]` or add " + "`huggingface_hub[oauth]` to your requirements.txt file." 
+ ) from e + + # Check environment variables + msg = ( + "OAuth is required but '{}' environment variable is not set. Make sure you've enabled OAuth in your Space by" + " setting `hf_oauth: true` in the Space metadata." + ) + if constants.OAUTH_CLIENT_ID is None: + raise ValueError(msg.format("OAUTH_CLIENT_ID")) + if constants.OAUTH_CLIENT_SECRET is None: + raise ValueError(msg.format("OAUTH_CLIENT_SECRET")) + if constants.OAUTH_SCOPES is None: + raise ValueError(msg.format("OAUTH_SCOPES")) + if constants.OPENID_PROVIDER_URL is None: + raise ValueError(msg.format("OPENID_PROVIDER_URL")) + + # Register OAuth server + oauth = OAuth() + oauth.register( + name="huggingface", + client_id=constants.OAUTH_CLIENT_ID, + client_secret=constants.OAUTH_CLIENT_SECRET, + client_kwargs={"scope": constants.OAUTH_SCOPES}, + server_metadata_url=constants.OPENID_PROVIDER_URL + "/.well-known/openid-configuration", + ) + + login_uri, callback_uri, logout_uri = _get_oauth_uris(route_prefix) + + # Register OAuth endpoints + @app.get(login_uri) + async def oauth_login(request: fastapi.Request) -> RedirectResponse: + """Endpoint that redirects to HF OAuth page.""" + redirect_uri = _generate_redirect_uri(request) + return await oauth.huggingface.authorize_redirect(request, redirect_uri) # type: ignore + + @app.get(callback_uri) + async def oauth_redirect_callback(request: fastapi.Request) -> RedirectResponse: + """Endpoint that handles the OAuth callback.""" + try: + oauth_info = await oauth.huggingface.authorize_access_token(request) # type: ignore + except MismatchingStateError: + # Parse query params + nb_redirects = int(request.query_params.get("_nb_redirects", 0)) + target_url = request.query_params.get("_target_url") + + # Build redirect URI with the same query params as before and bump nb_redirects count + query_params: Dict[str, Union[int, str]] = {"_nb_redirects": nb_redirects + 1} + if target_url: + query_params["_target_url"] = target_url + + redirect_uri = 
f"{login_uri}?{urllib.parse.urlencode(query_params)}" + + # If the user is redirected more than 3 times, it is very likely that the cookie is not working properly. + # (e.g. browser is blocking third-party cookies in iframe). In this case, redirect the user in the + # non-iframe view. + if nb_redirects > constants.OAUTH_MAX_REDIRECTS: + host = os.environ.get("SPACE_HOST") + if host is None: # cannot happen in a Space + raise RuntimeError( + "App is not running in a Space (SPACE_HOST environment variable is not set). Cannot redirect to non-iframe view." + ) from None + host_url = "https://" + host.rstrip("/") + return RedirectResponse(host_url + redirect_uri) + + # Redirect the user to the login page again + return RedirectResponse(redirect_uri) + + # OAuth login worked => store the user info in the session and redirect + logger.debug("Successfully logged in with OAuth. Storing user info in session.") + request.session["oauth_info"] = oauth_info + return RedirectResponse(_get_redirect_target(request)) + + @app.get(logout_uri) + async def oauth_logout(request: fastapi.Request) -> RedirectResponse: + """Endpoint that logs out the user (e.g. delete info from cookie session).""" + logger.debug("Logged out with OAuth. Removing user info from session.") + request.session.pop("oauth_info", None) + return RedirectResponse(_get_redirect_target(request)) + + +def _add_mocked_oauth_routes(app: "fastapi.FastAPI", route_prefix: str = "/") -> None: + """Add fake oauth routes if app is run locally and OAuth is enabled. + + Using OAuth will have the same behavior as in a Space but instead of authenticating with HF, a mocked user profile + is added to the session. + """ + try: + import fastapi + from fastapi.responses import RedirectResponse + from starlette.datastructures import URL + except ImportError as e: + raise ImportError( + "Cannot initialize OAuth to due a missing library. 
Please run `pip install huggingface_hub[oauth]` or add " + "`huggingface_hub[oauth]` to your requirements.txt file." + ) from e + + warnings.warn( + "OAuth is not supported outside of a Space environment. To help you debug your app locally, the oauth endpoints" + " are mocked to return your profile and token. To make it work, your machine must be logged in to Huggingface." + ) + mocked_oauth_info = _get_mocked_oauth_info() + + login_uri, callback_uri, logout_uri = _get_oauth_uris(route_prefix) + + # Define OAuth routes + @app.get(login_uri) + async def oauth_login(request: fastapi.Request) -> RedirectResponse: + """Fake endpoint that redirects to HF OAuth page.""" + # Define target (where to redirect after login) + redirect_uri = _generate_redirect_uri(request) + return RedirectResponse(callback_uri + "?" + urllib.parse.urlencode({"_target_url": redirect_uri})) + + @app.get(callback_uri) + async def oauth_redirect_callback(request: fastapi.Request) -> RedirectResponse: + """Endpoint that handles the OAuth callback.""" + request.session["oauth_info"] = mocked_oauth_info + return RedirectResponse(_get_redirect_target(request)) + + @app.get(logout_uri) + async def oauth_logout(request: fastapi.Request) -> RedirectResponse: + """Endpoint that logs out the user (e.g. delete cookie session).""" + request.session.pop("oauth_info", None) + logout_url = URL("/").include_query_params(**request.query_params) + return RedirectResponse(url=logout_url, status_code=302) # see https://github.com/gradio-app/gradio/pull/9659 + + +def _generate_redirect_uri(request: "fastapi.Request") -> str: + if "_target_url" in request.query_params: + # if `_target_url` already in query params => respect it + target = request.query_params["_target_url"] + else: + # otherwise => keep query params + target = "/?" 
+ urllib.parse.urlencode(request.query_params) + + redirect_uri = request.url_for("oauth_redirect_callback").include_query_params(_target_url=target) + redirect_uri_as_str = str(redirect_uri) + if redirect_uri.netloc.endswith(".hf.space"): + # In Space, FastAPI redirect as http but we want https + redirect_uri_as_str = redirect_uri_as_str.replace("http://", "https://") + return redirect_uri_as_str + + +def _get_redirect_target(request: "fastapi.Request", default_target: str = "/") -> str: + return request.query_params.get("_target_url", default_target) + + +def _get_mocked_oauth_info() -> Dict: + token = get_token() + if token is None: + raise ValueError( + "Your machine must be logged in to HF to debug an OAuth app locally. Please" + " run `hf auth login` or set `HF_TOKEN` as environment variable " + "with one of your access token. You can generate a new token in your " + "settings page (https://huggingface.co/settings/tokens)." + ) + + user = whoami() + if user["type"] != "user": + raise ValueError( + "Your machine is not logged in with a personal account. Please use a " + "personal access token. You can generate a new token in your settings page" + " (https://huggingface.co/settings/tokens)." 
+ ) + + return { + "access_token": token, + "token_type": "bearer", + "expires_in": 8 * 60 * 60, # 8 hours + "id_token": "FOOBAR", + "scope": "openid profile", + "refresh_token": "hf_oauth__refresh_token", + "expires_at": int(time.time()) + 8 * 60 * 60, # 8 hours + "userinfo": { + "sub": "0123456789", + "name": user["fullname"], + "preferred_username": user["name"], + "profile": f"https://huggingface.co/{user['name']}", + "picture": user["avatarUrl"], + "website": "", + "aud": "00000000-0000-0000-0000-000000000000", + "auth_time": 1691672844, + "nonce": "aaaaaaaaaaaaaaaaaaa", + "iat": 1691672844, + "exp": 1691676444, + "iss": "https://huggingface.co", + }, + } + + +def _get_oauth_uris(route_prefix: str = "/") -> Tuple[str, str, str]: + route_prefix = route_prefix.strip("/") + if route_prefix: + route_prefix = f"/{route_prefix}" + return ( + f"{route_prefix}/oauth/huggingface/login", + f"{route_prefix}/oauth/huggingface/callback", + f"{route_prefix}/oauth/huggingface/logout", + ) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/_snapshot_download.py b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/_snapshot_download.py new file mode 100644 index 0000000000000000000000000000000000000000..0db8a29f7e65a4841590d033f6b7b51d46647bf0 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/_snapshot_download.py @@ -0,0 +1,343 @@ +import os +from pathlib import Path +from typing import Dict, Iterable, List, Literal, Optional, Type, Union + +import requests +from tqdm.auto import tqdm as base_tqdm +from tqdm.contrib.concurrent import thread_map + +from . 
import constants +from .errors import ( + GatedRepoError, + HfHubHTTPError, + LocalEntryNotFoundError, + RepositoryNotFoundError, + RevisionNotFoundError, +) +from .file_download import REGEX_COMMIT_HASH, hf_hub_download, repo_folder_name +from .hf_api import DatasetInfo, HfApi, ModelInfo, RepoFile, SpaceInfo +from .utils import OfflineModeIsEnabled, filter_repo_objects, logging, validate_hf_hub_args +from .utils import tqdm as hf_tqdm + + +logger = logging.get_logger(__name__) + +VERY_LARGE_REPO_THRESHOLD = 50000 # After this limit, we don't consider `repo_info.siblings` to be reliable enough + + +@validate_hf_hub_args +def snapshot_download( + repo_id: str, + *, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + cache_dir: Union[str, Path, None] = None, + local_dir: Union[str, Path, None] = None, + library_name: Optional[str] = None, + library_version: Optional[str] = None, + user_agent: Optional[Union[Dict, str]] = None, + proxies: Optional[Dict] = None, + etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT, + force_download: bool = False, + token: Optional[Union[bool, str]] = None, + local_files_only: bool = False, + allow_patterns: Optional[Union[List[str], str]] = None, + ignore_patterns: Optional[Union[List[str], str]] = None, + max_workers: int = 8, + tqdm_class: Optional[Type[base_tqdm]] = None, + headers: Optional[Dict[str, str]] = None, + endpoint: Optional[str] = None, + # Deprecated args + local_dir_use_symlinks: Union[bool, Literal["auto"]] = "auto", + resume_download: Optional[bool] = None, +) -> str: + """Download repo files. + + Download a whole snapshot of a repo's files at the specified revision. This is useful when you want all files from + a repo, because you don't know which ones you will need a priori. All files are nested inside a folder in order + to keep their actual filename relative to that folder. You can also filter which files to download using + `allow_patterns` and `ignore_patterns`. 
+ + If `local_dir` is provided, the file structure from the repo will be replicated in this location. When using this + option, the `cache_dir` will not be used and a `.cache/huggingface/` folder will be created at the root of `local_dir` + to store some metadata related to the downloaded files. While this mechanism is not as robust as the main + cache-system, it's optimized for regularly pulling the latest version of a repository. + + An alternative would be to clone the repo but this requires git and git-lfs to be installed and properly + configured. It is also not possible to filter which files to download when cloning a repository using git. + + Args: + repo_id (`str`): + A user or an organization name and a repo name separated by a `/`. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if downloading from a dataset or space, + `None` or `"model"` if downloading from a model. Default is `None`. + revision (`str`, *optional*): + An optional Git revision id which can be a branch name, a tag, or a + commit hash. + cache_dir (`str`, `Path`, *optional*): + Path to the folder where cached files are stored. + local_dir (`str` or `Path`, *optional*): + If provided, the downloaded files will be placed under this directory. + library_name (`str`, *optional*): + The name of the library to which the object corresponds. + library_version (`str`, *optional*): + The version of the library. + user_agent (`str`, `dict`, *optional*): + The user-agent info in the form of a dictionary or a string. + proxies (`dict`, *optional*): + Dictionary mapping protocol to the URL of the proxy passed to + `requests.request`. + etag_timeout (`float`, *optional*, defaults to `10`): + When fetching ETag, how many seconds to wait for the server to send + data before giving up which is passed to `requests.request`. + force_download (`bool`, *optional*, defaults to `False`): + Whether the file should be downloaded even if it already exists in the local cache. 
+ token (`str`, `bool`, *optional*): + A token to be used for the download. + - If `True`, the token is read from the HuggingFace config + folder. + - If a string, it's used as the authentication token. + headers (`dict`, *optional*): + Additional headers to include in the request. Those headers take precedence over the others. + local_files_only (`bool`, *optional*, defaults to `False`): + If `True`, avoid downloading the file and return the path to the + local cached file if it exists. + allow_patterns (`List[str]` or `str`, *optional*): + If provided, only files matching at least one pattern are downloaded. + ignore_patterns (`List[str]` or `str`, *optional*): + If provided, files matching any of the patterns are not downloaded. + max_workers (`int`, *optional*): + Number of concurrent threads to download files (1 thread = 1 file download). + Defaults to 8. + tqdm_class (`tqdm`, *optional*): + If provided, overwrites the default behavior for the progress bar. Passed + argument must inherit from `tqdm.auto.tqdm` or at least mimic its behavior. + Note that the `tqdm_class` is not passed to each individual download. + Defaults to the custom HF progress bar that can be disabled by setting + `HF_HUB_DISABLE_PROGRESS_BARS` environment variable. + + Returns: + `str`: folder path of the repo snapshot. + + Raises: + [`~utils.RepositoryNotFoundError`] + If the repository to download from cannot be found. This may be because it doesn't exist, + or because it is set to `private` and you do not have access. + [`~utils.RevisionNotFoundError`] + If the revision to download from cannot be found. + [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError) + If `token=True` and the token cannot be found. + [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError) if + ETag cannot be determined. + [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + if some parameter value is invalid. 
+ """ + if cache_dir is None: + cache_dir = constants.HF_HUB_CACHE + if revision is None: + revision = constants.DEFAULT_REVISION + if isinstance(cache_dir, Path): + cache_dir = str(cache_dir) + + if repo_type is None: + repo_type = "model" + if repo_type not in constants.REPO_TYPES: + raise ValueError(f"Invalid repo type: {repo_type}. Accepted repo types are: {str(constants.REPO_TYPES)}") + + storage_folder = os.path.join(cache_dir, repo_folder_name(repo_id=repo_id, repo_type=repo_type)) + + api = HfApi( + library_name=library_name, + library_version=library_version, + user_agent=user_agent, + endpoint=endpoint, + headers=headers, + token=token, + ) + + repo_info: Union[ModelInfo, DatasetInfo, SpaceInfo, None] = None + api_call_error: Optional[Exception] = None + if not local_files_only: + # try/except logic to handle different errors => taken from `hf_hub_download` + try: + # if we have internet connection we want to list files to download + repo_info = api.repo_info(repo_id=repo_id, repo_type=repo_type, revision=revision) + except (requests.exceptions.SSLError, requests.exceptions.ProxyError): + # Actually raise for those subclasses of ConnectionError + raise + except ( + requests.exceptions.ConnectionError, + requests.exceptions.Timeout, + OfflineModeIsEnabled, + ) as error: + # Internet connection is down + # => will try to use local files only + api_call_error = error + pass + except RevisionNotFoundError: + # The repo was found but the revision doesn't exist on the Hub (never existed or got deleted) + raise + except requests.HTTPError as error: + # Multiple reasons for an http error: + # - Repository is private and invalid/missing token sent + # - Repository is gated and invalid/missing token sent + # - Hub is down (error 500 or 504) + # => let's switch to 'local_files_only=True' to check if the files are already cached. 
+ # (if it's not the case, the error will be re-raised) + api_call_error = error + pass + + # At this stage, if `repo_info` is None it means either: + # - internet connection is down + # - internet connection is deactivated (local_files_only=True or HF_HUB_OFFLINE=True) + # - repo is private/gated and invalid/missing token sent + # - Hub is down + # => let's look if we can find the appropriate folder in the cache: + # - if the specified revision is a commit hash, look inside "snapshots". + # - f the specified revision is a branch or tag, look inside "refs". + # => if local_dir is not None, we will return the path to the local folder if it exists. + if repo_info is None: + # Try to get which commit hash corresponds to the specified revision + commit_hash = None + if REGEX_COMMIT_HASH.match(revision): + commit_hash = revision + else: + ref_path = os.path.join(storage_folder, "refs", revision) + if os.path.exists(ref_path): + # retrieve commit_hash from refs file + with open(ref_path) as f: + commit_hash = f.read() + + # Try to locate snapshot folder for this commit hash + if commit_hash is not None and local_dir is None: + snapshot_folder = os.path.join(storage_folder, "snapshots", commit_hash) + if os.path.exists(snapshot_folder): + # Snapshot folder exists => let's return it + # (but we can't check if all the files are actually there) + return snapshot_folder + + # If local_dir is not None, return it if it exists and is not empty + if local_dir is not None: + local_dir = Path(local_dir) + if local_dir.is_dir() and any(local_dir.iterdir()): + logger.warning( + f"Returning existing local_dir `{local_dir}` as remote repo cannot be accessed in `snapshot_download` ({api_call_error})." + ) + return str(local_dir.resolve()) + # If we couldn't find the appropriate folder on disk, raise an error. 
+ if local_files_only: + raise LocalEntryNotFoundError( + "Cannot find an appropriate cached snapshot folder for the specified revision on the local disk and " + "outgoing traffic has been disabled. To enable repo look-ups and downloads online, pass " + "'local_files_only=False' as input." + ) + elif isinstance(api_call_error, OfflineModeIsEnabled): + raise LocalEntryNotFoundError( + "Cannot find an appropriate cached snapshot folder for the specified revision on the local disk and " + "outgoing traffic has been disabled. To enable repo look-ups and downloads online, set " + "'HF_HUB_OFFLINE=0' as environment variable." + ) from api_call_error + elif isinstance(api_call_error, (RepositoryNotFoundError, GatedRepoError)) or ( + isinstance(api_call_error, HfHubHTTPError) and api_call_error.response.status_code == 401 + ): + # Repo not found, gated, or specific authentication error => let's raise the actual error + raise api_call_error + else: + # Otherwise: most likely a connection issue or Hub downtime => let's warn the user + raise LocalEntryNotFoundError( + "An error happened while trying to locate the files on the Hub and we cannot find the appropriate" + " snapshot folder for the specified revision on the local disk. Please check your internet connection" + " and try again." + ) from api_call_error + + # At this stage, internet connection is up and running + # => let's download the files! + assert repo_info.sha is not None, "Repo info returned from server must have a revision sha." + + # Corner case: on very large repos, the siblings list in `repo_info` might not contain all files. + # In that case, we need to use the `list_repo_tree` method to prevent caching issues. 
+ repo_files: Iterable[str] = [f.rfilename for f in repo_info.siblings] if repo_info.siblings is not None else [] + unreliable_nb_files = ( + repo_info.siblings is None + or len(repo_info.siblings) == 0 + or len(repo_info.siblings) > VERY_LARGE_REPO_THRESHOLD + ) + if unreliable_nb_files: + logger.info( + "Number of files in the repo is unreliable. Using `list_repo_tree` to ensure all files are listed." + ) + repo_files = ( + f.rfilename + for f in api.list_repo_tree(repo_id=repo_id, recursive=True, revision=revision, repo_type=repo_type) + if isinstance(f, RepoFile) + ) + + filtered_repo_files: Iterable[str] = filter_repo_objects( + items=repo_files, + allow_patterns=allow_patterns, + ignore_patterns=ignore_patterns, + ) + + if not unreliable_nb_files: + filtered_repo_files = list(filtered_repo_files) + tqdm_desc = f"Fetching {len(filtered_repo_files)} files" + else: + tqdm_desc = "Fetching ... files" + + commit_hash = repo_info.sha + snapshot_folder = os.path.join(storage_folder, "snapshots", commit_hash) + # if passed revision is not identical to commit_hash + # then revision has to be a branch name or tag name. + # In that case store a ref. + if revision != commit_hash: + ref_path = os.path.join(storage_folder, "refs", revision) + try: + os.makedirs(os.path.dirname(ref_path), exist_ok=True) + with open(ref_path, "w") as f: + f.write(commit_hash) + except OSError as e: + logger.warning(f"Ignored error while writing commit hash to {ref_path}: {e}.") + + # we pass the commit_hash to hf_hub_download + # so no network call happens if we already + # have the file locally. 
+ def _inner_hf_hub_download(repo_file: str): + return hf_hub_download( + repo_id, + filename=repo_file, + repo_type=repo_type, + revision=commit_hash, + endpoint=endpoint, + cache_dir=cache_dir, + local_dir=local_dir, + local_dir_use_symlinks=local_dir_use_symlinks, + library_name=library_name, + library_version=library_version, + user_agent=user_agent, + proxies=proxies, + etag_timeout=etag_timeout, + resume_download=resume_download, + force_download=force_download, + token=token, + headers=headers, + ) + + if constants.HF_HUB_ENABLE_HF_TRANSFER: + # when using hf_transfer we don't want extra parallelism + # from the one hf_transfer provides + for file in filtered_repo_files: + _inner_hf_hub_download(file) + else: + thread_map( + _inner_hf_hub_download, + filtered_repo_files, + desc=tqdm_desc, + max_workers=max_workers, + # User can use its own tqdm class or the default one from `huggingface_hub.utils` + tqdm_class=tqdm_class or hf_tqdm, + ) + + if local_dir is not None: + return str(os.path.realpath(local_dir)) + return snapshot_folder diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/_space_api.py b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/_space_api.py new file mode 100644 index 0000000000000000000000000000000000000000..05fccfbc1ebdfc14840a88751914b8fc0d1a498d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/_space_api.py @@ -0,0 +1,168 @@ +# coding=utf-8 +# Copyright 2019-present, the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.
from dataclasses import dataclass
from datetime import datetime
from enum import Enum
from typing import Dict, Optional

from huggingface_hub.utils import parse_datetime


class SpaceStage(str, Enum):
    """
    Enumeration of possible stage of a Space on the Hub.

    Value can be compared to a string:
    ```py
    assert SpaceStage.BUILDING == "BUILDING"
    ```

    Taken from https://github.com/huggingface/moon-landing/blob/main/server/repo_types/SpaceInfo.ts#L61 (private url).
    """

    # Copied from moon-landing > server > repo_types > SpaceInfo.ts (private repo)
    NO_APP_FILE = "NO_APP_FILE"
    CONFIG_ERROR = "CONFIG_ERROR"
    BUILDING = "BUILDING"
    BUILD_ERROR = "BUILD_ERROR"
    RUNNING = "RUNNING"
    RUNNING_BUILDING = "RUNNING_BUILDING"
    RUNTIME_ERROR = "RUNTIME_ERROR"
    DELETING = "DELETING"
    STOPPED = "STOPPED"
    PAUSED = "PAUSED"


class SpaceHardware(str, Enum):
    """
    Enumeration of hardwares available to run your Space on the Hub.

    Value can be compared to a string:
    ```py
    assert SpaceHardware.CPU_BASIC == "cpu-basic"
    ```

    Taken from https://github.com/huggingface-internal/moon-landing/blob/main/server/repo_types/SpaceHardwareFlavor.ts (private url).
    """

    # CPU
    CPU_BASIC = "cpu-basic"
    CPU_UPGRADE = "cpu-upgrade"
    CPU_XL = "cpu-xl"

    # ZeroGPU
    ZERO_A10G = "zero-a10g"

    # GPU
    T4_SMALL = "t4-small"
    T4_MEDIUM = "t4-medium"
    L4X1 = "l4x1"
    L4X4 = "l4x4"
    L40SX1 = "l40sx1"
    L40SX4 = "l40sx4"
    L40SX8 = "l40sx8"
    A10G_SMALL = "a10g-small"
    A10G_LARGE = "a10g-large"
    A10G_LARGEX2 = "a10g-largex2"
    A10G_LARGEX4 = "a10g-largex4"
    A100_LARGE = "a100-large"
    H100 = "h100"
    H100X8 = "h100x8"


class SpaceStorage(str, Enum):
    """
    Enumeration of persistent storage available for your Space on the Hub.

    Value can be compared to a string:
    ```py
    assert SpaceStorage.SMALL == "small"
    ```

    Taken from https://github.com/huggingface/moon-landing/blob/main/server/repo_types/SpaceHardwareFlavor.ts#L24 (private url).
    """

    SMALL = "small"
    MEDIUM = "medium"
    LARGE = "large"


@dataclass
class SpaceRuntime:
    """
    Contains information about the current runtime of a Space.

    Args:
        stage (`str`):
            Current stage of the space. Example: RUNNING.
        hardware (`str` or `None`):
            Current hardware of the space. Example: "cpu-basic". Can be `None` if Space
            is `BUILDING` for the first time.
        requested_hardware (`str` or `None`):
            Requested hardware. Can be different than `hardware` especially if the request
            has just been made. Example: "t4-medium". Can be `None` if no hardware has
            been requested yet.
        sleep_time (`int` or `None`):
            Number of seconds the Space will be kept alive after the last request. By default (if value is `None`), the
            Space will never go to sleep if it's running on an upgraded hardware, while it will go to sleep after 48
            hours on a free 'cpu-basic' hardware. For more details, see https://huggingface.co/docs/hub/spaces-gpus#sleep-time.
        storage (`str` or `None`):
            Value of the `"storage"` key of the server payload, or `None` when the key is absent.
        raw (`dict`):
            Raw response from the server. Contains more information about the Space
            runtime like number of replicas, number of cpu, memory size,...
    """

    stage: SpaceStage
    hardware: Optional[SpaceHardware]
    requested_hardware: Optional[SpaceHardware]
    sleep_time: Optional[int]
    storage: Optional[SpaceStorage]
    raw: Dict

    def __init__(self, data: Dict) -> None:
        """Build the runtime description from the payload `data`.

        Args:
            data: Mapping that must contain `"stage"`; `"hardware"`, `"gcTimeout"` and
                `"storage"` are optional.

        Raises:
            KeyError: If `"stage"` is missing from `data`.
        """
        # `"hardware"` may be missing *or* explicitly null; both must yield `None`
        # for the two hardware fields instead of failing on `None.get(...)`.
        hardware = data.get("hardware") or {}

        self.stage = data["stage"]
        self.hardware = hardware.get("current")
        self.requested_hardware = hardware.get("requested")
        self.sleep_time = data.get("gcTimeout")
        self.storage = data.get("storage")
        self.raw = data


@dataclass
class SpaceVariable:
    """
    Contains information about the current variables of a Space.

    Args:
        key (`str`):
            Variable key. Example: `"MODEL_REPO_ID"`
        value (`str`):
            Variable value. Example: `"the_model_repo_id"`.
        description (`str` or None):
            Description of the variable. Example: `"Model Repo ID of the implemented model"`.
        updated_at (`datetime` or None):
            datetime of the last update of the variable (if the variable has been updated at least once).
            Parsed from the `"updatedAt"` entry of the payload.
    """

    key: str
    value: str
    description: Optional[str]
    updated_at: Optional[datetime]

    def __init__(self, key: str, values: Dict) -> None:
        """Build the variable from its `key` and the payload `values`.

        Raises:
            KeyError: If `"value"` is missing from `values`.
        """
        self.key = key
        self.value = values["value"]
        self.description = values.get("description")
        updated_at = values.get("updatedAt")
        # Only parse when a timestamp is present; a missing/None entry stays None.
        self.updated_at = parse_datetime(updated_at) if updated_at is not None else None


# diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/_tensorboard_logger.py b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/_tensorboard_logger.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..4d9581d8ee127436ec1e1d585ed0426422a66131
# --- /dev/null
# +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/_tensorboard_logger.py
# @@ -0,0 +1,190 @@
# Copyright 2023 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains a logger to push training logs to the Hub, using Tensorboard."""

from pathlib import Path
from typing import List, Optional, Union

from ._commit_scheduler import CommitScheduler
from .errors import EntryNotFoundError
from .repocard import ModelCard
from .utils import experimental


# `SummaryWriter` may be provided by 'tensorboardX' or by 'torch.utils.tensorboard'
# depending on the user's setup. Both are compatible, so try them in that order.
try:
    from tensorboardX import SummaryWriter as _RuntimeSummaryWriter

    is_summary_writer_available = True
except ImportError:
    try:
        from torch.utils.tensorboard import SummaryWriter as _RuntimeSummaryWriter

        is_summary_writer_available = True
    except ImportError:
        # Placeholder base class so that importing this module never fails.
        # Instantiating `HFSummaryWriter` raises instead (see `__new__`).
        class _DummySummaryWriter:
            pass

        _RuntimeSummaryWriter = _DummySummaryWriter  # type: ignore[assignment]
        is_summary_writer_available = False


class HFSummaryWriter(_RuntimeSummaryWriter):
    """
    Wrapper around the tensorboard's `SummaryWriter` to push training logs to the Hub.

    Data is logged locally and then pushed to the Hub asynchronously. Pushing data to the Hub is done in a separate
    thread to avoid blocking the training script. In particular, if the upload fails for any reason (e.g. a connection
    issue), the main script will not be interrupted. Data is automatically pushed to the Hub every `commit_every`
    minutes (default to every 5 minutes).

    > [!WARNING]
    > `HFSummaryWriter` is experimental. Its API is subject to change in the future without prior notice.

    Args:
        repo_id (`str`):
            The id of the repo to which the logs will be pushed.
        logdir (`str`, *optional*):
            The directory where the logs will be written. If not specified, a local directory will be created by the
            underlying `SummaryWriter` object.
        commit_every (`int` or `float`, *optional*):
            The frequency (in minutes) at which the logs will be pushed to the Hub. Defaults to 5 minutes.
        squash_history (`bool`, *optional*):
            Whether to squash the history of the repo after each commit. Defaults to `False`. Squashing commits is
            useful to avoid degraded performances on the repo when it grows too large.
        repo_type (`str`, *optional*):
            The type of the repo to which the logs will be pushed. Defaults to "model".
        repo_revision (`str`, *optional*):
            The revision of the repo to which the logs will be pushed. Defaults to "main".
        repo_private (`bool`, *optional*):
            Whether to make the repo private. If `None` (default), the repo will be public unless the organization's default is private. This value is ignored if the repo already exists.
        path_in_repo (`str`, *optional*):
            The path to the folder in the repo where the logs will be pushed. Defaults to "tensorboard/".
        repo_allow_patterns (`List[str]` or `str`, *optional*):
            A list of patterns to include in the upload. Defaults to `"*.tfevents.*"`. Check out the
            [upload guide](https://huggingface.co/docs/huggingface_hub/guides/upload#upload-a-folder) for more details.
        repo_ignore_patterns (`List[str]` or `str`, *optional*):
            A list of patterns to exclude in the upload. Check out the
            [upload guide](https://huggingface.co/docs/huggingface_hub/guides/upload#upload-a-folder) for more details.
        token (`str`, *optional*):
            Authentication token. Will default to the stored token. See https://huggingface.co/settings/token for more
            details
        kwargs:
            Additional keyword arguments passed to `SummaryWriter`.

    Examples:
    ```diff
    # Taken from https://pytorch.org/docs/stable/tensorboard.html
    - from torch.utils.tensorboard import SummaryWriter
    + from huggingface_hub import HFSummaryWriter

    import numpy as np

    - writer = SummaryWriter()
    + writer = HFSummaryWriter(repo_id="username/my-trained-model")

    for n_iter in range(100):
        writer.add_scalar('Loss/train', np.random.random(), n_iter)
        writer.add_scalar('Loss/test', np.random.random(), n_iter)
        writer.add_scalar('Accuracy/train', np.random.random(), n_iter)
        writer.add_scalar('Accuracy/test', np.random.random(), n_iter)
    ```

    ```py
    >>> from huggingface_hub import HFSummaryWriter

    # Logs are automatically pushed every 15 minutes (5 by default) + when exiting the context manager
    >>> with HFSummaryWriter(repo_id="test_hf_logger", commit_every=15) as logger:
    ...     logger.add_scalar("a", 1)
    ...     logger.add_scalar("b", 2)
    ```
    """

    @experimental
    def __new__(cls, *args, **kwargs) -> "HFSummaryWriter":
        """Allocate the writer, refusing to do so when no `SummaryWriter` backend could be imported."""
        if is_summary_writer_available:
            return super().__new__(cls)
        raise ImportError(
            "You must have `tensorboard` installed to use `HFSummaryWriter`. Please run `pip install --upgrade"
            " tensorboardX` first."
        )

    def __init__(
        self,
        repo_id: str,
        *,
        logdir: Optional[str] = None,
        commit_every: Union[int, float] = 5,
        squash_history: bool = False,
        repo_type: Optional[str] = None,
        repo_revision: Optional[str] = None,
        repo_private: Optional[bool] = None,
        path_in_repo: Optional[str] = "tensorboard",
        repo_allow_patterns: Optional[Union[List[str], str]] = "*.tfevents.*",
        repo_ignore_patterns: Optional[Union[List[str], str]] = None,
        token: Optional[str] = None,
        **kwargs,
    ):
        """Set up the underlying writer, the commit scheduler and the model card tag.

        Raises:
            ValueError: If, after the base initialisation, `self.logdir` is not a string.
        """
        # Let the underlying SummaryWriter set itself up first (it defines `self.logdir`).
        super().__init__(logdir=logdir, **kwargs)

        # Fail early if the log directory is unusable. In practice SummaryWriter takes care of it.
        if not isinstance(self.logdir, str):
            raise ValueError(f"`self.logdir` must be a string. Got '{self.logdir}' of type {type(self.logdir)}.")

        # Logs always land in a sub-folder named after the local log directory,
        # optionally nested under the requested `path_in_repo`.
        run_name = Path(self.logdir).name
        if not path_in_repo:  # None or ""
            path_in_repo = run_name
        else:
            path_in_repo = path_in_repo.strip("/") + "/" + run_name

        # Background scheduler in charge of the periodic pushes.
        self.scheduler = CommitScheduler(
            folder_path=self.logdir,
            path_in_repo=path_in_repo,
            repo_id=repo_id,
            repo_type=repo_type,
            revision=repo_revision,
            private=repo_private,
            token=token,
            allow_patterns=repo_allow_patterns,
            ignore_patterns=repo_ignore_patterns,
            every=commit_every,
            squash_history=squash_history,
        )

        # Re-export a few scheduler attributes on the writer itself.
        self.repo_id = self.scheduler.repo_id
        self.repo_type = self.scheduler.repo_type
        self.repo_revision = self.scheduler.revision

        # Make sure the model card carries the `hf-summary-writer` tag.
        try:
            card = ModelCard.load(repo_id_or_path=self.repo_id, repo_type=self.repo_type)
        except EntryNotFoundError:
            card = ModelCard("")
        tags = card.data.get("tags", [])
        if "hf-summary-writer" in tags:
            return  # already tagged: nothing to push
        tags.append("hf-summary-writer")
        card.data["tags"] = tags
        card.push_to_hub(repo_id=self.repo_id, repo_type=self.repo_type)

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Leave the context manager: run the base `__exit__`, trigger a push and wait for its result."""
        super().__exit__(exc_type, exc_val, exc_tb)
        self.scheduler.trigger().result()


# diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/_upload_large_folder.py b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/_upload_large_folder.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..1ccbc07d39d3d03e9bb8c39f1bb16aa2ca4ab41f
# --- /dev/null
# +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/_upload_large_folder.py
# @@ -0,0 +1,755 @@
# coding=utf-8
# Copyright 2024-present, the HuggingFace Inc. team.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import enum +import logging +import os +import queue +import shutil +import sys +import threading +import time +import traceback +from datetime import datetime +from pathlib import Path +from threading import Lock +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union +from urllib.parse import quote + +from . import constants +from ._commit_api import CommitOperationAdd, UploadInfo, _fetch_upload_modes +from ._local_folder import LocalUploadFileMetadata, LocalUploadFilePaths, get_local_upload_paths, read_upload_metadata +from .constants import DEFAULT_REVISION, REPO_TYPES +from .utils import DEFAULT_IGNORE_PATTERNS, filter_repo_objects, tqdm +from .utils._cache_manager import _format_size +from .utils._runtime import is_xet_available +from .utils.sha import sha_fileobj + + +if TYPE_CHECKING: + from .hf_api import HfApi + +logger = logging.getLogger(__name__) + +WAITING_TIME_IF_NO_TASKS = 10 # seconds +MAX_NB_FILES_FETCH_UPLOAD_MODE = 100 +COMMIT_SIZE_SCALE: List[int] = [20, 50, 75, 100, 125, 200, 250, 400, 600, 1000] + +UPLOAD_BATCH_SIZE_XET = 256 # Max 256 files per upload batch for XET-enabled repos +UPLOAD_BATCH_SIZE_LFS = 1 # Otherwise, batches of 1 for regular LFS upload + +# Repository limits (from https://huggingface.co/docs/hub/repositories-recommendations) +MAX_FILES_PER_REPO = 100_000 # Recommended maximum number of files per repository +MAX_FILES_PER_FOLDER = 10_000 # Recommended maximum 
number of files per folder +MAX_FILE_SIZE_GB = 50 # Hard limit for individual file size +RECOMMENDED_FILE_SIZE_GB = 20 # Recommended maximum for individual file size + + +def _validate_upload_limits(paths_list: List[LocalUploadFilePaths]) -> None: + """ + Validate upload against repository limits and warn about potential issues. + + Args: + paths_list: List of file paths to be uploaded + + Warns about: + - Too many files in the repository (>100k) + - Too many entries (files or subdirectories) in a single folder (>10k) + - Files exceeding size limits (>20GB recommended, >50GB hard limit) + """ + logger.info("Running validation checks on files to upload...") + + # Check 1: Total file count + if len(paths_list) > MAX_FILES_PER_REPO: + logger.warning( + f"You are about to upload {len(paths_list):,} files. " + f"This exceeds the recommended limit of {MAX_FILES_PER_REPO:,} files per repository.\n" + f"Consider:\n" + f" - Splitting your data into multiple repositories\n" + f" - Using fewer, larger files (e.g., parquet files)\n" + f" - See: https://huggingface.co/docs/hub/repositories-recommendations" + ) + + # Check 2: Files and subdirectories per folder + # Track immediate children (files and subdirs) for each folder + from collections import defaultdict + + entries_per_folder: Dict[str, Any] = defaultdict(lambda: {"files": 0, "subdirs": set()}) + + for paths in paths_list: + path = Path(paths.path_in_repo) + parts = path.parts + + # Count this file in its immediate parent directory + parent = str(path.parent) if str(path.parent) != "." else "." + entries_per_folder[parent]["files"] += 1 + + # Track immediate subdirectories for each parent folder + # Walk through the path components to track parent-child relationships + for i, child in enumerate(parts[:-1]): + parent = "." 
if i == 0 else "/".join(parts[:i]) + entries_per_folder[parent]["subdirs"].add(child) + + # Check limits for each folder + for folder, data in entries_per_folder.items(): + file_count = data["files"] + subdir_count = len(data["subdirs"]) + total_entries = file_count + subdir_count + + if total_entries > MAX_FILES_PER_FOLDER: + folder_display = "root" if folder == "." else folder + logger.warning( + f"Folder '{folder_display}' contains {total_entries:,} entries " + f"({file_count:,} files and {subdir_count:,} subdirectories). " + f"This exceeds the recommended {MAX_FILES_PER_FOLDER:,} entries per folder.\n" + "Consider reorganising into sub-folders." + ) + + # Check 3: File sizes + large_files = [] + very_large_files = [] + + for paths in paths_list: + size = paths.file_path.stat().st_size + size_gb = size / 1_000_000_000 # Use decimal GB as per Hub limits + + if size_gb > MAX_FILE_SIZE_GB: + very_large_files.append((paths.path_in_repo, size_gb)) + elif size_gb > RECOMMENDED_FILE_SIZE_GB: + large_files.append((paths.path_in_repo, size_gb)) + + # Warn about very large files (>50GB) + if very_large_files: + files_str = "\n - ".join(f"{path}: {size:.1f}GB" for path, size in very_large_files[:5]) + more_str = f"\n ... and {len(very_large_files) - 5} more files" if len(very_large_files) > 5 else "" + logger.warning( + f"Found {len(very_large_files)} files exceeding the {MAX_FILE_SIZE_GB}GB hard limit:\n" + f" - {files_str}{more_str}\n" + f"These files may fail to upload. Consider splitting them into smaller chunks." + ) + + # Warn about large files (>20GB) + if large_files: + files_str = "\n - ".join(f"{path}: {size:.1f}GB" for path, size in large_files[:5]) + more_str = f"\n ... and {len(large_files) - 5} more files" if len(large_files) > 5 else "" + logger.warning( + f"Found {len(large_files)} files larger than {RECOMMENDED_FILE_SIZE_GB}GB (recommended limit):\n" + f" - {files_str}{more_str}\n" + f"Large files may slow down loading and processing." 
+ ) + + logger.info("Validation checks complete.") + + +def upload_large_folder_internal( + api: "HfApi", + repo_id: str, + folder_path: Union[str, Path], + *, + repo_type: str, # Repo type is required! + revision: Optional[str] = None, + private: Optional[bool] = None, + allow_patterns: Optional[Union[List[str], str]] = None, + ignore_patterns: Optional[Union[List[str], str]] = None, + num_workers: Optional[int] = None, + print_report: bool = True, + print_report_every: int = 60, +): + """Upload a large folder to the Hub in the most resilient way possible. + + See [`HfApi.upload_large_folder`] for the full documentation. + """ + # 1. Check args and setup + if repo_type is None: + raise ValueError( + "For large uploads, `repo_type` is explicitly required. Please set it to `model`, `dataset` or `space`." + " If you are using the CLI, pass it as `--repo-type=model`." + ) + if repo_type not in REPO_TYPES: + raise ValueError(f"Invalid repo type, must be one of {REPO_TYPES}") + if revision is None: + revision = DEFAULT_REVISION + + folder_path = Path(folder_path).expanduser().resolve() + if not folder_path.is_dir(): + raise ValueError(f"Provided path: '{folder_path}' is not a directory") + + if ignore_patterns is None: + ignore_patterns = [] + elif isinstance(ignore_patterns, str): + ignore_patterns = [ignore_patterns] + ignore_patterns += DEFAULT_IGNORE_PATTERNS + + if num_workers is None: + nb_cores = os.cpu_count() or 1 + num_workers = max(nb_cores - 2, 2) # Use all but 2 cores, or at least 2 cores + + # 2. 
Create repo if missing + repo_url = api.create_repo(repo_id=repo_id, repo_type=repo_type, private=private, exist_ok=True) + logger.info(f"Repo created: {repo_url}") + repo_id = repo_url.repo_id + # 2.1 Check if xet is enabled to set batch file upload size + is_xet_enabled = ( + is_xet_available() + and api.repo_info( + repo_id=repo_id, + repo_type=repo_type, + revision=revision, + expand="xetEnabled", + ).xet_enabled + ) + upload_batch_size = UPLOAD_BATCH_SIZE_XET if is_xet_enabled else UPLOAD_BATCH_SIZE_LFS + + # 3. List files to upload + filtered_paths_list = filter_repo_objects( + (path.relative_to(folder_path).as_posix() for path in folder_path.glob("**/*") if path.is_file()), + allow_patterns=allow_patterns, + ignore_patterns=ignore_patterns, + ) + paths_list = [get_local_upload_paths(folder_path, relpath) for relpath in filtered_paths_list] + logger.info(f"Found {len(paths_list)} candidate files to upload") + + # Validate upload against repository limits + _validate_upload_limits(paths_list) + + logger.info("Starting upload...") + + # Read metadata for each file + items = [ + (paths, read_upload_metadata(folder_path, paths.path_in_repo)) + for paths in tqdm(paths_list, desc="Recovering from metadata files") + ] + + # 4. Start workers + status = LargeUploadStatus(items, upload_batch_size) + threads = [ + threading.Thread( + target=_worker_job, + kwargs={ + "status": status, + "api": api, + "repo_id": repo_id, + "repo_type": repo_type, + "revision": revision, + }, + ) + for _ in range(num_workers) + ] + + for thread in threads: + thread.start() + + # 5. 
Print regular reports + if print_report: + print("\n\n" + status.current_report()) + last_report_ts = time.time() + while True: + time.sleep(1) + if time.time() - last_report_ts >= print_report_every: + if print_report: + _print_overwrite(status.current_report()) + last_report_ts = time.time() + if status.is_done(): + logging.info("Is done: exiting main loop") + break + + for thread in threads: + thread.join() + + logger.info(status.current_report()) + logging.info("Upload is complete!") + + +#################### +# Logic to manage workers and synchronize tasks +#################### + + +class WorkerJob(enum.Enum): + SHA256 = enum.auto() + GET_UPLOAD_MODE = enum.auto() + PREUPLOAD_LFS = enum.auto() + COMMIT = enum.auto() + WAIT = enum.auto() # if no tasks are available but we don't want to exit + + +JOB_ITEM_T = Tuple[LocalUploadFilePaths, LocalUploadFileMetadata] + + +class LargeUploadStatus: + """Contains information, queues and tasks for a large upload process.""" + + def __init__(self, items: List[JOB_ITEM_T], upload_batch_size: int = 1): + self.items = items + self.queue_sha256: "queue.Queue[JOB_ITEM_T]" = queue.Queue() + self.queue_get_upload_mode: "queue.Queue[JOB_ITEM_T]" = queue.Queue() + self.queue_preupload_lfs: "queue.Queue[JOB_ITEM_T]" = queue.Queue() + self.queue_commit: "queue.Queue[JOB_ITEM_T]" = queue.Queue() + self.lock = Lock() + + self.nb_workers_sha256: int = 0 + self.nb_workers_get_upload_mode: int = 0 + self.nb_workers_preupload_lfs: int = 0 + self.upload_batch_size: int = upload_batch_size + self.nb_workers_commit: int = 0 + self.nb_workers_waiting: int = 0 + self.last_commit_attempt: Optional[float] = None + + self._started_at = datetime.now() + self._chunk_idx: int = 1 + self._chunk_lock: Lock = Lock() + + # Setup queues + for item in self.items: + paths, metadata = item + if metadata.sha256 is None: + self.queue_sha256.put(item) + elif metadata.upload_mode is None: + self.queue_get_upload_mode.put(item) + elif metadata.upload_mode == 
"lfs" and not metadata.is_uploaded: + self.queue_preupload_lfs.put(item) + elif not metadata.is_committed: + self.queue_commit.put(item) + else: + logger.debug(f"Skipping file {paths.path_in_repo} (already uploaded and committed)") + + def target_chunk(self) -> int: + with self._chunk_lock: + return COMMIT_SIZE_SCALE[self._chunk_idx] + + def update_chunk(self, success: bool, nb_items: int, duration: float) -> None: + with self._chunk_lock: + if not success: + logger.warning(f"Failed to commit {nb_items} files at once. Will retry with less files in next batch.") + self._chunk_idx -= 1 + elif nb_items >= COMMIT_SIZE_SCALE[self._chunk_idx] and duration < 40: + logger.info(f"Successfully committed {nb_items} at once. Increasing the limit for next batch.") + self._chunk_idx += 1 + + self._chunk_idx = max(0, min(self._chunk_idx, len(COMMIT_SIZE_SCALE) - 1)) + + def current_report(self) -> str: + """Generate a report of the current status of the large upload.""" + nb_hashed = 0 + size_hashed = 0 + nb_preuploaded = 0 + nb_lfs = 0 + nb_lfs_unsure = 0 + size_preuploaded = 0 + nb_committed = 0 + size_committed = 0 + total_size = 0 + ignored_files = 0 + total_files = 0 + + with self.lock: + for _, metadata in self.items: + if metadata.should_ignore: + ignored_files += 1 + continue + total_size += metadata.size + total_files += 1 + if metadata.sha256 is not None: + nb_hashed += 1 + size_hashed += metadata.size + if metadata.upload_mode == "lfs": + nb_lfs += 1 + if metadata.upload_mode is None: + nb_lfs_unsure += 1 + if metadata.is_uploaded: + nb_preuploaded += 1 + size_preuploaded += metadata.size + if metadata.is_committed: + nb_committed += 1 + size_committed += metadata.size + total_size_str = _format_size(total_size) + + now = datetime.now() + now_str = now.strftime("%Y-%m-%d %H:%M:%S") + elapsed = now - self._started_at + elapsed_str = str(elapsed).split(".")[0] # remove milliseconds + + message = "\n" + "-" * 10 + message += f" {now_str} ({elapsed_str}) " + message += "-" 
def _worker_job(
    status: LargeUploadStatus,
    api: "HfApi",
    repo_id: str,
    repo_type: str,
    revision: str,
):
    """
    Main process for a worker. The worker will perform tasks based on the priority list until all files are uploaded
    and committed. If no tasks are available, the worker will wait for `WAITING_TIME_IF_NO_TASKS` seconds before
    checking again.

    If a task fails for any reason, the item(s) are put back in the queue for another worker to pick up. The failure
    is logged (summary at ERROR level, full traceback at DEBUG level) and the worker keeps running.
    `KeyboardInterrupt` is always re-raised.

    Read `upload_large_folder` docstring for more information on how tasks are prioritized.

    Args:
        status: Shared upload state (queues, worker counters, lock).
        api: Client used to query upload modes, pre-upload LFS files and create commits.
        repo_id: Target repository id.
        repo_type: Target repository type.
        revision: Target revision.
    """
    while True:
        # Determine next task (`None` means that everything has been processed)
        next_job: Optional[Tuple[WorkerJob, List[JOB_ITEM_T]]] = _determine_next_job(status)
        if next_job is None:
            return
        job, items = next_job

        # Perform task
        if job == WorkerJob.SHA256:
            item = items[0]  # single item
            try:
                _compute_sha256(item)
                status.queue_get_upload_mode.put(item)
            except KeyboardInterrupt:
                raise
            except Exception as e:
                _log_job_failure("compute sha256", e)
                status.queue_sha256.put(item)

            with status.lock:
                status.nb_workers_sha256 -= 1

        elif job == WorkerJob.GET_UPLOAD_MODE:
            try:
                _get_upload_mode(items, api=api, repo_id=repo_id, repo_type=repo_type, revision=revision)
            except KeyboardInterrupt:
                raise
            except Exception as e:
                _log_job_failure("get upload mode", e)

            # Items are either:
            # - dropped (if should_ignore)
            # - put in LFS queue (if LFS)
            # - put in commit queue (if regular)
            # - or put back (if error occurred).
            for item in items:
                _, metadata = item
                if metadata.should_ignore:
                    continue
                if metadata.upload_mode == "lfs":
                    status.queue_preupload_lfs.put(item)
                elif metadata.upload_mode == "regular":
                    status.queue_commit.put(item)
                else:
                    status.queue_get_upload_mode.put(item)

            with status.lock:
                status.nb_workers_get_upload_mode -= 1

        elif job == WorkerJob.PREUPLOAD_LFS:
            try:
                _preupload_lfs(items, api=api, repo_id=repo_id, repo_type=repo_type, revision=revision)
                for item in items:
                    status.queue_commit.put(item)
            except KeyboardInterrupt:
                raise
            except Exception as e:
                _log_job_failure("preupload LFS", e)
                for item in items:
                    status.queue_preupload_lfs.put(item)

            with status.lock:
                status.nb_workers_preupload_lfs -= 1

        elif job == WorkerJob.COMMIT:
            start_ts = time.time()
            success = True
            try:
                _commit(items, api=api, repo_id=repo_id, repo_type=repo_type, revision=revision)
            except KeyboardInterrupt:
                raise
            except Exception as e:
                _log_job_failure("commit", e)
                for item in items:
                    status.queue_commit.put(item)
                success = False
            duration = time.time() - start_ts
            # Let the status adapt the size of the next commit batch to this outcome
            status.update_chunk(success, len(items), duration)
            with status.lock:
                status.last_commit_attempt = time.time()
                status.nb_workers_commit -= 1

        elif job == WorkerJob.WAIT:
            time.sleep(WAITING_TIME_IF_NO_TASKS)
            with status.lock:
                status.nb_workers_waiting -= 1


def _log_job_failure(action: str, error: Exception) -> None:
    """Log a failed worker task: one-line summary at ERROR level, full traceback at DEBUG level."""
    logger.error(f"Failed to {action}: {error}")
    # Must be called from inside the `except` block so that `format_exc()` sees the active exception.
    # The traceback was previously formatted and then thrown away.
    logger.debug(traceback.format_exc())
Commit if more than 5 minutes since last commit attempt (and at least 1 file) + if ( + status.nb_workers_commit == 0 + and status.queue_commit.qsize() > 0 + and status.last_commit_attempt is not None + and time.time() - status.last_commit_attempt > 5 * 60 + ): + status.nb_workers_commit += 1 + logger.debug("Job: commit (more than 5 minutes since last commit attempt)") + return (WorkerJob.COMMIT, _get_n(status.queue_commit, status.target_chunk())) + + # 2. Commit if at least 100 files are ready to commit + elif status.nb_workers_commit == 0 and status.queue_commit.qsize() >= 150: + status.nb_workers_commit += 1 + logger.debug("Job: commit (>100 files ready)") + return (WorkerJob.COMMIT, _get_n(status.queue_commit, status.target_chunk())) + + # 3. Get upload mode if at least 100 files + elif status.queue_get_upload_mode.qsize() >= MAX_NB_FILES_FETCH_UPLOAD_MODE: + status.nb_workers_get_upload_mode += 1 + logger.debug(f"Job: get upload mode (>{MAX_NB_FILES_FETCH_UPLOAD_MODE} files ready)") + return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, MAX_NB_FILES_FETCH_UPLOAD_MODE)) + + # 4. Preupload LFS file if at least `status.upload_batch_size` files and no worker is preuploading LFS + elif status.queue_preupload_lfs.qsize() >= status.upload_batch_size and status.nb_workers_preupload_lfs == 0: + status.nb_workers_preupload_lfs += 1 + logger.debug("Job: preupload LFS (no other worker preuploading LFS)") + return (WorkerJob.PREUPLOAD_LFS, _get_n(status.queue_preupload_lfs, status.upload_batch_size)) + + # 5. Compute sha256 if at least 1 file and no worker is computing sha256 + elif status.queue_sha256.qsize() > 0 and status.nb_workers_sha256 == 0: + status.nb_workers_sha256 += 1 + logger.debug("Job: sha256 (no other worker computing sha256)") + return (WorkerJob.SHA256, _get_one(status.queue_sha256)) + + # 6. 
Get upload mode if at least 1 file and no worker is getting upload mode + elif status.queue_get_upload_mode.qsize() > 0 and status.nb_workers_get_upload_mode == 0: + status.nb_workers_get_upload_mode += 1 + logger.debug("Job: get upload mode (no other worker getting upload mode)") + return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, MAX_NB_FILES_FETCH_UPLOAD_MODE)) + + # 7. Preupload LFS file if at least `status.upload_batch_size` files + # Skip if hf_transfer is enabled and there is already a worker preuploading LFS + elif status.queue_preupload_lfs.qsize() >= status.upload_batch_size and ( + status.nb_workers_preupload_lfs == 0 or not constants.HF_HUB_ENABLE_HF_TRANSFER + ): + status.nb_workers_preupload_lfs += 1 + logger.debug("Job: preupload LFS") + return (WorkerJob.PREUPLOAD_LFS, _get_n(status.queue_preupload_lfs, status.upload_batch_size)) + + # 8. Compute sha256 if at least 1 file + elif status.queue_sha256.qsize() > 0: + status.nb_workers_sha256 += 1 + logger.debug("Job: sha256") + return (WorkerJob.SHA256, _get_one(status.queue_sha256)) + + # 9. Get upload mode if at least 1 file + elif status.queue_get_upload_mode.qsize() > 0: + status.nb_workers_get_upload_mode += 1 + logger.debug("Job: get upload mode") + return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, MAX_NB_FILES_FETCH_UPLOAD_MODE)) + + # 10. Preupload LFS file if at least 1 file + elif status.queue_preupload_lfs.qsize() > 0: + status.nb_workers_preupload_lfs += 1 + logger.debug("Job: preupload LFS") + return (WorkerJob.PREUPLOAD_LFS, _get_n(status.queue_preupload_lfs, status.upload_batch_size)) + + # 11. 
def _compute_sha256(item: JOB_ITEM_T) -> None:
    """Hash the local file of `item` (unless already hashed) and persist the metadata.

    The hex digest is stored in `metadata.sha256`. The metadata is saved in every case, even when the hash was
    already known.
    """
    paths, metadata = item
    already_hashed = metadata.sha256 is not None
    if not already_hashed:
        with paths.file_path.open("rb") as stream:
            digest = sha_fileobj(stream)
        metadata.sha256 = digest.hex()
    metadata.save(paths)
def _commit(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
    """Create a single commit containing all `items`, then flag each of them as committed.

    Metadata is only updated (and saved) once `api.create_commit` has returned; if it raises, no item is marked.
    """
    operations = []
    for item in items:
        operations.append(_build_hacky_operation(item))

    api.create_commit(
        repo_id=repo_id,
        repo_type=repo_type,
        revision=revision,
        operations=operations,
        commit_message="Add files using upload-large-folder tool",
    )

    for item in items:
        paths, metadata = item
        metadata.is_committed = True
        metadata.save(paths)
def _get_n(queue: "queue.Queue[JOB_ITEM_T]", n: int) -> "List[JOB_ITEM_T]":
    """Pop up to `n` items from `queue` without ever blocking.

    Args:
        queue: Queue to take items from.
        n: Maximum number of items to return.

    Returns:
        The items in FIFO order. The list is shorter than `n` (possibly empty) when the queue holds fewer items.
    """
    # The `queue` parameter shadows the stdlib module of the same name, hence the local import.
    from queue import Empty

    items = []
    # `qsize()` is only approximate: never rely on it to guarantee that `get()` will not block.
    for _ in range(min(queue.qsize(), n)):
        try:
            items.append(queue.get_nowait())
        except Empty:
            # Queue drained in the meantime: return what we have instead of waiting forever.
            break
    return items
+ """ + report += "\n" + # Get terminal width + terminal_width = shutil.get_terminal_size().columns + + # Count number of lines that should be cleared + nb_lines = sum(len(line) // terminal_width + 1 for line in report.splitlines()) + + # Clear previous lines based on the number of lines in the report + for _ in range(nb_lines): + sys.stdout.write("\r\033[K") # Clear line + sys.stdout.write("\033[F") # Move cursor up one line + + # Print the new report, filling remaining space with whitespace + sys.stdout.write(report) + sys.stdout.write(" " * (terminal_width - len(report.splitlines()[-1]))) + sys.stdout.flush() diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/_webhooks_payload.py b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/_webhooks_payload.py new file mode 100644 index 0000000000000000000000000000000000000000..288f4b08b9428980e99ca06703442eab62fad277 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/_webhooks_payload.py @@ -0,0 +1,137 @@ +# coding=utf-8 +# Copyright 2023-present, the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Contains data structures to parse the webhooks payload.""" + +from typing import List, Literal, Optional + +from .utils import is_pydantic_available + + +if is_pydantic_available(): + from pydantic import BaseModel +else: + # Define a dummy BaseModel to avoid import errors when pydantic is not installed + # Import error will be raised when trying to use the class + + class BaseModel: # type: ignore [no-redef] + def __init__(self, *args, **kwargs) -> None: + raise ImportError( + "You must have `pydantic` installed to use `WebhookPayload`. This is an optional dependency that" + " should be installed separately. Please run `pip install --upgrade pydantic` and retry." + ) + + +# This is an adaptation of the ReportV3 interface implemented in moon-landing. V0, V1 and V2 have been ignored as they +# are not in used anymore. To keep in sync when format is updated in +# https://github.com/huggingface/moon-landing/blob/main/server/lib/HFWebhooks.ts (internal link). + + +WebhookEvent_T = Literal[ + "create", + "delete", + "move", + "update", +] +RepoChangeEvent_T = Literal[ + "add", + "move", + "remove", + "update", +] +RepoType_T = Literal[ + "dataset", + "model", + "space", +] +DiscussionStatus_T = Literal[ + "closed", + "draft", + "open", + "merged", +] +SupportedWebhookVersion = Literal[3] + + +class ObjectId(BaseModel): + id: str + + +class WebhookPayloadUrl(BaseModel): + web: str + api: Optional[str] = None + + +class WebhookPayloadMovedTo(BaseModel): + name: str + owner: ObjectId + + +class WebhookPayloadWebhook(ObjectId): + version: SupportedWebhookVersion + + +class WebhookPayloadEvent(BaseModel): + action: WebhookEvent_T + scope: str + + +class WebhookPayloadDiscussionChanges(BaseModel): + base: str + mergeCommitId: Optional[str] = None + + +class WebhookPayloadComment(ObjectId): + author: ObjectId + hidden: bool + content: Optional[str] = None + url: WebhookPayloadUrl + + +class WebhookPayloadDiscussion(ObjectId): + num: int + author: ObjectId + url: 
class WebhookPayloadRepo(ObjectId):
    # Repository section of a webhook payload (inherits `id` from `ObjectId`).
    owner: ObjectId
    head_sha: Optional[str] = None
    name: str
    private: bool
    subdomain: Optional[str] = None
    tags: Optional[List[str]] = None
    # Reuse the module-level alias instead of re-spelling the same `Literal` of repo types.
    type: RepoType_T
    url: WebhookPayloadUrl
@experimental
class WebhooksServer:
    """
    The [`WebhooksServer`] class lets you create an instance of a Gradio app that can receive Huggingface webhooks.
    These webhooks can be registered using the [`~WebhooksServer.add_webhook`] decorator. Webhook endpoints are added to
    the app as a POST endpoint to the FastAPI router. Once all the webhooks are registered, the `launch` method has to be
    called to start the app.

    It is recommended to accept [`WebhookPayload`] as the first argument of the webhook function. It is a Pydantic
    model that contains all the information about the webhook event. The data will be parsed automatically for you.

    Check out the [webhooks guide](../guides/webhooks_server) for a step-by-step tutorial on how to setup your
    WebhooksServer and deploy it on a Space.

    > [!WARNING]
    > `WebhooksServer` is experimental. Its API is subject to change in the future.

    > [!WARNING]
    > You must have `gradio` installed to use `WebhooksServer` (`pip install --upgrade gradio`).

    Args:
        ui (`gradio.Blocks`, optional):
            A Gradio UI instance to be used as the Space landing page. If `None`, a UI displaying instructions
            about the configured webhooks is created.
        webhook_secret (`str`, optional):
            A secret key to verify incoming webhook requests. You can set this value to any secret you want as long as
            you also configure it in your [webhooks settings panel](https://huggingface.co/settings/webhooks). You
            can also set this value as the `WEBHOOK_SECRET` environment variable. If no secret is provided, the
            webhook endpoints are opened without any security.

    Example:

        ```python
        import gradio as gr
        from huggingface_hub import WebhooksServer, WebhookPayload

        with gr.Blocks() as ui:
            ...

        app = WebhooksServer(ui=ui, webhook_secret="my_secret_key")

        @app.add_webhook("/say_hello")
        async def hello(payload: WebhookPayload):
            return {"message": "hello"}

        app.launch()
        ```
    """

    def __new__(cls, *args, **kwargs) -> "WebhooksServer":
        # Fail early, with an actionable message, if an optional dependency is missing.
        if not is_gradio_available():
            raise ImportError(
                "You must have `gradio` installed to use `WebhooksServer`. Please run `pip install --upgrade gradio`"
                " first."
            )
        if not is_fastapi_available():
            raise ImportError(
                "You must have `fastapi` installed to use `WebhooksServer`. Please run `pip install --upgrade fastapi`"
                " first."
            )
        return super().__new__(cls)

    def __init__(
        self,
        ui: Optional["gr.Blocks"] = None,
        webhook_secret: Optional[str] = None,
    ) -> None:
        self._ui = ui

        # Explicit argument wins over the `WEBHOOK_SECRET` environment variable
        self.webhook_secret = webhook_secret or os.getenv("WEBHOOK_SECRET")
        # Absolute route path -> webhook function (routes are only mounted in `launch`)
        self.registered_webhooks: Dict[str, Callable] = {}
        _warn_on_empty_secret(self.webhook_secret)

    def add_webhook(self, path: Optional[str] = None) -> Callable:
        """
        Decorator to add a webhook to the [`WebhooksServer`] server.

        The decorated function is returned unchanged, so it can still be called or referenced by its name.

        Args:
            path (`str`, optional):
                The URL path to register the webhook function. If not provided, the function name will be used as the
                path. In any case, all webhooks are registered under `/webhooks`.

        Raises:
            ValueError: If the provided path is already registered as a webhook.

        Example:
            ```python
            from huggingface_hub import WebhooksServer, WebhookPayload

            app = WebhooksServer()

            @app.add_webhook
            async def trigger_training(payload: WebhookPayload):
                if payload.repo.type == "dataset" and payload.event.action == "update":
                    # Trigger a training job if a dataset is updated
                    ...

            app.launch()
            ```
        """
        # Usage: directly as decorator. Example: `@app.add_webhook`
        if callable(path):
            # If path is a function, it means it was used as a decorator without arguments
            return self.add_webhook()(path)

        # Usage: provide a path. Example: `@app.add_webhook(...)`
        @wraps(FastAPI.post)
        def _inner_post(*args, **kwargs):
            func = args[0]
            abs_path = f"/webhooks/{(path or func.__name__).strip('/')}"
            if abs_path in self.registered_webhooks:
                raise ValueError(f"Webhook {abs_path} already exists.")
            self.registered_webhooks[abs_path] = func
            # Return the function: otherwise the decorated name would be rebound to `None`.
            return func

        return _inner_post

    def launch(self, prevent_thread_lock: bool = False, **launch_kwargs: Any) -> None:
        """Launch the Gradio app and register webhooks to the underlying FastAPI server.

        Input parameters are forwarded to Gradio when launching the app.

        Args:
            prevent_thread_lock: If `True`, return right after the webhooks are registered instead of blocking the
                calling thread.
            **launch_kwargs: Forwarded to the `launch` method of the UI. `share` defaults to `True` when running
                locally (i.e. when the `SPACE_ID` environment variable is not set).

        Raises:
            ValueError: If the URL of the app cannot be determined.
        """
        ui = self._ui or self._get_default_ui()

        # Start Gradio App
        # - as non-blocking so that webhooks can be added afterwards
        # - as shared if launch locally (to debug webhooks)
        launch_kwargs.setdefault("share", _is_local)
        self.fastapi_app, _, _ = ui.launch(prevent_thread_lock=True, **launch_kwargs)

        # Register webhooks to FastAPI app
        for path, func in self.registered_webhooks.items():
            # Add secret check if required
            if self.webhook_secret is not None:
                func = _wrap_webhook_to_check_secret(func, webhook_secret=self.webhook_secret)

            # Add route to FastAPI app
            self.fastapi_app.post(path)(func)

        # Print instructions and block main thread
        space_host = os.environ.get("SPACE_HOST")
        url = "https://" + space_host if space_host is not None else (ui.share_url or ui.local_url)
        if url is None:
            raise ValueError("Cannot find the URL of the app. Please provide a valid `ui` or update `gradio` version.")
        url = url.strip("/")
        message = "\nWebhooks are correctly setup and ready to use:"
        message += "\n" + "\n".join(f"  - POST {url}{webhook}" for webhook in self.registered_webhooks)
        message += "\nGo to https://huggingface.co/settings/webhooks to setup your webhooks."
        print(message)

        if not prevent_thread_lock:
            ui.block_thread()

    def _get_default_ui(self) -> "gr.Blocks":
        """Default UI if not provided (lists webhooks and provides basic instructions)."""
        import gradio as gr

        with gr.Blocks() as ui:
            gr.Markdown("# This is an app to process 🤗 Webhooks")
            gr.Markdown(
                "Webhooks are a foundation for MLOps-related features. They allow you to listen for new changes on"
                " specific repos or to all repos belonging to particular set of users/organizations (not just your"
                " repos, but any repo). Check out this [guide](https://huggingface.co/docs/hub/webhooks) to get to"
                " know more about webhooks on the Huggingface Hub."
            )
            gr.Markdown(
                f"{len(self.registered_webhooks)} webhook(s) are registered:"
                + "\n\n"
                + "\n ".join(
                    f"- [{webhook_path}]({_get_webhook_doc_url(webhook.__name__, webhook_path)})"
                    for webhook_path, webhook in self.registered_webhooks.items()
                )
            )
            # Build the message in two steps: a conditional expression binds looser than `+`, so writing
            # `"Go to ..." + local_hint if _is_local else space_hint` silently dropped the first sentence on Spaces.
            instructions = "Go to https://huggingface.co/settings/webhooks to setup your webhooks."
            if _is_local:
                instructions += (
                    "\nYour app is running locally. Please look at the logs to check the full URL you need to set."
                )
            else:
                instructions += (
                    "\nThis app is running on a Space. You can find the corresponding URL in the options menu"
                    " (top-right) > 'Embed the Space'. The URL looks like 'https://{username}-{repo_name}.hf.space'."
                )
            gr.Markdown(instructions)
        return ui
def _get_global_app() -> WebhooksServer:
    """Return the module-wide `WebhooksServer`, creating it (with default arguments) on first use."""
    global _global_app
    if _global_app is not None:
        return _global_app
    _global_app = WebhooksServer()
    return _global_app
def _wrap_webhook_to_check_secret(func: Callable, webhook_secret: str) -> Callable:
    """Wraps a webhook function to check the webhook secret before calling the function.

    This is a hacky way to add the `request` parameter to the function signature. Since FastAPI based itself on route
    parameters to inject the values to the function, we need to hack the function signature to retrieve the `Request`
    object (and hence the headers). A far cleaner solution would be to use a middleware. However, since
    `fastapi==0.90.1`, a middleware cannot be added once the app has started. And since the FastAPI app is started by
    Gradio internals (and not by us), we cannot add a middleware.

    This method is called only when a secret has been defined by the user. If a request is sent without the
    "x-webhook-secret", the function will return a 401 error (unauthorized). If the header is sent but is incorrect,
    the function will return a 403 error (forbidden).

    Inspired by https://stackoverflow.com/a/33112180.

    Args:
        func: The webhook function to protect (sync or async).
        webhook_secret: The secret that incoming requests must send in the "x-webhook-secret" header.

    Returns:
        An async function that checks the secret and then calls `func`. Its signature is the one of `func`, with a
        leading `request` parameter added if `func` does not already declare one.
    """
    import hmac  # local import: only needed when a secret is configured

    initial_sig = inspect.signature(func)
    # Compare bytes: `hmac.compare_digest` rejects `str` values that contain non-ASCII characters.
    expected_secret = webhook_secret.encode("utf-8")

    @wraps(func)
    async def _protected_func(request: Request, **kwargs):
        request_secret = request.headers.get("x-webhook-secret")
        if request_secret is None:
            return JSONResponse({"error": "x-webhook-secret header not set."}, status_code=401)
        # The header is attacker-controlled: use a constant-time comparison so that response timing does not
        # leak how many leading characters of the secret were guessed correctly.
        if not hmac.compare_digest(request_secret.encode("utf-8"), expected_secret):
            return JSONResponse({"error": "Invalid webhook secret."}, status_code=403)

        # Inject `request` in kwargs if required
        if "request" in initial_sig.parameters:
            kwargs["request"] = request

        # Handle both sync and async routes
        if inspect.iscoroutinefunction(func):
            return await func(**kwargs)
        else:
            return func(**kwargs)

    # Update signature to include request
    if "request" not in initial_sig.parameters:
        _protected_func.__signature__ = initial_sig.replace(  # type: ignore
            parameters=(
                inspect.Parameter(name="request", kind=inspect.Parameter.POSITIONAL_OR_KEYWORD, annotation=Request),
            )
            + tuple(initial_sig.parameters.values())
        )

    # Return protected route
    return _protected_func
import constants +from .utils import parse_datetime + + +DiscussionStatus = Literal["open", "closed", "merged", "draft"] + + +@dataclass +class Discussion: + """ + A Discussion or Pull Request on the Hub. + + This dataclass is not intended to be instantiated directly. + + Attributes: + title (`str`): + The title of the Discussion / Pull Request + status (`str`): + The status of the Discussion / Pull Request. + It must be one of: + * `"open"` + * `"closed"` + * `"merged"` (only for Pull Requests ) + * `"draft"` (only for Pull Requests ) + num (`int`): + The number of the Discussion / Pull Request. + repo_id (`str`): + The id (`"{namespace}/{repo_name}"`) of the repo on which + the Discussion / Pull Request was open. + repo_type (`str`): + The type of the repo on which the Discussion / Pull Request was open. + Possible values are: `"model"`, `"dataset"`, `"space"`. + author (`str`): + The username of the Discussion / Pull Request author. + Can be `"deleted"` if the user has been deleted since. + is_pull_request (`bool`): + Whether or not this is a Pull Request. + created_at (`datetime`): + The `datetime` of creation of the Discussion / Pull Request. + endpoint (`str`): + Endpoint of the Hub. Default is https://huggingface.co. + git_reference (`str`, *optional*): + (property) Git reference to which changes can be pushed if this is a Pull Request, `None` otherwise. + url (`str`): + (property) URL of the discussion on the Hub. + """ + + title: str + status: DiscussionStatus + num: int + repo_id: str + repo_type: str + author: str + is_pull_request: bool + created_at: datetime + endpoint: str + + @property + def git_reference(self) -> Optional[str]: + """ + If this is a Pull Request , returns the git reference to which changes can be pushed. + Returns `None` otherwise. 
+ """ + if self.is_pull_request: + return f"refs/pr/{self.num}" + return None + + @property + def url(self) -> str: + """Returns the URL of the discussion on the Hub.""" + if self.repo_type is None or self.repo_type == constants.REPO_TYPE_MODEL: + return f"{self.endpoint}/{self.repo_id}/discussions/{self.num}" + return f"{self.endpoint}/{self.repo_type}s/{self.repo_id}/discussions/{self.num}" + + +@dataclass +class DiscussionWithDetails(Discussion): + """ + Subclass of [`Discussion`]. + + Attributes: + title (`str`): + The title of the Discussion / Pull Request + status (`str`): + The status of the Discussion / Pull Request. + It can be one of: + * `"open"` + * `"closed"` + * `"merged"` (only for Pull Requests ) + * `"draft"` (only for Pull Requests ) + num (`int`): + The number of the Discussion / Pull Request. + repo_id (`str`): + The id (`"{namespace}/{repo_name}"`) of the repo on which + the Discussion / Pull Request was open. + repo_type (`str`): + The type of the repo on which the Discussion / Pull Request was open. + Possible values are: `"model"`, `"dataset"`, `"space"`. + author (`str`): + The username of the Discussion / Pull Request author. + Can be `"deleted"` if the user has been deleted since. + is_pull_request (`bool`): + Whether or not this is a Pull Request. + created_at (`datetime`): + The `datetime` of creation of the Discussion / Pull Request. + events (`list` of [`DiscussionEvent`]) + The list of [`DiscussionEvents`] in this Discussion or Pull Request. + conflicting_files (`Union[List[str], bool, None]`, *optional*): + A list of conflicting files if this is a Pull Request. + `None` if `self.is_pull_request` is `False`. + `True` if there are conflicting files but the list can't be retrieved. + target_branch (`str`, *optional*): + The branch into which changes are to be merged if this is a + Pull Request . `None` if `self.is_pull_request` is `False`. 
+ merge_commit_oid (`str`, *optional*): + If this is a merged Pull Request , this is set to the OID / SHA of + the merge commit, `None` otherwise. + diff (`str`, *optional*): + The git diff if this is a Pull Request , `None` otherwise. + endpoint (`str`): + Endpoint of the Hub. Default is https://huggingface.co. + git_reference (`str`, *optional*): + (property) Git reference to which changes can be pushed if this is a Pull Request, `None` otherwise. + url (`str`): + (property) URL of the discussion on the Hub. + """ + + events: List["DiscussionEvent"] + conflicting_files: Union[List[str], bool, None] + target_branch: Optional[str] + merge_commit_oid: Optional[str] + diff: Optional[str] + + +class DiscussionEventArgs(TypedDict): + id: str + type: str + created_at: datetime + author: str + _event: dict + + +@dataclass +class DiscussionEvent: + """ + An event in a Discussion or Pull Request. + + Use concrete classes: + * [`DiscussionComment`] + * [`DiscussionStatusChange`] + * [`DiscussionCommit`] + * [`DiscussionTitleChange`] + + Attributes: + id (`str`): + The ID of the event. An hexadecimal string. + type (`str`): + The type of the event. + created_at (`datetime`): + A [`datetime`](https://docs.python.org/3/library/datetime.html?highlight=datetime#datetime.datetime) + object holding the creation timestamp for the event. + author (`str`): + The username of the Discussion / Pull Request author. + Can be `"deleted"` if the user has been deleted since. + """ + + id: str + type: str + created_at: datetime + author: str + + _event: dict + """Stores the original event data, in case we need to access it later.""" + + +@dataclass +class DiscussionComment(DiscussionEvent): + """A comment in a Discussion / Pull Request. + + Subclass of [`DiscussionEvent`]. + + + Attributes: + id (`str`): + The ID of the event. An hexadecimal string. + type (`str`): + The type of the event. 
+ created_at (`datetime`): + A [`datetime`](https://docs.python.org/3/library/datetime.html?highlight=datetime#datetime.datetime) + object holding the creation timestamp for the event. + author (`str`): + The username of the Discussion / Pull Request author. + Can be `"deleted"` if the user has been deleted since. + content (`str`): + The raw markdown content of the comment. Mentions, links and images are not rendered. + edited (`bool`): + Whether or not this comment has been edited. + hidden (`bool`): + Whether or not this comment has been hidden. + """ + + content: str + edited: bool + hidden: bool + + @property + def rendered(self) -> str: + """The rendered comment, as a HTML string""" + return self._event["data"]["latest"]["html"] + + @property + def last_edited_at(self) -> datetime: + """The last edit time, as a `datetime` object.""" + return parse_datetime(self._event["data"]["latest"]["updatedAt"]) + + @property + def last_edited_by(self) -> str: + """The last edit time, as a `datetime` object.""" + return self._event["data"]["latest"].get("author", {}).get("name", "deleted") + + @property + def edit_history(self) -> List[dict]: + """The edit history of the comment""" + return self._event["data"]["history"] + + @property + def number_of_edits(self) -> int: + return len(self.edit_history) + + +@dataclass +class DiscussionStatusChange(DiscussionEvent): + """A change of status in a Discussion / Pull Request. + + Subclass of [`DiscussionEvent`]. + + Attributes: + id (`str`): + The ID of the event. An hexadecimal string. + type (`str`): + The type of the event. + created_at (`datetime`): + A [`datetime`](https://docs.python.org/3/library/datetime.html?highlight=datetime#datetime.datetime) + object holding the creation timestamp for the event. + author (`str`): + The username of the Discussion / Pull Request author. + Can be `"deleted"` if the user has been deleted since. + new_status (`str`): + The status of the Discussion / Pull Request after the change. 
+ It can be one of: + * `"open"` + * `"closed"` + * `"merged"` (only for Pull Requests ) + """ + + new_status: str + + +@dataclass +class DiscussionCommit(DiscussionEvent): + """A commit in a Pull Request. + + Subclass of [`DiscussionEvent`]. + + Attributes: + id (`str`): + The ID of the event. An hexadecimal string. + type (`str`): + The type of the event. + created_at (`datetime`): + A [`datetime`](https://docs.python.org/3/library/datetime.html?highlight=datetime#datetime.datetime) + object holding the creation timestamp for the event. + author (`str`): + The username of the Discussion / Pull Request author. + Can be `"deleted"` if the user has been deleted since. + summary (`str`): + The summary of the commit. + oid (`str`): + The OID / SHA of the commit, as a hexadecimal string. + """ + + summary: str + oid: str + + +@dataclass +class DiscussionTitleChange(DiscussionEvent): + """A rename event in a Discussion / Pull Request. + + Subclass of [`DiscussionEvent`]. + + Attributes: + id (`str`): + The ID of the event. An hexadecimal string. + type (`str`): + The type of the event. + created_at (`datetime`): + A [`datetime`](https://docs.python.org/3/library/datetime.html?highlight=datetime#datetime.datetime) + object holding the creation timestamp for the event. + author (`str`): + The username of the Discussion / Pull Request author. + Can be `"deleted"` if the user has been deleted since. + old_title (`str`): + The previous title for the Discussion / Pull Request. + new_title (`str`): + The new title. 
+ """ + + old_title: str + new_title: str + + +def deserialize_event(event: dict) -> DiscussionEvent: + """Instantiates a [`DiscussionEvent`] from a dict""" + event_id: str = event["id"] + event_type: str = event["type"] + created_at = parse_datetime(event["createdAt"]) + + common_args: DiscussionEventArgs = { + "id": event_id, + "type": event_type, + "created_at": created_at, + "author": event.get("author", {}).get("name", "deleted"), + "_event": event, + } + + if event_type == "comment": + return DiscussionComment( + **common_args, + edited=event["data"]["edited"], + hidden=event["data"]["hidden"], + content=event["data"]["latest"]["raw"], + ) + if event_type == "status-change": + return DiscussionStatusChange( + **common_args, + new_status=event["data"]["status"], + ) + if event_type == "commit": + return DiscussionCommit( + **common_args, + summary=event["data"]["subject"], + oid=event["data"]["oid"], + ) + if event_type == "title-change": + return DiscussionTitleChange( + **common_args, + old_title=event["data"]["from"], + new_title=event["data"]["to"], + ) + + return DiscussionEvent(**common_args) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/constants.py b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/constants.py new file mode 100644 index 0000000000000000000000000000000000000000..b30b2c01d99c5ee5428875f3711227024f5d0829 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/constants.py @@ -0,0 +1,294 @@ +import os +import re +import typing +from typing import Literal, Optional, Tuple + + +# Possible values for env variables + + +ENV_VARS_TRUE_VALUES = {"1", "ON", "YES", "TRUE"} +ENV_VARS_TRUE_AND_AUTO_VALUES = ENV_VARS_TRUE_VALUES.union({"AUTO"}) + + +def _is_true(value: Optional[str]) -> bool: + if value is None: + return False + return value.upper() in ENV_VARS_TRUE_VALUES + + +def _as_int(value: Optional[str]) -> Optional[int]: + if value is None: + return None + return int(value) + + 
+# Constants for file downloads + +PYTORCH_WEIGHTS_NAME = "pytorch_model.bin" +TF2_WEIGHTS_NAME = "tf_model.h5" +TF_WEIGHTS_NAME = "model.ckpt" +FLAX_WEIGHTS_NAME = "flax_model.msgpack" +CONFIG_NAME = "config.json" +REPOCARD_NAME = "README.md" +DEFAULT_ETAG_TIMEOUT = 10 +DEFAULT_DOWNLOAD_TIMEOUT = 10 +DEFAULT_REQUEST_TIMEOUT = 10 +DOWNLOAD_CHUNK_SIZE = 10 * 1024 * 1024 +HF_TRANSFER_CONCURRENCY = 100 +MAX_HTTP_DOWNLOAD_SIZE = 50 * 1000 * 1000 * 1000 # 50 GB + +# Constants for serialization + +PYTORCH_WEIGHTS_FILE_PATTERN = "pytorch_model{suffix}.bin" # Unsafe pickle: use safetensors instead +SAFETENSORS_WEIGHTS_FILE_PATTERN = "model{suffix}.safetensors" +TF2_WEIGHTS_FILE_PATTERN = "tf_model{suffix}.h5" + +# Constants for safetensors repos + +SAFETENSORS_SINGLE_FILE = "model.safetensors" +SAFETENSORS_INDEX_FILE = "model.safetensors.index.json" +SAFETENSORS_MAX_HEADER_LENGTH = 25_000_000 + +# Timeout of aquiring file lock and logging the attempt +FILELOCK_LOG_EVERY_SECONDS = 10 + +# Git-related constants + +DEFAULT_REVISION = "main" +REGEX_COMMIT_OID = re.compile(r"[A-Fa-f0-9]{5,40}") + +HUGGINGFACE_CO_URL_HOME = "https://huggingface.co/" + +_staging_mode = _is_true(os.environ.get("HUGGINGFACE_CO_STAGING")) + +_HF_DEFAULT_ENDPOINT = "https://huggingface.co" +_HF_DEFAULT_STAGING_ENDPOINT = "https://hub-ci.huggingface.co" +ENDPOINT = os.getenv("HF_ENDPOINT", _HF_DEFAULT_ENDPOINT).rstrip("/") +HUGGINGFACE_CO_URL_TEMPLATE = ENDPOINT + "/{repo_id}/resolve/{revision}/{filename}" + +if _staging_mode: + ENDPOINT = _HF_DEFAULT_STAGING_ENDPOINT + HUGGINGFACE_CO_URL_TEMPLATE = _HF_DEFAULT_STAGING_ENDPOINT + "/{repo_id}/resolve/{revision}/{filename}" + +HUGGINGFACE_HEADER_X_REPO_COMMIT = "X-Repo-Commit" +HUGGINGFACE_HEADER_X_LINKED_ETAG = "X-Linked-Etag" +HUGGINGFACE_HEADER_X_LINKED_SIZE = "X-Linked-Size" +HUGGINGFACE_HEADER_X_BILL_TO = "X-HF-Bill-To" + +INFERENCE_ENDPOINT = os.environ.get("HF_INFERENCE_ENDPOINT", "https://api-inference.huggingface.co") + +# See 
https://huggingface.co/docs/inference-endpoints/index +INFERENCE_ENDPOINTS_ENDPOINT = "https://api.endpoints.huggingface.cloud/v2" +INFERENCE_CATALOG_ENDPOINT = "https://endpoints.huggingface.co/api/catalog" + +# See https://api.endpoints.huggingface.cloud/#post-/v2/endpoint/-namespace- +INFERENCE_ENDPOINT_IMAGE_KEYS = [ + "custom", + "huggingface", + "huggingfaceNeuron", + "llamacpp", + "tei", + "tgi", + "tgiNeuron", +] + +# Proxy for third-party providers +INFERENCE_PROXY_TEMPLATE = "https://router.huggingface.co/{provider}" + +REPO_ID_SEPARATOR = "--" +# ^ this substring is not allowed in repo_ids on hf.co +# and is the canonical one we use for serialization of repo ids elsewhere. + + +REPO_TYPE_DATASET = "dataset" +REPO_TYPE_SPACE = "space" +REPO_TYPE_MODEL = "model" +REPO_TYPES = [None, REPO_TYPE_MODEL, REPO_TYPE_DATASET, REPO_TYPE_SPACE] +SPACES_SDK_TYPES = ["gradio", "streamlit", "docker", "static"] + +REPO_TYPES_URL_PREFIXES = { + REPO_TYPE_DATASET: "datasets/", + REPO_TYPE_SPACE: "spaces/", +} +REPO_TYPES_MAPPING = { + "datasets": REPO_TYPE_DATASET, + "spaces": REPO_TYPE_SPACE, + "models": REPO_TYPE_MODEL, +} + +DiscussionTypeFilter = Literal["all", "discussion", "pull_request"] +DISCUSSION_TYPES: Tuple[DiscussionTypeFilter, ...] = typing.get_args(DiscussionTypeFilter) +DiscussionStatusFilter = Literal["all", "open", "closed"] +DISCUSSION_STATUS: Tuple[DiscussionTypeFilter, ...] = typing.get_args(DiscussionStatusFilter) + +# Webhook subscription types +WEBHOOK_DOMAIN_T = Literal["repo", "discussions"] + +# default cache +default_home = os.path.join(os.path.expanduser("~"), ".cache") +HF_HOME = os.path.expandvars( + os.path.expanduser( + os.getenv( + "HF_HOME", + os.path.join(os.getenv("XDG_CACHE_HOME", default_home), "huggingface"), + ) + ) +) +hf_cache_home = HF_HOME # for backward compatibility. 
TODO: remove this in 1.0.0 + +default_cache_path = os.path.join(HF_HOME, "hub") +default_assets_cache_path = os.path.join(HF_HOME, "assets") + +# Legacy env variables +HUGGINGFACE_HUB_CACHE = os.getenv("HUGGINGFACE_HUB_CACHE", default_cache_path) +HUGGINGFACE_ASSETS_CACHE = os.getenv("HUGGINGFACE_ASSETS_CACHE", default_assets_cache_path) + +# New env variables +HF_HUB_CACHE = os.path.expandvars( + os.path.expanduser( + os.getenv( + "HF_HUB_CACHE", + HUGGINGFACE_HUB_CACHE, + ) + ) +) +HF_ASSETS_CACHE = os.path.expandvars( + os.path.expanduser( + os.getenv( + "HF_ASSETS_CACHE", + HUGGINGFACE_ASSETS_CACHE, + ) + ) +) + +HF_HUB_OFFLINE = _is_true(os.environ.get("HF_HUB_OFFLINE") or os.environ.get("TRANSFORMERS_OFFLINE")) + +# If set, log level will be set to DEBUG and all requests made to the Hub will be logged +# as curl commands for reproducibility. +HF_DEBUG = _is_true(os.environ.get("HF_DEBUG")) + +# Opt-out from telemetry requests +HF_HUB_DISABLE_TELEMETRY = ( + _is_true(os.environ.get("HF_HUB_DISABLE_TELEMETRY")) # HF-specific env variable + or _is_true(os.environ.get("DISABLE_TELEMETRY")) + or _is_true(os.environ.get("DO_NOT_TRACK")) # https://consoledonottrack.com/ +) + +HF_TOKEN_PATH = os.path.expandvars( + os.path.expanduser( + os.getenv( + "HF_TOKEN_PATH", + os.path.join(HF_HOME, "token"), + ) + ) +) +HF_STORED_TOKENS_PATH = os.path.join(os.path.dirname(HF_TOKEN_PATH), "stored_tokens") + +if _staging_mode: + # In staging mode, we use a different cache to ensure we don't mix up production and staging data or tokens + # In practice in `huggingface_hub` tests, we monkeypatch these values with temporary directories. The following + # lines are only used in third-party libraries tests (e.g. `transformers`, `diffusers`, etc.). 
+ _staging_home = os.path.join(os.path.expanduser("~"), ".cache", "huggingface_staging") + HUGGINGFACE_HUB_CACHE = os.path.join(_staging_home, "hub") + HF_TOKEN_PATH = os.path.join(_staging_home, "token") + +# Here, `True` will disable progress bars globally without possibility of enabling it +# programmatically. `False` will enable them without possibility of disabling them. +# If environment variable is not set (None), then the user is free to enable/disable +# them programmatically. +# TL;DR: env variable has priority over code +__HF_HUB_DISABLE_PROGRESS_BARS = os.environ.get("HF_HUB_DISABLE_PROGRESS_BARS") +HF_HUB_DISABLE_PROGRESS_BARS: Optional[bool] = ( + _is_true(__HF_HUB_DISABLE_PROGRESS_BARS) if __HF_HUB_DISABLE_PROGRESS_BARS is not None else None +) + +# Disable warning on machines that do not support symlinks (e.g. Windows non-developer) +HF_HUB_DISABLE_SYMLINKS_WARNING: bool = _is_true(os.environ.get("HF_HUB_DISABLE_SYMLINKS_WARNING")) + +# Disable warning when using experimental features +HF_HUB_DISABLE_EXPERIMENTAL_WARNING: bool = _is_true(os.environ.get("HF_HUB_DISABLE_EXPERIMENTAL_WARNING")) + +# Disable sending the cached token by default is all HTTP requests to the Hub +HF_HUB_DISABLE_IMPLICIT_TOKEN: bool = _is_true(os.environ.get("HF_HUB_DISABLE_IMPLICIT_TOKEN")) + +# Enable fast-download using external dependency "hf_transfer" +# See: +# - https://pypi.org/project/hf-transfer/ +# - https://github.com/huggingface/hf_transfer (private) +HF_HUB_ENABLE_HF_TRANSFER: bool = _is_true(os.environ.get("HF_HUB_ENABLE_HF_TRANSFER")) + + +# UNUSED +# We don't use symlinks in local dir anymore. 
+HF_HUB_LOCAL_DIR_AUTO_SYMLINK_THRESHOLD: int = ( + _as_int(os.environ.get("HF_HUB_LOCAL_DIR_AUTO_SYMLINK_THRESHOLD")) or 5 * 1024 * 1024 +) + +# Used to override the etag timeout on a system level +HF_HUB_ETAG_TIMEOUT: int = _as_int(os.environ.get("HF_HUB_ETAG_TIMEOUT")) or DEFAULT_ETAG_TIMEOUT + +# Used to override the get request timeout on a system level +HF_HUB_DOWNLOAD_TIMEOUT: int = _as_int(os.environ.get("HF_HUB_DOWNLOAD_TIMEOUT")) or DEFAULT_DOWNLOAD_TIMEOUT + +# Allows to add information about the requester in the user-agent (eg. partner name) +HF_HUB_USER_AGENT_ORIGIN: Optional[str] = os.environ.get("HF_HUB_USER_AGENT_ORIGIN") + +# List frameworks that are handled by the InferenceAPI service. Useful to scan endpoints and check which models are +# deployed and running. Since 95% of the models are using the top 4 frameworks listed below, we scan only those by +# default. We still keep the full list of supported frameworks in case we want to scan all of them. +MAIN_INFERENCE_API_FRAMEWORKS = [ + "diffusers", + "sentence-transformers", + "text-generation-inference", + "transformers", +] + +ALL_INFERENCE_API_FRAMEWORKS = MAIN_INFERENCE_API_FRAMEWORKS + [ + "adapter-transformers", + "allennlp", + "asteroid", + "bertopic", + "doctr", + "espnet", + "fairseq", + "fastai", + "fasttext", + "flair", + "k2", + "keras", + "mindspore", + "nemo", + "open_clip", + "paddlenlp", + "peft", + "pyannote-audio", + "sklearn", + "spacy", + "span-marker", + "speechbrain", + "stanza", + "timm", +] + +# If OAuth didn't work after 2 redirects, there's likely a third-party cookie issue in the Space iframe view. +# In this case, we redirect the user to the non-iframe view. 
+OAUTH_MAX_REDIRECTS = 2 + +# OAuth-related environment variables injected by the Space +OAUTH_CLIENT_ID = os.environ.get("OAUTH_CLIENT_ID") +OAUTH_CLIENT_SECRET = os.environ.get("OAUTH_CLIENT_SECRET") +OAUTH_SCOPES = os.environ.get("OAUTH_SCOPES") +OPENID_PROVIDER_URL = os.environ.get("OPENID_PROVIDER_URL") + +# Xet constants +HUGGINGFACE_HEADER_X_XET_ENDPOINT = "X-Xet-Cas-Url" +HUGGINGFACE_HEADER_X_XET_ACCESS_TOKEN = "X-Xet-Access-Token" +HUGGINGFACE_HEADER_X_XET_EXPIRATION = "X-Xet-Token-Expiration" +HUGGINGFACE_HEADER_X_XET_HASH = "X-Xet-Hash" +HUGGINGFACE_HEADER_X_XET_REFRESH_ROUTE = "X-Xet-Refresh-Route" +HUGGINGFACE_HEADER_LINK_XET_AUTH_KEY = "xet-auth" + +default_xet_cache_path = os.path.join(HF_HOME, "xet") +HF_XET_CACHE = os.getenv("HF_XET_CACHE", default_xet_cache_path) +HF_HUB_DISABLE_XET: bool = _is_true(os.environ.get("HF_HUB_DISABLE_XET")) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/dataclasses.py b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/dataclasses.py new file mode 100644 index 0000000000000000000000000000000000000000..636a0ac64b327448e6f8f56b10add54528071f29 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/dataclasses.py @@ -0,0 +1,484 @@ +import inspect +from dataclasses import _MISSING_TYPE, MISSING, Field, field, fields +from functools import wraps +from typing import ( + Any, + Callable, + Dict, + ForwardRef, + List, + Literal, + Optional, + Tuple, + Type, + TypeVar, + Union, + get_args, + get_origin, + overload, +) + +from .errors import ( + StrictDataclassClassValidationError, + StrictDataclassDefinitionError, + StrictDataclassFieldValidationError, +) + + +Validator_T = Callable[[Any], None] +T = TypeVar("T") + + +# The overload decorator helps type checkers understand the different return types +@overload +def strict(cls: Type[T]) -> Type[T]: ... + + +@overload +def strict(*, accept_kwargs: bool = False) -> Callable[[Type[T]], Type[T]]: ... 
+ + +def strict( + cls: Optional[Type[T]] = None, *, accept_kwargs: bool = False +) -> Union[Type[T], Callable[[Type[T]], Type[T]]]: + """ + Decorator to add strict validation to a dataclass. + + This decorator must be used on top of `@dataclass` to ensure IDEs and static typing tools + recognize the class as a dataclass. + + Can be used with or without arguments: + - `@strict` + - `@strict(accept_kwargs=True)` + + Args: + cls: + The class to convert to a strict dataclass. + accept_kwargs (`bool`, *optional*): + If True, allows arbitrary keyword arguments in `__init__`. Defaults to False. + + Returns: + The enhanced dataclass with strict validation on field assignment. + + Example: + ```py + >>> from dataclasses import dataclass + >>> from huggingface_hub.dataclasses import as_validated_field, strict, validated_field + + >>> @as_validated_field + >>> def positive_int(value: int): + ... if not value >= 0: + ... raise ValueError(f"Value must be positive, got {value}") + + >>> @strict(accept_kwargs=True) + ... @dataclass + ... class User: + ... name: str + ... age: int = positive_int(default=10) + + # Initialize + >>> User(name="John") + User(name='John', age=10) + + # Extra kwargs are accepted + >>> User(name="John", age=30, lastname="Doe") + User(name='John', age=30, *lastname='Doe') + + # Invalid type => raises + >>> User(name="John", age="30") + huggingface_hub.errors.StrictDataclassFieldValidationError: Validation error for field 'age': + TypeError: Field 'age' expected int, got str (value: '30') + + # Invalid value => raises + >>> User(name="John", age=-1) + huggingface_hub.errors.StrictDataclassFieldValidationError: Validation error for field 'age': + ValueError: Value must be positive, got -1 + ``` + """ + + def wrap(cls: Type[T]) -> Type[T]: + if not hasattr(cls, "__dataclass_fields__"): + raise StrictDataclassDefinitionError( + f"Class '{cls.__name__}' must be a dataclass before applying @strict." 
+ ) + + # List and store validators + field_validators: Dict[str, List[Validator_T]] = {} + for f in fields(cls): # type: ignore [arg-type] + validators = [] + validators.append(_create_type_validator(f)) + custom_validator = f.metadata.get("validator") + if custom_validator is not None: + if not isinstance(custom_validator, list): + custom_validator = [custom_validator] + for validator in custom_validator: + if not _is_validator(validator): + raise StrictDataclassDefinitionError( + f"Invalid validator for field '{f.name}': {validator}. Must be a callable taking a single argument." + ) + validators.extend(custom_validator) + field_validators[f.name] = validators + cls.__validators__ = field_validators # type: ignore + + # Override __setattr__ to validate fields on assignment + original_setattr = cls.__setattr__ + + def __strict_setattr__(self: Any, name: str, value: Any) -> None: + """Custom __setattr__ method for strict dataclasses.""" + # Run all validators + for validator in self.__validators__.get(name, []): + try: + validator(value) + except (ValueError, TypeError) as e: + raise StrictDataclassFieldValidationError(field=name, cause=e) from e + + # If validation passed, set the attribute + original_setattr(self, name, value) + + cls.__setattr__ = __strict_setattr__ # type: ignore[method-assign] + + if accept_kwargs: + # (optional) Override __init__ to accept arbitrary keyword arguments + original_init = cls.__init__ + + @wraps(original_init) + def __init__(self, **kwargs: Any) -> None: + # Extract only the fields that are part of the dataclass + dataclass_fields = {f.name for f in fields(cls)} # type: ignore [arg-type] + standard_kwargs = {k: v for k, v in kwargs.items() if k in dataclass_fields} + + # Call the original __init__ with standard fields + original_init(self, **standard_kwargs) + + # Add any additional kwargs as attributes + for name, value in kwargs.items(): + if name not in dataclass_fields: + self.__setattr__(name, value) + + cls.__init__ = 
__init__ # type: ignore[method-assign] + + # (optional) Override __repr__ to include additional kwargs + original_repr = cls.__repr__ + + @wraps(original_repr) + def __repr__(self) -> str: + # Call the original __repr__ to get the standard fields + standard_repr = original_repr(self) + + # Get additional kwargs + additional_kwargs = [ + # add a '*' in front of additional kwargs to let the user know they are not part of the dataclass + f"*{k}={v!r}" + for k, v in self.__dict__.items() + if k not in cls.__dataclass_fields__ # type: ignore [attr-defined] + ] + additional_repr = ", ".join(additional_kwargs) + + # Combine both representations + return f"{standard_repr[:-1]}, {additional_repr})" if additional_kwargs else standard_repr + + cls.__repr__ = __repr__ # type: ignore [method-assign] + + # List all public methods starting with `validate_` => class validators. + class_validators = [] + + for name in dir(cls): + if not name.startswith("validate_"): + continue + method = getattr(cls, name) + if not callable(method): + continue + if len(inspect.signature(method).parameters) != 1: + raise StrictDataclassDefinitionError( + f"Class '{cls.__name__}' has a class validator '{name}' that takes more than one argument." + " Class validators must take only 'self' as an argument. Methods starting with 'validate_'" + " are considered to be class validators." 
+ ) + class_validators.append(method) + + cls.__class_validators__ = class_validators # type: ignore [attr-defined] + + # Add `validate` method to the class, but first check if it already exists + def validate(self: T) -> None: + """Run class validators on the instance.""" + for validator in cls.__class_validators__: # type: ignore [attr-defined] + try: + validator(self) + except (ValueError, TypeError) as e: + raise StrictDataclassClassValidationError(validator=validator.__name__, cause=e) from e + + # Hack to be able to raise if `.validate()` already exists except if it was created by this decorator on a parent class + # (in which case we just override it) + validate.__is_defined_by_strict_decorator__ = True # type: ignore [attr-defined] + + if hasattr(cls, "validate"): + if not getattr(cls.validate, "__is_defined_by_strict_decorator__", False): # type: ignore [attr-defined] + raise StrictDataclassDefinitionError( + f"Class '{cls.__name__}' already implements a method called 'validate'." + " This method name is reserved when using the @strict decorator on a dataclass." + " If you want to keep your own method, please rename it." 
+ ) + + cls.validate = validate # type: ignore + + # Run class validators after initialization + initial_init = cls.__init__ + + @wraps(initial_init) + def init_with_validate(self, *args, **kwargs) -> None: + """Run class validators after initialization.""" + initial_init(self, *args, **kwargs) # type: ignore [call-arg] + cls.validate(self) # type: ignore [attr-defined] + + setattr(cls, "__init__", init_with_validate) + + return cls + + # Return wrapped class or the decorator itself + return wrap(cls) if cls is not None else wrap + + +def validated_field( + validator: Union[List[Validator_T], Validator_T], + default: Union[Any, _MISSING_TYPE] = MISSING, + default_factory: Union[Callable[[], Any], _MISSING_TYPE] = MISSING, + init: bool = True, + repr: bool = True, + hash: Optional[bool] = None, + compare: bool = True, + metadata: Optional[Dict] = None, + **kwargs: Any, +) -> Any: + """ + Create a dataclass field with a custom validator. + + Useful to apply several checks to a field. If only applying one rule, check out the [`as_validated_field`] decorator. + + Args: + validator (`Callable` or `List[Callable]`): + A method that takes a value as input and raises ValueError/TypeError if the value is invalid. + Can be a list of validators to apply multiple checks. + **kwargs: + Additional arguments to pass to `dataclasses.field()`. + + Returns: + A field with the validator attached in metadata + """ + if not isinstance(validator, list): + validator = [validator] + if metadata is None: + metadata = {} + metadata["validator"] = validator + return field( # type: ignore + default=default, # type: ignore [arg-type] + default_factory=default_factory, # type: ignore [arg-type] + init=init, + repr=repr, + hash=hash, + compare=compare, + metadata=metadata, + **kwargs, + ) + + +def as_validated_field(validator: Validator_T): + """ + Decorates a validator function as a [`validated_field`] (i.e. a dataclass field with a custom validator). 
+ + Args: + validator (`Callable`): + A method that takes a value as input and raises ValueError/TypeError if the value is invalid. + """ + + def _inner( + default: Union[Any, _MISSING_TYPE] = MISSING, + default_factory: Union[Callable[[], Any], _MISSING_TYPE] = MISSING, + init: bool = True, + repr: bool = True, + hash: Optional[bool] = None, + compare: bool = True, + metadata: Optional[Dict] = None, + **kwargs: Any, + ): + return validated_field( + validator, + default=default, + default_factory=default_factory, + init=init, + repr=repr, + hash=hash, + compare=compare, + metadata=metadata, + **kwargs, + ) + + return _inner + + +def type_validator(name: str, value: Any, expected_type: Any) -> None: + """Validate that 'value' matches 'expected_type'.""" + origin = get_origin(expected_type) + args = get_args(expected_type) + + if expected_type is Any: + return + elif validator := _BASIC_TYPE_VALIDATORS.get(origin): + validator(name, value, args) + elif isinstance(expected_type, type): # simple types + _validate_simple_type(name, value, expected_type) + elif isinstance(expected_type, ForwardRef) or isinstance(expected_type, str): + return + else: + raise TypeError(f"Unsupported type for field '{name}': {expected_type}") + + +def _validate_union(name: str, value: Any, args: Tuple[Any, ...]) -> None: + """Validate that value matches one of the types in a Union.""" + errors = [] + for t in args: + try: + type_validator(name, value, t) + return # Valid if any type matches + except TypeError as e: + errors.append(str(e)) + + raise TypeError( + f"Field '{name}' with value {repr(value)} doesn't match any type in {args}. 
Errors: {'; '.join(errors)}" + ) + + +def _validate_literal(name: str, value: Any, args: Tuple[Any, ...]) -> None: + """Validate Literal type.""" + if value not in args: + raise TypeError(f"Field '{name}' expected one of {args}, got {value}") + + +def _validate_list(name: str, value: Any, args: Tuple[Any, ...]) -> None: + """Validate List[T] type.""" + if not isinstance(value, list): + raise TypeError(f"Field '{name}' expected a list, got {type(value).__name__}") + + # Validate each item in the list + item_type = args[0] + for i, item in enumerate(value): + try: + type_validator(f"{name}[{i}]", item, item_type) + except TypeError as e: + raise TypeError(f"Invalid item at index {i} in list '{name}'") from e + + +def _validate_dict(name: str, value: Any, args: Tuple[Any, ...]) -> None: + """Validate Dict[K, V] type.""" + if not isinstance(value, dict): + raise TypeError(f"Field '{name}' expected a dict, got {type(value).__name__}") + + # Validate keys and values + key_type, value_type = args + for k, v in value.items(): + try: + type_validator(f"{name}.key", k, key_type) + type_validator(f"{name}[{k!r}]", v, value_type) + except TypeError as e: + raise TypeError(f"Invalid key or value in dict '{name}'") from e + + +def _validate_tuple(name: str, value: Any, args: Tuple[Any, ...]) -> None: + """Validate Tuple type.""" + if not isinstance(value, tuple): + raise TypeError(f"Field '{name}' expected a tuple, got {type(value).__name__}") + + # Handle variable-length tuples: Tuple[T, ...] + if len(args) == 2 and args[1] is Ellipsis: + for i, item in enumerate(value): + try: + type_validator(f"{name}[{i}]", item, args[0]) + except TypeError as e: + raise TypeError(f"Invalid item at index {i} in tuple '{name}'") from e + # Handle fixed-length tuples: Tuple[T1, T2, ...] 
+ elif len(args) != len(value): + raise TypeError(f"Field '{name}' expected a tuple of length {len(args)}, got {len(value)}") + else: + for i, (item, expected) in enumerate(zip(value, args)): + try: + type_validator(f"{name}[{i}]", item, expected) + except TypeError as e: + raise TypeError(f"Invalid item at index {i} in tuple '{name}'") from e + + +def _validate_set(name: str, value: Any, args: Tuple[Any, ...]) -> None: + """Validate Set[T] type.""" + if not isinstance(value, set): + raise TypeError(f"Field '{name}' expected a set, got {type(value).__name__}") + + # Validate each item in the set + item_type = args[0] + for i, item in enumerate(value): + try: + type_validator(f"{name} item", item, item_type) + except TypeError as e: + raise TypeError(f"Invalid item in set '{name}'") from e + + +def _validate_simple_type(name: str, value: Any, expected_type: type) -> None: + """Validate simple type (int, str, etc.).""" + if not isinstance(value, expected_type): + raise TypeError( + f"Field '{name}' expected {expected_type.__name__}, got {type(value).__name__} (value: {repr(value)})" + ) + + +def _create_type_validator(field: Field) -> Validator_T: + """Create a type validator function for a field.""" + # Hacky: we cannot use a lambda here because of reference issues + + def validator(value: Any) -> None: + type_validator(field.name, value, field.type) + + return validator + + +def _is_validator(validator: Any) -> bool: + """Check if a function is a validator. + + A validator is a Callable that can be called with a single positional argument. + The validator can have more arguments with default values. + + Basically, returns True if `validator(value)` is possible. 
+ """ + if not callable(validator): + return False + + signature = inspect.signature(validator) + parameters = list(signature.parameters.values()) + if len(parameters) == 0: + return False + if parameters[0].kind not in ( + inspect.Parameter.POSITIONAL_OR_KEYWORD, + inspect.Parameter.POSITIONAL_ONLY, + inspect.Parameter.VAR_POSITIONAL, + ): + return False + for parameter in parameters[1:]: + if parameter.default == inspect.Parameter.empty: + return False + return True + + +_BASIC_TYPE_VALIDATORS = { + Union: _validate_union, + Literal: _validate_literal, + list: _validate_list, + dict: _validate_dict, + tuple: _validate_tuple, + set: _validate_set, +} + + +__all__ = [ + "strict", + "validated_field", + "Validator_T", + "StrictDataclassClassValidationError", + "StrictDataclassDefinitionError", + "StrictDataclassFieldValidationError", +] diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/errors.py b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/errors.py new file mode 100644 index 0000000000000000000000000000000000000000..e7cc5647ef02bade7f4eb81c93ca31825437af0e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/errors.py @@ -0,0 +1,379 @@ +"""Contains all custom errors.""" + +from pathlib import Path +from typing import Optional, Union + +from requests import HTTPError, Response + + +# CACHE ERRORS + + +class CacheNotFound(Exception): + """Exception thrown when the Huggingface cache is not found.""" + + cache_dir: Union[str, Path] + + def __init__(self, msg: str, cache_dir: Union[str, Path], *args, **kwargs): + super().__init__(msg, *args, **kwargs) + self.cache_dir = cache_dir + + +class CorruptedCacheException(Exception): + """Exception for any unexpected structure in the Huggingface cache-system.""" + + +# HEADERS ERRORS + + +class LocalTokenNotFoundError(EnvironmentError): + """Raised if local token is required but not found.""" + + +# HTTP ERRORS + + +class 
OfflineModeIsEnabled(ConnectionError): + """Raised when a request is made but `HF_HUB_OFFLINE=1` is set as environment variable.""" + + +class HfHubHTTPError(HTTPError): + """ + HTTPError to inherit from for any custom HTTP Error raised in HF Hub. + + Any HTTPError is converted at least into a `HfHubHTTPError`. If some information is + sent back by the server, it will be added to the error message. + + Added details: + - Request ID sourced from headers in order of precedence: "X-Request-Id", "X-Amzn-Trace-Id", "X-Amz-Cf-Id". + - Server error message from the header "X-Error-Message". + - Server error message if we can found one in the response body. + + Example: + ```py + import requests + from huggingface_hub.utils import get_session, hf_raise_for_status, HfHubHTTPError + + response = get_session().post(...) + try: + hf_raise_for_status(response) + except HfHubHTTPError as e: + print(str(e)) # formatted message + e.request_id, e.server_message # details returned by server + + # Complete the error message with additional information once it's raised + e.append_to_message("\n`create_commit` expects the repository to exist.") + raise + ``` + """ + + def __init__(self, message: str, response: Optional[Response] = None, *, server_message: Optional[str] = None): + self.request_id = ( + response.headers.get("x-request-id") + or response.headers.get("X-Amzn-Trace-Id") + or response.headers.get("x-amz-cf-id") + if response is not None + else None + ) + self.server_message = server_message + + super().__init__( + message, + response=response, # type: ignore [arg-type] + request=response.request if response is not None else None, # type: ignore [arg-type] + ) + + def append_to_message(self, additional_message: str) -> None: + """Append additional information to the `HfHubHTTPError` initial message.""" + self.args = (self.args[0] + additional_message,) + self.args[1:] + + +# INFERENCE CLIENT ERRORS + + +class InferenceTimeoutError(HTTPError, TimeoutError): + """Error raised 
when a model is unavailable or the request times out.""" + + +# INFERENCE ENDPOINT ERRORS + + +class InferenceEndpointError(Exception): + """Generic exception when dealing with Inference Endpoints.""" + + +class InferenceEndpointTimeoutError(InferenceEndpointError, TimeoutError): + """Exception for timeouts while waiting for Inference Endpoint.""" + + +# SAFETENSORS ERRORS + + +class SafetensorsParsingError(Exception): + """Raised when failing to parse a safetensors file metadata. + + This can be the case if the file is not a safetensors file or does not respect the specification. + """ + + +class NotASafetensorsRepoError(Exception): + """Raised when a repo is not a Safetensors repo i.e. doesn't have either a `model.safetensors` or a + `model.safetensors.index.json` file. + """ + + +# TEXT GENERATION ERRORS + + +class TextGenerationError(HTTPError): + """Generic error raised if text-generation went wrong.""" + + +# Text Generation Inference Errors +class ValidationError(TextGenerationError): + """Server-side validation error.""" + + +class GenerationError(TextGenerationError): + pass + + +class OverloadedError(TextGenerationError): + pass + + +class IncompleteGenerationError(TextGenerationError): + pass + + +class UnknownError(TextGenerationError): + pass + + +# VALIDATION ERRORS + + +class HFValidationError(ValueError): + """Generic exception thrown by `huggingface_hub` validators. + + Inherits from [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError). + """ + + +# FILE METADATA ERRORS + + +class FileMetadataError(OSError): + """Error triggered when the metadata of a file on the Hub cannot be retrieved (missing ETag or commit_hash). + + Inherits from `OSError` for backward compatibility. + """ + + +# REPOSITORY ERRORS + + +class RepositoryNotFoundError(HfHubHTTPError): + """ + Raised when trying to access a hf.co URL with an invalid repository name, or + with a private repo name the user does not have access to. 
+ + Example: + + ```py + >>> from huggingface_hub import model_info + >>> model_info("") + (...) + huggingface_hub.utils._errors.RepositoryNotFoundError: 401 Client Error. (Request ID: PvMw_VjBMjVdMz53WKIzP) + + Repository Not Found for url: https://huggingface.co/api/models/%3Cnon_existent_repository%3E. + Please make sure you specified the correct `repo_id` and `repo_type`. + If the repo is private, make sure you are authenticated. + Invalid username or password. + ``` + """ + + +class GatedRepoError(RepositoryNotFoundError): + """ + Raised when trying to access a gated repository for which the user is not on the + authorized list. + + Note: derives from `RepositoryNotFoundError` to ensure backward compatibility. + + Example: + + ```py + >>> from huggingface_hub import model_info + >>> model_info("") + (...) + huggingface_hub.utils._errors.GatedRepoError: 403 Client Error. (Request ID: ViT1Bf7O_026LGSQuVqfa) + + Cannot access gated repo for url https://huggingface.co/api/models/ardent-figment/gated-model. + Access to model ardent-figment/gated-model is restricted and you are not in the authorized list. + Visit https://huggingface.co/ardent-figment/gated-model to ask for access. + ``` + """ + + +class DisabledRepoError(HfHubHTTPError): + """ + Raised when trying to access a repository that has been disabled by its author. + + Example: + + ```py + >>> from huggingface_hub import dataset_info + >>> dataset_info("laion/laion-art") + (...) + huggingface_hub.utils._errors.DisabledRepoError: 403 Client Error. (Request ID: Root=1-659fc3fa-3031673e0f92c71a2260dbe2;bc6f4dfb-b30a-4862-af0a-5cfe827610d8) + + Cannot access repository for url https://huggingface.co/api/datasets/laion/laion-art. + Access to this resource is disabled. + ``` + """ + + +# REVISION ERROR + + +class RevisionNotFoundError(HfHubHTTPError): + """ + Raised when trying to access a hf.co URL with a valid repository but an invalid + revision. 
+ + Example: + + ```py + >>> from huggingface_hub import hf_hub_download + >>> hf_hub_download('bert-base-cased', 'config.json', revision='') + (...) + huggingface_hub.utils._errors.RevisionNotFoundError: 404 Client Error. (Request ID: Mwhe_c3Kt650GcdKEFomX) + + Revision Not Found for url: https://huggingface.co/bert-base-cased/resolve/%3Cnon-existent-revision%3E/config.json. + ``` + """ + + +# ENTRY ERRORS +class EntryNotFoundError(HfHubHTTPError): + """ + Raised when trying to access a hf.co URL with a valid repository and revision + but an invalid filename. + + Example: + + ```py + >>> from huggingface_hub import hf_hub_download + >>> hf_hub_download('bert-base-cased', '') + (...) + huggingface_hub.utils._errors.EntryNotFoundError: 404 Client Error. (Request ID: 53pNl6M0MxsnG5Sw8JA6x) + + Entry Not Found for url: https://huggingface.co/bert-base-cased/resolve/main/%3Cnon-existent-file%3E. + ``` + """ + + +class LocalEntryNotFoundError(EntryNotFoundError, FileNotFoundError, ValueError): + """ + Raised when trying to access a file or snapshot that is not on the disk when network is + disabled or unavailable (connection issue). The entry may exist on the Hub. + + Note: `ValueError` type is to ensure backward compatibility. + Note: `LocalEntryNotFoundError` derives from `HTTPError` because of `EntryNotFoundError` + even when it is not a network issue. + + Example: + + ```py + >>> from huggingface_hub import hf_hub_download + >>> hf_hub_download('bert-base-cased', '', local_files_only=True) + (...) + huggingface_hub.utils._errors.LocalEntryNotFoundError: Cannot find the requested files in the disk cache and outgoing traffic has been disabled. To enable hf.co look-ups and downloads online, set 'local_files_only' to False. + ``` + """ + + def __init__(self, message: str): + super().__init__(message, response=None) + + +# REQUEST ERROR +class BadRequestError(HfHubHTTPError, ValueError): + """ + Raised by `hf_raise_for_status` when the server returns a HTTP 400 error. 
+ + Example: + + ```py + >>> resp = requests.post("hf.co/api/check", ...) + >>> hf_raise_for_status(resp, endpoint_name="check") + huggingface_hub.utils._errors.BadRequestError: Bad request for check endpoint: {details} (Request ID: XXX) + ``` + """ + + +# DDUF file format ERROR + + +class DDUFError(Exception): + """Base exception for errors related to the DDUF format.""" + + +class DDUFCorruptedFileError(DDUFError): + """Exception thrown when the DDUF file is corrupted.""" + + +class DDUFExportError(DDUFError): + """Base exception for errors during DDUF export.""" + + +class DDUFInvalidEntryNameError(DDUFExportError): + """Exception thrown when the entry name is invalid.""" + + +# STRICT DATACLASSES ERRORS + + +class StrictDataclassError(Exception): + """Base exception for strict dataclasses.""" + + +class StrictDataclassDefinitionError(StrictDataclassError): + """Exception thrown when a strict dataclass is defined incorrectly.""" + + +class StrictDataclassFieldValidationError(StrictDataclassError): + """Exception thrown when a strict dataclass fails validation for a given field.""" + + def __init__(self, field: str, cause: Exception): + error_message = f"Validation error for field '{field}':" + error_message += f"\n {cause.__class__.__name__}: {cause}" + super().__init__(error_message) + + +class StrictDataclassClassValidationError(StrictDataclassError): + """Exception thrown when a strict dataclass fails validation on a class validator.""" + + def __init__(self, validator: str, cause: Exception): + error_message = f"Class validation error for validator '{validator}':" + error_message += f"\n {cause.__class__.__name__}: {cause}" + super().__init__(error_message) + + +# XET ERRORS + + +class XetError(Exception): + """Base exception for errors related to Xet Storage.""" + + +class XetAuthorizationError(XetError): + """Exception thrown when the user does not have the right authorization to use Xet Storage.""" + + +class XetRefreshTokenError(XetError): + """Exception 
thrown when the refresh token is invalid.""" + + +class XetDownloadError(Exception): + """Exception thrown when the download from Xet Storage fails.""" diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/fastai_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/fastai_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..fc3b42323a251140aac813da24493918be267472 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/fastai_utils.py @@ -0,0 +1,415 @@ +import json +import os +from pathlib import Path +from pickle import DEFAULT_PROTOCOL, PicklingError +from typing import Any, Dict, List, Optional, Union + +from packaging import version + +from huggingface_hub import constants, snapshot_download +from huggingface_hub.hf_api import HfApi +from huggingface_hub.utils import ( + SoftTemporaryDirectory, + get_fastai_version, + get_fastcore_version, + get_python_version, +) + +from .utils import logging, validate_hf_hub_args +from .utils._runtime import _PY_VERSION # noqa: F401 # for backward compatibility... + + +logger = logging.get_logger(__name__) + + +def _check_fastai_fastcore_versions( + fastai_min_version: str = "2.4", + fastcore_min_version: str = "1.3.27", +): + """ + Checks that the installed fastai and fastcore versions are compatible for pickle serialization. + + Args: + fastai_min_version (`str`, *optional*): + The minimum fastai version supported. + fastcore_min_version (`str`, *optional*): + The minimum fastcore version supported. + + > [!TIP] + > Raises the following error: + > + > - [`ImportError`](https://docs.python.org/3/library/exceptions.html#ImportError) + > if the fastai or fastcore libraries are not available or are of an invalid version. + """ + + if (get_fastcore_version() or get_fastai_version()) == "N/A": + raise ImportError( + f"fastai>={fastai_min_version} and fastcore>={fastcore_min_version} are" + f" required. 
Currently using fastai=={get_fastai_version()} and" + f" fastcore=={get_fastcore_version()}." + ) + + current_fastai_version = version.Version(get_fastai_version()) + current_fastcore_version = version.Version(get_fastcore_version()) + + if current_fastai_version < version.Version(fastai_min_version): + raise ImportError( + "`push_to_hub_fastai` and `from_pretrained_fastai` require a" + f" fastai>={fastai_min_version} version, but you are using fastai version" + f" {get_fastai_version()} which is incompatible. Upgrade with `pip install" + " fastai==2.5.6`." + ) + + if current_fastcore_version < version.Version(fastcore_min_version): + raise ImportError( + "`push_to_hub_fastai` and `from_pretrained_fastai` require a" + f" fastcore>={fastcore_min_version} version, but you are using fastcore" + f" version {get_fastcore_version()} which is incompatible. Upgrade with" + " `pip install fastcore==1.3.27`." + ) + + +def _check_fastai_fastcore_pyproject_versions( + storage_folder: str, + fastai_min_version: str = "2.4", + fastcore_min_version: str = "1.3.27", +): + """ + Checks that the `pyproject.toml` file in the directory `storage_folder` has fastai and fastcore versions + that are compatible with `from_pretrained_fastai` and `push_to_hub_fastai`. If `pyproject.toml` does not exist + or does not contain versions for fastai and fastcore, then it logs a warning. + + Args: + storage_folder (`str`): + Folder to look for the `pyproject.toml` file. + fastai_min_version (`str`, *optional*): + The minimum fastai version supported. + fastcore_min_version (`str`, *optional*): + The minimum fastcore version supported. + + > [!TIP] + > Raises the following errors: + > + > - [`ImportError`](https://docs.python.org/3/library/exceptions.html#ImportError) + > if the `toml` module is not installed. + > - [`ImportError`](https://docs.python.org/3/library/exceptions.html#ImportError) + > if the `pyproject.toml` indicates a lower than minimum supported version of fastai or fastcore. 
+ """ + + try: + import toml + except ModuleNotFoundError: + raise ImportError( + "`push_to_hub_fastai` and `from_pretrained_fastai` require the toml module." + " Install it with `pip install toml`." + ) + + # Checks that a `pyproject.toml`, with `build-system` and `requires` sections, exists in the repository. If so, get a list of required packages. + if not os.path.isfile(f"{storage_folder}/pyproject.toml"): + logger.warning( + "There is no `pyproject.toml` in the repository that contains the fastai" + " `Learner`. The `pyproject.toml` would allow us to verify that your fastai" + " and fastcore versions are compatible with those of the model you want to" + " load." + ) + return + pyproject_toml = toml.load(f"{storage_folder}/pyproject.toml") + + if "build-system" not in pyproject_toml.keys(): + logger.warning( + "There is no `build-system` section in the pyproject.toml of the repository" + " that contains the fastai `Learner`. The `build-system` would allow us to" + " verify that your fastai and fastcore versions are compatible with those" + " of the model you want to load." + ) + return + build_system_toml = pyproject_toml["build-system"] + + if "requires" not in build_system_toml.keys(): + logger.warning( + "There is no `requires` section in the pyproject.toml of the repository" + " that contains the fastai `Learner`. The `requires` would allow us to" + " verify that your fastai and fastcore versions are compatible with those" + " of the model you want to load." + ) + return + package_versions = build_system_toml["requires"] + + # Extracts contains fastai and fastcore versions from `pyproject.toml` if available. + # If the package is specified but not the version (e.g. "fastai" instead of "fastai=2.4"), the default versions are the highest. 
+ fastai_packages = [pck for pck in package_versions if pck.startswith("fastai")] + if len(fastai_packages) == 0: + logger.warning("The repository does not have a fastai version specified in the `pyproject.toml`.") + # fastai_version is an empty string if not specified + else: + fastai_version = str(fastai_packages[0]).partition("=")[2] + if fastai_version != "" and version.Version(fastai_version) < version.Version(fastai_min_version): + raise ImportError( + "`from_pretrained_fastai` requires" + f" fastai>={fastai_min_version} version but the model to load uses" + f" {fastai_version} which is incompatible." + ) + + fastcore_packages = [pck for pck in package_versions if pck.startswith("fastcore")] + if len(fastcore_packages) == 0: + logger.warning("The repository does not have a fastcore version specified in the `pyproject.toml`.") + # fastcore_version is an empty string if not specified + else: + fastcore_version = str(fastcore_packages[0]).partition("=")[2] + if fastcore_version != "" and version.Version(fastcore_version) < version.Version(fastcore_min_version): + raise ImportError( + "`from_pretrained_fastai` requires" + f" fastcore>={fastcore_min_version} version, but you are using fastcore" + f" version {fastcore_version} which is incompatible." + ) + + +README_TEMPLATE = """--- +tags: +- fastai +--- + +# Amazing! + +🥳 Congratulations on hosting your fastai model on the Hugging Face Hub! + +# Some next steps +1. Fill out this model card with more information (see the template below and the [documentation here](https://huggingface.co/docs/hub/model-repos))! + +2. Create a demo in Gradio or Streamlit using 🤗 Spaces ([documentation here](https://huggingface.co/docs/hub/spaces)). + +3. Join the fastai community on the [Fastai Discord](https://discord.com/invite/YKrxeNn)! + +Greetings fellow fastlearner 🤝! Don't forget to delete this content from your model card. 
+ + +--- + + +# Model card + +## Model description +More information needed + +## Intended uses & limitations +More information needed + +## Training and evaluation data +More information needed +""" + +PYPROJECT_TEMPLATE = f"""[build-system] +requires = ["setuptools>=40.8.0", "wheel", "python={get_python_version()}", "fastai={get_fastai_version()}", "fastcore={get_fastcore_version()}"] +build-backend = "setuptools.build_meta:__legacy__" +""" + + +def _create_model_card(repo_dir: Path): + """ + Creates a model card for the repository. + + Args: + repo_dir (`Path`): + Directory where model card is created. + """ + readme_path = repo_dir / "README.md" + + if not readme_path.exists(): + with readme_path.open("w", encoding="utf-8") as f: + f.write(README_TEMPLATE) + + +def _create_model_pyproject(repo_dir: Path): + """ + Creates a `pyproject.toml` for the repository. + + Args: + repo_dir (`Path`): + Directory where `pyproject.toml` is created. + """ + pyproject_path = repo_dir / "pyproject.toml" + + if not pyproject_path.exists(): + with pyproject_path.open("w", encoding="utf-8") as f: + f.write(PYPROJECT_TEMPLATE) + + +def _save_pretrained_fastai( + learner, + save_directory: Union[str, Path], + config: Optional[Dict[str, Any]] = None, +): + """ + Saves a fastai learner to `save_directory` in pickle format using the default pickle protocol for the version of python used. + + Args: + learner (`Learner`): + The `fastai.Learner` you'd like to save. + save_directory (`str` or `Path`): + Specific directory in which you want to save the fastai learner. + config (`dict`, *optional*): + Configuration object. Will be uploaded as a .json file. Example: 'https://huggingface.co/espejelomar/fastai-pet-breeds-classification/blob/main/config.json'. + + > [!TIP] + > Raises the following error: + > + > - [`RuntimeError`](https://docs.python.org/3/library/exceptions.html#RuntimeError) + > if the config file provided is not a dictionary. 
+ """ + _check_fastai_fastcore_versions() + + os.makedirs(save_directory, exist_ok=True) + + # if the user provides config then we update it with the fastai and fastcore versions in CONFIG_TEMPLATE. + if config is not None: + if not isinstance(config, dict): + raise RuntimeError(f"Provided config should be a dict. Got: '{type(config)}'") + path = os.path.join(save_directory, constants.CONFIG_NAME) + with open(path, "w") as f: + json.dump(config, f) + + _create_model_card(Path(save_directory)) + _create_model_pyproject(Path(save_directory)) + + # learner.export saves the model in `self.path`. + learner.path = Path(save_directory) + os.makedirs(save_directory, exist_ok=True) + try: + learner.export( + fname="model.pkl", + pickle_protocol=DEFAULT_PROTOCOL, + ) + except PicklingError: + raise PicklingError( + "You are using a lambda function, i.e., an anonymous function. `pickle`" + " cannot pickle function objects and requires that all functions have" + " names. One possible solution is to name the function." + ) + + +@validate_hf_hub_args +def from_pretrained_fastai( + repo_id: str, + revision: Optional[str] = None, +): + """ + Load pretrained fastai model from the Hub or from a local directory. + + Args: + repo_id (`str`): + The location where the pickled fastai.Learner is. It can be either of the two: + - Hosted on the Hugging Face Hub. E.g.: 'espejelomar/fatai-pet-breeds-classification' or 'distilgpt2'. + You can add a `revision` by appending `@` at the end of `repo_id`. E.g.: `dbmdz/bert-base-german-cased@main`. + Revision is the specific model version to use. Since we use a git-based system for storing models and other + artifacts on the Hugging Face Hub, it can be a branch name, a tag name, or a commit id. + - Hosted locally. `repo_id` would be a directory containing the pickle and a pyproject.toml + indicating the fastai and fastcore versions used to build the `fastai.Learner`. E.g.: `./my_model_directory/`. 
+ revision (`str`, *optional*): + Revision at which the repo's files are downloaded. See documentation of `snapshot_download`. + + Returns: + The `fastai.Learner` model in the `repo_id` repo. + """ + _check_fastai_fastcore_versions() + + # Load the `repo_id` repo. + # `snapshot_download` returns the folder where the model was stored. + # `cache_dir` will be the default '/root/.cache/huggingface/hub' + if not os.path.isdir(repo_id): + storage_folder = snapshot_download( + repo_id=repo_id, + revision=revision, + library_name="fastai", + library_version=get_fastai_version(), + ) + else: + storage_folder = repo_id + + _check_fastai_fastcore_pyproject_versions(storage_folder) + + from fastai.learner import load_learner # type: ignore + + return load_learner(os.path.join(storage_folder, "model.pkl")) + + +@validate_hf_hub_args +def push_to_hub_fastai( + learner, + *, + repo_id: str, + commit_message: str = "Push FastAI model using huggingface_hub.", + private: Optional[bool] = None, + token: Optional[str] = None, + config: Optional[dict] = None, + branch: Optional[str] = None, + create_pr: Optional[bool] = None, + allow_patterns: Optional[Union[List[str], str]] = None, + ignore_patterns: Optional[Union[List[str], str]] = None, + delete_patterns: Optional[Union[List[str], str]] = None, + api_endpoint: Optional[str] = None, +): + """ + Upload learner checkpoint files to the Hub. + + Use `allow_patterns` and `ignore_patterns` to precisely filter which files should be pushed to the hub. Use + `delete_patterns` to delete existing remote files in the same commit. See [`upload_folder`] reference for more + details. + + Args: + learner (`Learner`): + The `fastai.Learner' you'd like to push to the Hub. + repo_id (`str`): + The repository id for your model in Hub in the format of "namespace/repo_name". The namespace can be your individual account or an organization to which you have write access (for example, 'stanfordnlp/stanza-de'). 
+ commit_message (`str`, *optional*): + Message to commit while pushing. Will default to :obj:`"add model"`. + private (`bool`, *optional*): + Whether or not the repository created should be private. + If `None` (default), will default to been public except if the organization's default is private. + token (`str`, *optional*): + The Hugging Face account token to use as HTTP bearer authorization for remote files. If :obj:`None`, the token will be asked by a prompt. + config (`dict`, *optional*): + Configuration object to be saved alongside the model weights. + branch (`str`, *optional*): + The git branch on which to push the model. This defaults to + the default branch as specified in your repository, which + defaults to `"main"`. + create_pr (`boolean`, *optional*): + Whether or not to create a Pull Request from `branch` with that commit. + Defaults to `False`. + api_endpoint (`str`, *optional*): + The API endpoint to use when pushing the model to the hub. + allow_patterns (`List[str]` or `str`, *optional*): + If provided, only files matching at least one pattern are pushed. + ignore_patterns (`List[str]` or `str`, *optional*): + If provided, files matching any of the patterns are not pushed. + delete_patterns (`List[str]` or `str`, *optional*): + If provided, remote files matching any of the patterns will be deleted from the repo. + + Returns: + The url of the commit of your model in the given repository. + + > [!TIP] + > Raises the following error: + > + > - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + > if the user is not log on to the Hugging Face Hub. 
+ """ + _check_fastai_fastcore_versions() + api = HfApi(endpoint=api_endpoint) + repo_id = api.create_repo(repo_id=repo_id, token=token, private=private, exist_ok=True).repo_id + + # Push the files to the repo in a single commit + with SoftTemporaryDirectory() as tmp: + saved_path = Path(tmp) / repo_id + _save_pretrained_fastai(learner, saved_path, config=config) + return api.upload_folder( + repo_id=repo_id, + token=token, + folder_path=saved_path, + commit_message=commit_message, + revision=branch, + create_pr=create_pr, + allow_patterns=allow_patterns, + ignore_patterns=ignore_patterns, + delete_patterns=delete_patterns, + ) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/file_download.py b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/file_download.py new file mode 100644 index 0000000000000000000000000000000000000000..aff7236b4da41271e77a3cd58e84352904362ee9 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/file_download.py @@ -0,0 +1,1820 @@ +import copy +import errno +import inspect +import os +import re +import shutil +import stat +import time +import uuid +import warnings +from dataclasses import dataclass +from pathlib import Path +from typing import Any, BinaryIO, Dict, Literal, NoReturn, Optional, Tuple, Union +from urllib.parse import quote, urlparse + +import requests + +from . 
import ( + __version__, # noqa: F401 # for backward compatibility + constants, +) +from ._local_folder import get_local_download_paths, read_download_metadata, write_download_metadata +from .constants import ( + HUGGINGFACE_CO_URL_TEMPLATE, # noqa: F401 # for backward compatibility + HUGGINGFACE_HUB_CACHE, # noqa: F401 # for backward compatibility +) +from .errors import ( + EntryNotFoundError, + FileMetadataError, + GatedRepoError, + HfHubHTTPError, + LocalEntryNotFoundError, + RepositoryNotFoundError, + RevisionNotFoundError, +) +from .utils import ( + OfflineModeIsEnabled, + SoftTemporaryDirectory, + WeakFileLock, + XetFileData, + build_hf_headers, + get_fastai_version, # noqa: F401 # for backward compatibility + get_fastcore_version, # noqa: F401 # for backward compatibility + get_graphviz_version, # noqa: F401 # for backward compatibility + get_jinja_version, # noqa: F401 # for backward compatibility + get_pydot_version, # noqa: F401 # for backward compatibility + get_tf_version, # noqa: F401 # for backward compatibility + get_torch_version, # noqa: F401 # for backward compatibility + hf_raise_for_status, + is_fastai_available, # noqa: F401 # for backward compatibility + is_fastcore_available, # noqa: F401 # for backward compatibility + is_graphviz_available, # noqa: F401 # for backward compatibility + is_jinja_available, # noqa: F401 # for backward compatibility + is_pydot_available, # noqa: F401 # for backward compatibility + is_tf_available, # noqa: F401 # for backward compatibility + is_torch_available, # noqa: F401 # for backward compatibility + logging, + parse_xet_file_data_from_response, + refresh_xet_connection_info, + reset_sessions, + tqdm, + validate_hf_hub_args, +) +from .utils._http import _adjust_range_header, http_backoff +from .utils._runtime import _PY_VERSION, is_xet_available # noqa: F401 # for backward compatibility +from .utils._typing import HTTP_METHOD_T +from .utils.sha import sha_fileobj +from .utils.tqdm import 
# Return value when trying to load a file from cache but the file does not exist in the distant repo.
_CACHED_NO_EXIST = object()
_CACHED_NO_EXIST_T = Any

# Regex to get filename from a "Content-Disposition" header for CDN-served files.
# The named group `filename` is required: `http_get` reads it with
# `match.groupdict()["filename"]`. Without the `<filename>` name the pattern is not
# even a valid regex (`(?P` must be followed by `<name>`, `=name` or `>`), and
# compiling it fails when the module is imported.
HEADER_FILENAME_PATTERN = re.compile(r'filename="(?P<filename>.*?)";')

# Regex to check if the revision IS directly a commit_hash
REGEX_COMMIT_HASH = re.compile(r"^[0-9a-f]{40}$")

# Regex to check if the file etag IS a valid sha256
REGEX_SHA256 = re.compile(r"^[0-9a-f]{64}$")

# Per-directory memo of the symlink-support probe done by `are_symlinks_supported`
# (key: resolved cache directory path, value: whether symlinks can be created there).
_are_symlinks_supported_in_dir: Dict[str, bool] = {}
+ """ + # Defaults to HF cache + if cache_dir is None: + cache_dir = constants.HF_HUB_CACHE + cache_dir = str(Path(cache_dir).expanduser().resolve()) # make it unique + + # Check symlink compatibility only once (per cache directory) at first time use + if cache_dir not in _are_symlinks_supported_in_dir: + _are_symlinks_supported_in_dir[cache_dir] = True + + os.makedirs(cache_dir, exist_ok=True) + with SoftTemporaryDirectory(dir=cache_dir) as tmpdir: + src_path = Path(tmpdir) / "dummy_file_src" + src_path.touch() + dst_path = Path(tmpdir) / "dummy_file_dst" + + # Relative source path as in `_create_symlink`` + relative_src = os.path.relpath(src_path, start=os.path.dirname(dst_path)) + try: + os.symlink(relative_src, dst_path) + except OSError: + # Likely running on Windows + _are_symlinks_supported_in_dir[cache_dir] = False + + if not constants.HF_HUB_DISABLE_SYMLINKS_WARNING: + message = ( + "`huggingface_hub` cache-system uses symlinks by default to" + " efficiently store duplicated files but your machine does not" + f" support them in {cache_dir}. Caching files will still work" + " but in a degraded version that might require more space on" + " your disk. This warning can be disabled by setting the" + " `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For" + " more details, see" + " https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations." + ) + if os.name == "nt": + message += ( + "\nTo support symlinks on Windows, you either need to" + " activate Developer Mode or to run Python as an" + " administrator. In order to activate developer mode," + " see this article:" + " https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development" + ) + warnings.warn(message) + + return _are_symlinks_supported_in_dir[cache_dir] + + +@dataclass(frozen=True) +class HfFileMetadata: + """Data structure containing information about a file versioned on the Hub. + + Returned by [`get_hf_file_metadata`] based on a URL. 
@validate_hf_hub_args
def hf_hub_url(
    repo_id: str,
    filename: str,
    *,
    subfolder: Optional[str] = None,
    repo_type: Optional[str] = None,
    revision: Optional[str] = None,
    endpoint: Optional[str] = None,
) -> str:
    """Build the URL of a file hosted in a repo.

    The resolved address can either be a huggingface.co-hosted url, or a link to
    Cloudfront (a Content Delivery Network, or CDN) for large files which are
    more than a few MBs.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) name and a repo name separated
            by a `/`.
        filename (`str`):
            The name of the file in the repo.
        subfolder (`str`, *optional*):
            An optional value corresponding to a folder inside the repo. An empty
            string is treated the same as `None`.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if downloading from a dataset or space,
            `None` or `"model"` if downloading from a model. Default is `None`.
        revision (`str`, *optional*):
            An optional Git revision id which can be a branch name, a tag, or a
            commit hash. Defaults to `constants.DEFAULT_REVISION`.
        endpoint (`str`, *optional*):
            If set, and the built URL starts with `constants.ENDPOINT`, that
            prefix is swapped for this value.

    Returns:
        `str`: The URL, with `revision` and `filename` percent-encoded.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If `repo_type` is not one of `constants.REPO_TYPES`.

    Example:

    ```python
    >>> from huggingface_hub import hf_hub_url

    >>> hf_hub_url(
    ...     repo_id="julien-c/EsperBERTo-small", filename="pytorch_model.bin"
    ... )
    'https://huggingface.co/julien-c/EsperBERTo-small/resolve/main/pytorch_model.bin'
    ```

    > [!TIP]
    > Notes:
    >
    > Cloudfront is replicated over the globe so downloads are way faster for
    > the end user (and it also lowers our bandwidth costs).
    >
    > Cloudfront aggressively caches files by default (default TTL is 24
    > hours), however this is not an issue here because we implement a
    > git-based versioning system on huggingface.co, which means that we store
    > the files on S3/Cloudfront in a content-addressable way (i.e., the file
    > name is its hash). Using content-addressable filenames means cache can't
    > ever be stale.
    >
    > In terms of client-side caching from this library, we base our caching
    > on the objects' entity tag (`ETag`), which is an identifier of a
    > specific version of a resource [1]_. An object's ETag is: its git-sha1
    > if stored in git, or its sha256 if stored in git-lfs.

    References:

    -  [1] https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/ETag
    """
    # Prepend the subfolder unless it is missing or empty.
    if subfolder is not None and subfolder != "":
        filename = f"{subfolder}/{filename}"

    if repo_type not in constants.REPO_TYPES:
        raise ValueError("Invalid repo type")

    # Some repo types carry a URL prefix that goes in front of the repo id.
    url_prefixes = constants.REPO_TYPES_URL_PREFIXES
    if repo_type in url_prefixes:
        repo_id = url_prefixes[repo_type] + repo_id

    resolved_revision = constants.DEFAULT_REVISION if revision is None else revision
    url = HUGGINGFACE_CO_URL_TEMPLATE.format(
        repo_id=repo_id,
        revision=quote(resolved_revision, safe=""),
        filename=quote(filename),
    )

    # Only rewrite the host part when a custom endpoint applies to this URL.
    default_endpoint = constants.ENDPOINT
    if endpoint is None or not url.startswith(default_endpoint):
        return url
    return endpoint + url[len(default_endpoint) :]
+ + A backoff mechanism retries the HTTP call on 5xx errors and network errors. + + Args: + method (`str`): + HTTP method, such as 'GET' or 'HEAD'. + url (`str`): + The URL of the resource to fetch. + follow_relative_redirects (`bool`, *optional*, defaults to `False`) + If True, relative redirection (redirection to the same site) will be resolved even when `allow_redirection` + kwarg is set to False. Useful when we want to follow a redirection to a renamed repository without + following redirection to a CDN. + **params (`dict`, *optional*): + Params to pass to `requests.request`. + """ + # Recursively follow relative redirects + if follow_relative_redirects: + response = _request_wrapper( + method=method, + url=url, + follow_relative_redirects=False, + **params, + ) + + # If redirection, we redirect only relative paths. + # This is useful in case of a renamed repository. + if 300 <= response.status_code <= 399: + parsed_target = urlparse(response.headers["Location"]) + if parsed_target.netloc == "": + # This means it is a relative 'location' headers, as allowed by RFC 7231. + # (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource') + # We want to follow this relative redirect ! + # + # Highly inspired by `resolve_redirects` from requests library. + # See https://github.com/psf/requests/blob/main/requests/sessions.py#L159 + next_url = urlparse(url)._replace(path=parsed_target.path).geturl() + return _request_wrapper(method=method, url=next_url, follow_relative_redirects=True, **params) + return response + + # Perform request and return if status_code is not in the retry list. + response = http_backoff(method=method, url=url, **params) + hf_raise_for_status(response) + return response + + +def _get_file_length_from_http_response(response: requests.Response) -> Optional[int]: + """ + Get the length of the file from the HTTP response headers. 
+ + This function extracts the file size from the HTTP response headers, either from the + `Content-Range` or `Content-Length` header, if available (in that order). + + Args: + response (`requests.Response`): + The HTTP response object. + + Returns: + `int` or `None`: The length of the file in bytes, or None if not available. + """ + + # If HTTP response contains compressed body (e.g. gzip), the `Content-Length` header will + # contain the length of the compressed body, not the uncompressed file size. + # And at the start of transmission there's no way to know the uncompressed file size for gzip, + # thus we return None in that case. + content_encoding = response.headers.get("Content-Encoding", "identity").lower() + if content_encoding != "identity": + # gzip/br/deflate/zstd etc + return None + + content_range = response.headers.get("Content-Range") + if content_range is not None: + return int(content_range.rsplit("/")[-1]) + + content_length = response.headers.get("Content-Length") + if content_length is not None: + return int(content_length) + + return None + + +def http_get( + url: str, + temp_file: BinaryIO, + *, + proxies: Optional[Dict] = None, + resume_size: int = 0, + headers: Optional[Dict[str, Any]] = None, + expected_size: Optional[int] = None, + displayed_filename: Optional[str] = None, + _nb_retries: int = 5, + _tqdm_bar: Optional[tqdm] = None, +) -> None: + """ + Download a remote file. Do not gobble up errors, and will return errors tailored to the Hugging Face Hub. + + If ConnectionError (SSLError) or ReadTimeout happen while streaming data from the server, it is most likely a + transient error (network outage?). We log a warning message and try to resume the download a few times before + giving up. The method gives up after 5 attempts if no new data has being received from the server. + + Args: + url (`str`): + The URL of the file to download. + temp_file (`BinaryIO`): + The file-like object where to save the file. 
+ proxies (`dict`, *optional*): + Dictionary mapping protocol to the URL of the proxy passed to `requests.request`. + resume_size (`int`, *optional*): + The number of bytes already downloaded. If set to 0 (default), the whole file is download. If set to a + positive number, the download will resume at the given position. + headers (`dict`, *optional*): + Dictionary of HTTP Headers to send with the request. + expected_size (`int`, *optional*): + The expected size of the file to download. If set, the download will raise an error if the size of the + received content is different from the expected one. + displayed_filename (`str`, *optional*): + The filename of the file that is being downloaded. Value is used only to display a nice progress bar. If + not set, the filename is guessed from the URL or the `Content-Disposition` header. + """ + if expected_size is not None and resume_size == expected_size: + # If the file is already fully downloaded, we don't need to download it again. + return + + has_custom_range_header = headers is not None and any(h.lower() == "range" for h in headers) + hf_transfer = None + if constants.HF_HUB_ENABLE_HF_TRANSFER: + if resume_size != 0: + warnings.warn("'hf_transfer' does not support `resume_size`: falling back to regular download method") + elif proxies is not None: + warnings.warn("'hf_transfer' does not support `proxies`: falling back to regular download method") + elif has_custom_range_header: + warnings.warn("'hf_transfer' ignores custom 'Range' headers; falling back to regular download method") + else: + try: + import hf_transfer # type: ignore[no-redef] + except ImportError: + raise ValueError( + "Fast download using 'hf_transfer' is enabled" + " (HF_HUB_ENABLE_HF_TRANSFER=1) but 'hf_transfer' package is not" + " available in your environment. Try `pip install hf_transfer`." 
+ ) + + initial_headers = headers + headers = copy.deepcopy(headers) or {} + if resume_size > 0: + headers["Range"] = _adjust_range_header(headers.get("Range"), resume_size) + elif expected_size and expected_size > constants.MAX_HTTP_DOWNLOAD_SIZE: + # Any files over 50GB will not be available through basic http request. + # Setting the range header to 0-0 will force the server to return the file size in the Content-Range header. + # Since hf_transfer splits the download into chunks, the process will succeed afterwards. + if hf_transfer: + headers["Range"] = "bytes=0-0" + else: + raise ValueError( + "The file is too large to be downloaded using the regular download method. Use `hf_transfer` or `hf_xet` instead." + " Try `pip install hf_transfer` or `pip install hf_xet`." + ) + + r = _request_wrapper( + method="GET", url=url, stream=True, proxies=proxies, headers=headers, timeout=constants.HF_HUB_DOWNLOAD_TIMEOUT + ) + hf_raise_for_status(r) + + # If we requested a Range but got 200 back, the server ignored our Range header + # (e.g. CloudFront with Accept-Encoding: gzip). Reset file to avoid corruption. + if resume_size > 0 and r.status_code == 200: + temp_file.seek(0) + temp_file.truncate() + resume_size = 0 + + total: Optional[int] = _get_file_length_from_http_response(r) + + if displayed_filename is None: + displayed_filename = url + content_disposition = r.headers.get("Content-Disposition") + if content_disposition is not None: + match = HEADER_FILENAME_PATTERN.search(content_disposition) + if match is not None: + # Means file is on CDN + displayed_filename = match.groupdict()["filename"] + + # Truncate filename if too long to display + if len(displayed_filename) > 40: + displayed_filename = f"(…){displayed_filename[-40:]}" + + consistency_error_message = ( + f"Consistency check failed: file should be of size {expected_size} but has size" + f" {{actual_size}} ({displayed_filename}).\nThis is usually due to network issues while downloading the file." 
+ " Please retry with `force_download=True`." + ) + progress_cm = _get_progress_bar_context( + desc=displayed_filename, + log_level=logger.getEffectiveLevel(), + total=total, + initial=resume_size, + name="huggingface_hub.http_get", + _tqdm_bar=_tqdm_bar, + ) + + with progress_cm as progress: + if hf_transfer and total is not None and total > 5 * constants.DOWNLOAD_CHUNK_SIZE: + supports_callback = "callback" in inspect.signature(hf_transfer.download).parameters + if not supports_callback: + warnings.warn( + "You are using an outdated version of `hf_transfer`. " + "Consider upgrading to latest version to enable progress bars " + "using `pip install -U hf_transfer`." + ) + try: + hf_transfer.download( + url=url, + filename=temp_file.name, + max_files=constants.HF_TRANSFER_CONCURRENCY, + chunk_size=constants.DOWNLOAD_CHUNK_SIZE, + headers=initial_headers, + parallel_failures=3, + max_retries=5, + **({"callback": progress.update} if supports_callback else {}), + ) + except Exception as e: + raise RuntimeError( + "An error occurred while downloading using `hf_transfer`. Consider" + " disabling HF_HUB_ENABLE_HF_TRANSFER for better error handling." + ) from e + if not supports_callback: + progress.update(total) + if expected_size is not None and expected_size != os.path.getsize(temp_file.name): + raise EnvironmentError( + consistency_error_message.format( + actual_size=os.path.getsize(temp_file.name), + ) + ) + return + new_resume_size = resume_size + try: + for chunk in r.iter_content(chunk_size=constants.DOWNLOAD_CHUNK_SIZE): + if chunk: # filter out keep-alive new chunks + progress.update(len(chunk)) + temp_file.write(chunk) + new_resume_size += len(chunk) + # Some data has been downloaded from the server so we reset the number of retries. 
+ _nb_retries = 5 + except (requests.ConnectionError, requests.ReadTimeout) as e: + # If ConnectionError (SSLError) or ReadTimeout happen while streaming data from the server, it is most likely + # a transient error (network outage?). We log a warning message and try to resume the download a few times + # before giving up. Tre retry mechanism is basic but should be enough in most cases. + if _nb_retries <= 0: + logger.warning("Error while downloading from %s: %s\nMax retries exceeded.", url, str(e)) + raise + logger.warning("Error while downloading from %s: %s\nTrying to resume download...", url, str(e)) + time.sleep(1) + reset_sessions() # In case of SSLError it's best to reset the shared requests.Session objects + return http_get( + url=url, + temp_file=temp_file, + proxies=proxies, + resume_size=new_resume_size, + headers=initial_headers, + expected_size=expected_size, + _nb_retries=_nb_retries - 1, + _tqdm_bar=_tqdm_bar, + ) + + if expected_size is not None and expected_size != temp_file.tell(): + raise EnvironmentError( + consistency_error_message.format( + actual_size=temp_file.tell(), + ) + ) + + +def xet_get( + *, + incomplete_path: Path, + xet_file_data: XetFileData, + headers: Dict[str, str], + expected_size: Optional[int] = None, + displayed_filename: Optional[str] = None, + _tqdm_bar: Optional[tqdm] = None, +) -> None: + """ + Download a file using Xet storage service. + + Args: + incomplete_path (`Path`): + The path to the file to download. + xet_file_data (`XetFileData`): + The file metadata needed to make the request to the xet storage service. + headers (`Dict[str, str]`): + The headers to send to the xet storage service. + expected_size (`int`, *optional*): + The expected size of the file to download. If set, the download will raise an error if the size of the + received content is different from the expected one. + displayed_filename (`str`, *optional*): + The filename of the file that is being downloaded. 
Value is used only to display a nice progress bar. If + not set, the filename is guessed from the URL or the `Content-Disposition` header. + + **How it works:** + The file download system uses Xet storage, which is a content-addressable storage system that breaks files into chunks + for efficient storage and transfer. + + `hf_xet.download_files` manages downloading files by: + - Taking a list of files to download (each with its unique content hash) + - Connecting to a storage server (CAS server) that knows how files are chunked + - Using authentication to ensure secure access + - Providing progress updates during download + + Authentication works by regularly refreshing access tokens through `refresh_xet_connection_info` to maintain a valid + connection to the storage server. + + The download process works like this: + 1. Create a local cache folder at `~/.cache/huggingface/xet/chunk-cache` to store reusable file chunks + 2. Download files in parallel: + 2.1. Prepare to write the file to disk + 2.2. Ask the server "how is this file split into chunks?" using the file's unique hash + The server responds with: + - Which chunks make up the complete file + - Where each chunk can be downloaded from + 2.3. For each needed chunk: + - Checks if we already have it in our local cache + - If not, download it from cloud storage (S3) + - Save it to cache for future use + - Assemble the chunks in order to recreate the original file + + """ + try: + from hf_xet import PyXetDownloadInfo, download_files # type: ignore[no-redef] + except ImportError: + raise ValueError( + "To use optimized download using Xet storage, you need to install the hf_xet package. " + 'Try `pip install "huggingface_hub[hf_xet]"` or `pip install hf_xet`.' 
+ ) + + connection_info = refresh_xet_connection_info(file_data=xet_file_data, headers=headers) + + def token_refresher() -> Tuple[str, int]: + connection_info = refresh_xet_connection_info(file_data=xet_file_data, headers=headers) + if connection_info is None: + raise ValueError("Failed to refresh token using xet metadata.") + return connection_info.access_token, connection_info.expiration_unix_epoch + + xet_download_info = [ + PyXetDownloadInfo( + destination_path=str(incomplete_path.absolute()), hash=xet_file_data.file_hash, file_size=expected_size + ) + ] + + if not displayed_filename: + displayed_filename = incomplete_path.name + + # Truncate filename if too long to display + if len(displayed_filename) > 40: + displayed_filename = f"{displayed_filename[:40]}(…)" + + progress_cm = _get_progress_bar_context( + desc=displayed_filename, + log_level=logger.getEffectiveLevel(), + total=expected_size, + initial=0, + name="huggingface_hub.xet_get", + _tqdm_bar=_tqdm_bar, + ) + + with progress_cm as progress: + + def progress_updater(progress_bytes: float): + progress.update(progress_bytes) + + download_files( + xet_download_info, + endpoint=connection_info.endpoint, + token_info=(connection_info.access_token, connection_info.expiration_unix_epoch), + token_refresher=token_refresher, + progress_updater=[progress_updater], + ) + + +def _normalize_etag(etag: Optional[str]) -> Optional[str]: + """Normalize ETag HTTP header, so it can be used to create nice filepaths. + + The HTTP spec allows two forms of ETag: + ETag: W/"" + ETag: "" + + For now, we only expect the second form from the server, but we want to be future-proof so we support both. For + more context, see `TestNormalizeEtag` tests and https://github.com/huggingface/huggingface_hub/pull/1428. + + Args: + etag (`str`, *optional*): HTTP header + + Returns: + `str` or `None`: string that can be used as a nice directory name. + Returns `None` if input is None. 
+ """ + if etag is None: + return None + return etag.lstrip("W/").strip('"') + + +def _create_relative_symlink(src: str, dst: str, new_blob: bool = False) -> None: + """Alias method used in `transformers` conversion script.""" + return _create_symlink(src=src, dst=dst, new_blob=new_blob) + + +def _create_symlink(src: str, dst: str, new_blob: bool = False) -> None: + """Create a symbolic link named dst pointing to src. + + By default, it will try to create a symlink using a relative path. Relative paths have 2 advantages: + - If the cache_folder is moved (example: back-up on a shared drive), relative paths within the cache folder will + not break. + - Relative paths seems to be better handled on Windows. Issue was reported 3 times in less than a week when + changing from relative to absolute paths. See https://github.com/huggingface/huggingface_hub/issues/1398, + https://github.com/huggingface/diffusers/issues/2729 and https://github.com/huggingface/transformers/pull/22228. + NOTE: The issue with absolute paths doesn't happen on admin mode. + When creating a symlink from the cache to a local folder, it is possible that a relative path cannot be created. + This happens when paths are not on the same volume. In that case, we use absolute paths. + + + The result layout looks something like + └── [ 128] snapshots + ├── [ 128] 2439f60ef33a0d46d85da5001d52aeda5b00ce9f + │ ├── [ 52] README.md -> ../../../blobs/d7edf6bd2a681fb0175f7735299831ee1b22b812 + │ └── [ 76] pytorch_model.bin -> ../../../blobs/403450e234d65943a7dcf7e05a771ce3c92faa84dd07db4ac20f592037a1e4bd + + If symlinks cannot be created on this platform (most likely to be Windows), the workaround is to avoid symlinks by + having the actual file in `dst`. If it is a new file (`new_blob=True`), we move it to `dst`. If it is not a new file + (`new_blob=False`), we don't know if the blob file is already referenced elsewhere. To avoid breaking existing + cache, the file is duplicated on the disk. 
+ + In case symlinks are not supported, a warning message is displayed to the user once when loading `huggingface_hub`. + The warning message can be disabled with the `DISABLE_SYMLINKS_WARNING` environment variable. + """ + try: + os.remove(dst) + except OSError: + pass + + abs_src = os.path.abspath(os.path.expanduser(src)) + abs_dst = os.path.abspath(os.path.expanduser(dst)) + abs_dst_folder = os.path.dirname(abs_dst) + + # Use relative_dst in priority + try: + relative_src = os.path.relpath(abs_src, abs_dst_folder) + except ValueError: + # Raised on Windows if src and dst are not on the same volume. This is the case when creating a symlink to a + # local_dir instead of within the cache directory. + # See https://docs.python.org/3/library/os.path.html#os.path.relpath + relative_src = None + + try: + commonpath = os.path.commonpath([abs_src, abs_dst]) + _support_symlinks = are_symlinks_supported(commonpath) + except ValueError: + # Raised if src and dst are not on the same volume. Symlinks will still work on Linux/Macos. + # See https://docs.python.org/3/library/os.path.html#os.path.commonpath + _support_symlinks = os.name != "nt" + except PermissionError: + # Permission error means src and dst are not in the same volume (e.g. destination path has been provided + # by the user via `local_dir`. Let's test symlink support there) + _support_symlinks = are_symlinks_supported(abs_dst_folder) + except OSError as e: + # OS error (errno=30) means that the commonpath is readonly on Linux/MacOS. + if e.errno == errno.EROFS: + _support_symlinks = are_symlinks_supported(abs_dst_folder) + else: + raise + + # Symlinks are supported => let's create a symlink. 
+ if _support_symlinks: + src_rel_or_abs = relative_src or abs_src + logger.debug(f"Creating pointer from {src_rel_or_abs} to {abs_dst}") + try: + os.symlink(src_rel_or_abs, abs_dst) + return + except FileExistsError: + if os.path.islink(abs_dst) and os.path.realpath(abs_dst) == os.path.realpath(abs_src): + # `abs_dst` already exists and is a symlink to the `abs_src` blob. It is most likely that the file has + # been cached twice concurrently (exactly between `os.remove` and `os.symlink`). Do nothing. + return + else: + # Very unlikely to happen. Means a file `dst` has been created exactly between `os.remove` and + # `os.symlink` and is not a symlink to the `abs_src` blob file. Raise exception. + raise + except PermissionError: + # Permission error means src and dst are not in the same volume (e.g. download to local dir) and symlink + # is supported on both volumes but not between them. Let's just make a hard copy in that case. + pass + + # Symlinks are not supported => let's move or copy the file. + if new_blob: + logger.info(f"Symlink not supported. Moving file from {abs_src} to {abs_dst}") + shutil.move(abs_src, abs_dst, copy_function=_copy_no_matter_what) + else: + logger.info(f"Symlink not supported. Copying file from {abs_src} to {abs_dst}") + shutil.copyfile(abs_src, abs_dst) + + +def _cache_commit_hash_for_specific_revision(storage_folder: str, revision: str, commit_hash: str) -> None: + """Cache reference between a revision (tag, branch or truncated commit hash) and the corresponding commit hash. + + Does nothing if `revision` is already a proper `commit_hash` or reference is already cached. + """ + if revision != commit_hash: + ref_path = Path(storage_folder) / "refs" / revision + ref_path.parent.mkdir(parents=True, exist_ok=True) + if not ref_path.exists() or commit_hash != ref_path.read_text(): + # Update ref only if has been updated. Could cause useless error in case + # repo is already cached and user doesn't have write access to cache folder. 
+ # See https://github.com/huggingface/huggingface_hub/issues/1216. + ref_path.write_text(commit_hash) + + +@validate_hf_hub_args +def repo_folder_name(*, repo_id: str, repo_type: str) -> str: + """Return a serialized version of a hf.co repo name and type, safe for disk storage + as a single non-nested folder. + + Example: models--julien-c--EsperBERTo-small + """ + # remove all `/` occurrences to correctly convert repo to directory name + parts = [f"{repo_type}s", *repo_id.split("/")] + return constants.REPO_ID_SEPARATOR.join(parts) + + +def _check_disk_space(expected_size: int, target_dir: Union[str, Path]) -> None: + """Check disk usage and log a warning if there is not enough disk space to download the file. + + Args: + expected_size (`int`): + The expected size of the file in bytes. + target_dir (`str`): + The directory where the file will be stored after downloading. + """ + + target_dir = Path(target_dir) # format as `Path` + for path in [target_dir] + list(target_dir.parents): # first check target_dir, then each parents one by one + try: + target_dir_free = shutil.disk_usage(path).free + if target_dir_free < expected_size: + warnings.warn( + "Not enough free disk space to download the file. " + f"The expected file size is: {expected_size / 1e6:.2f} MB. " + f"The target location {target_dir} only has {target_dir_free / 1e6:.2f} MB free disk space." 
+ ) + return + except OSError: # raise on anything: file does not exist or space disk cannot be checked + pass + + +@validate_hf_hub_args +def hf_hub_download( + repo_id: str, + filename: str, + *, + subfolder: Optional[str] = None, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + library_name: Optional[str] = None, + library_version: Optional[str] = None, + cache_dir: Union[str, Path, None] = None, + local_dir: Union[str, Path, None] = None, + user_agent: Union[Dict, str, None] = None, + force_download: bool = False, + proxies: Optional[Dict] = None, + etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT, + token: Union[bool, str, None] = None, + local_files_only: bool = False, + headers: Optional[Dict[str, str]] = None, + endpoint: Optional[str] = None, + resume_download: Optional[bool] = None, + force_filename: Optional[str] = None, + local_dir_use_symlinks: Union[bool, Literal["auto"]] = "auto", +) -> str: + """Download a given file if it's not already present in the local cache. + + The new cache file layout looks like this: + - The cache directory contains one subfolder per repo_id (namespaced by repo type) + - inside each repo folder: + - refs is a list of the latest known revision => commit_hash pairs + - blobs contains the actual file blobs (identified by their git-sha or sha256, depending on + whether they're LFS files or not) + - snapshots contains one subfolder per commit, each "commit" contains the subset of the files + that have been resolved at that particular commit. Each filename is a symlink to the blob + at that particular commit. + + ``` + [ 96] . 
+ └── [ 160] models--julien-c--EsperBERTo-small + ├── [ 160] blobs + │ ├── [321M] 403450e234d65943a7dcf7e05a771ce3c92faa84dd07db4ac20f592037a1e4bd + │ ├── [ 398] 7cb18dc9bafbfcf74629a4b760af1b160957a83e + │ └── [1.4K] d7edf6bd2a681fb0175f7735299831ee1b22b812 + ├── [ 96] refs + │ └── [ 40] main + └── [ 128] snapshots + ├── [ 128] 2439f60ef33a0d46d85da5001d52aeda5b00ce9f + │ ├── [ 52] README.md -> ../../blobs/d7edf6bd2a681fb0175f7735299831ee1b22b812 + │ └── [ 76] pytorch_model.bin -> ../../blobs/403450e234d65943a7dcf7e05a771ce3c92faa84dd07db4ac20f592037a1e4bd + └── [ 128] bbc77c8132af1cc5cf678da3f1ddf2de43606d48 + ├── [ 52] README.md -> ../../blobs/7cb18dc9bafbfcf74629a4b760af1b160957a83e + └── [ 76] pytorch_model.bin -> ../../blobs/403450e234d65943a7dcf7e05a771ce3c92faa84dd07db4ac20f592037a1e4bd + ``` + + If `local_dir` is provided, the file structure from the repo will be replicated in this location. When using this + option, the `cache_dir` will not be used and a `.cache/huggingface/` folder will be created at the root of `local_dir` + to store some metadata related to the downloaded files. While this mechanism is not as robust as the main + cache-system, it's optimized for regularly pulling the latest version of a repository. + + Args: + repo_id (`str`): + A user or an organization name and a repo name separated by a `/`. + filename (`str`): + The name of the file in the repo. + subfolder (`str`, *optional*): + An optional value corresponding to a folder inside the model repo. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if downloading from a dataset or space, + `None` or `"model"` if downloading from a model. Default is `None`. + revision (`str`, *optional*): + An optional Git revision id which can be a branch name, a tag, or a + commit hash. + library_name (`str`, *optional*): + The name of the library to which the object corresponds. + library_version (`str`, *optional*): + The version of the library. 
+ cache_dir (`str`, `Path`, *optional*): + Path to the folder where cached files are stored. + local_dir (`str` or `Path`, *optional*): + If provided, the downloaded file will be placed under this directory. + user_agent (`dict`, `str`, *optional*): + The user-agent info in the form of a dictionary or a string. + force_download (`bool`, *optional*, defaults to `False`): + Whether the file should be downloaded even if it already exists in + the local cache. + proxies (`dict`, *optional*): + Dictionary mapping protocol to the URL of the proxy passed to + `requests.request`. + etag_timeout (`float`, *optional*, defaults to `10`): + When fetching ETag, how many seconds to wait for the server to send + data before giving up which is passed to `requests.request`. + token (`str`, `bool`, *optional*): + A token to be used for the download. + - If `True`, the token is read from the HuggingFace config + folder. + - If a string, it's used as the authentication token. + local_files_only (`bool`, *optional*, defaults to `False`): + If `True`, avoid downloading the file and return the path to the + local cached file if it exists. + headers (`dict`, *optional*): + Additional headers to be sent with the request. + + Returns: + `str`: Local path of file or if networking is off, last version of file cached on disk. + + Raises: + [`~utils.RepositoryNotFoundError`] + If the repository to download from cannot be found. This may be because it doesn't exist, + or because it is set to `private` and you do not have access. + [`~utils.RevisionNotFoundError`] + If the revision to download from cannot be found. + [`~utils.EntryNotFoundError`] + If the file to download cannot be found. + [`~utils.LocalEntryNotFoundError`] + If network is disabled or unavailable and file is not found in cache. + [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError) + If `token=True` but the token cannot be found. 
+ [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError) + If ETag cannot be determined. + [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + If some parameter value is invalid. + + """ + if constants.HF_HUB_ETAG_TIMEOUT != constants.DEFAULT_ETAG_TIMEOUT: + # Respect environment variable above user value + etag_timeout = constants.HF_HUB_ETAG_TIMEOUT + + if force_filename is not None: + warnings.warn( + "The `force_filename` parameter is deprecated as a new caching system, " + "which keeps the filenames as they are on the Hub, is now in place.", + FutureWarning, + ) + if resume_download is not None: + warnings.warn( + "`resume_download` is deprecated and will be removed in version 1.0.0. " + "Downloads always resume when possible. " + "If you want to force a new download, use `force_download=True`.", + FutureWarning, + ) + + if cache_dir is None: + cache_dir = constants.HF_HUB_CACHE + if revision is None: + revision = constants.DEFAULT_REVISION + if isinstance(cache_dir, Path): + cache_dir = str(cache_dir) + if isinstance(local_dir, Path): + local_dir = str(local_dir) + + if subfolder == "": + subfolder = None + if subfolder is not None: + # This is used to create a URL, and not a local path, hence the forward slash. + filename = f"{subfolder}/{filename}" + + if repo_type is None: + repo_type = "model" + if repo_type not in constants.REPO_TYPES: + raise ValueError(f"Invalid repo type: {repo_type}. Accepted repo types are: {str(constants.REPO_TYPES)}") + + hf_headers = build_hf_headers( + token=token, + library_name=library_name, + library_version=library_version, + user_agent=user_agent, + headers=headers, + ) + + if local_dir is not None: + if local_dir_use_symlinks != "auto": + warnings.warn( + "`local_dir_use_symlinks` parameter is deprecated and will be ignored. " + "The process to download files to a local folder has been updated and do " + "not rely on symlinks anymore. 
You only need to pass a destination folder " + "as`local_dir`.\n" + "For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder." + ) + + return _hf_hub_download_to_local_dir( + # Destination + local_dir=local_dir, + # File info + repo_id=repo_id, + repo_type=repo_type, + filename=filename, + revision=revision, + # HTTP info + endpoint=endpoint, + etag_timeout=etag_timeout, + headers=hf_headers, + proxies=proxies, + token=token, + # Additional options + cache_dir=cache_dir, + force_download=force_download, + local_files_only=local_files_only, + ) + else: + return _hf_hub_download_to_cache_dir( + # Destination + cache_dir=cache_dir, + # File info + repo_id=repo_id, + filename=filename, + repo_type=repo_type, + revision=revision, + # HTTP info + endpoint=endpoint, + etag_timeout=etag_timeout, + headers=hf_headers, + proxies=proxies, + token=token, + # Additional options + local_files_only=local_files_only, + force_download=force_download, + ) + + +def _hf_hub_download_to_cache_dir( + *, + # Destination + cache_dir: str, + # File info + repo_id: str, + filename: str, + repo_type: str, + revision: str, + # HTTP info + endpoint: Optional[str], + etag_timeout: float, + headers: Dict[str, str], + proxies: Optional[Dict], + token: Optional[Union[bool, str]], + # Additional options + local_files_only: bool, + force_download: bool, +) -> str: + """Download a given file to a cache folder, if not already present. + + Method should not be called directly. Please use `hf_hub_download` instead. + """ + locks_dir = os.path.join(cache_dir, ".locks") + storage_folder = os.path.join(cache_dir, repo_folder_name(repo_id=repo_id, repo_type=repo_type)) + + # cross platform transcription of filename, to be used as a local file path. 
+ relative_filename = os.path.join(*filename.split("/")) + if os.name == "nt": + if relative_filename.startswith("..\\") or "\\..\\" in relative_filename: + raise ValueError( + f"Invalid filename: cannot handle filename '{relative_filename}' on Windows. Please ask the repository" + " owner to rename this file." + ) + + # if user provides a commit_hash and they already have the file on disk, shortcut everything. + if REGEX_COMMIT_HASH.match(revision): + pointer_path = _get_pointer_path(storage_folder, revision, relative_filename) + if os.path.exists(pointer_path) and not force_download: + return pointer_path + + # Try to get metadata (etag, commit_hash, url, size) from the server. + # If we can't, a HEAD request error is returned. + (url_to_download, etag, commit_hash, expected_size, xet_file_data, head_call_error) = _get_metadata_or_catch_error( + repo_id=repo_id, + filename=filename, + repo_type=repo_type, + revision=revision, + endpoint=endpoint, + proxies=proxies, + etag_timeout=etag_timeout, + headers=headers, + token=token, + local_files_only=local_files_only, + storage_folder=storage_folder, + relative_filename=relative_filename, + ) + + # etag can be None for several reasons: + # 1. we passed local_files_only. + # 2. we don't have a connection + # 3. Hub is down (HTTP 500, 503, 504) + # 4. repo is not found -for example private or gated- and invalid/missing token sent + # 5. Hub is blocked by a firewall or proxy is not set correctly. + # => Try to get the last downloaded one from the specified revision. + # + # If the specified revision is a commit hash, look inside "snapshots". + # If the specified revision is a branch or tag, look inside "refs". 
+ if head_call_error is not None: + # Couldn't make a HEAD call => let's try to find a local file + if not force_download: + commit_hash = None + if REGEX_COMMIT_HASH.match(revision): + commit_hash = revision + else: + ref_path = os.path.join(storage_folder, "refs", revision) + if os.path.isfile(ref_path): + with open(ref_path) as f: + commit_hash = f.read() + + # Return pointer file if exists + if commit_hash is not None: + pointer_path = _get_pointer_path(storage_folder, commit_hash, relative_filename) + if os.path.exists(pointer_path) and not force_download: + return pointer_path + + # Otherwise, raise appropriate error + _raise_on_head_call_error(head_call_error, force_download, local_files_only) + + # From now on, etag, commit_hash, url and size are not None. + assert etag is not None, "etag must have been retrieved from server" + assert commit_hash is not None, "commit_hash must have been retrieved from server" + assert url_to_download is not None, "file location must have been retrieved from server" + assert expected_size is not None, "expected_size must have been retrieved from server" + blob_path = os.path.join(storage_folder, "blobs", etag) + pointer_path = _get_pointer_path(storage_folder, commit_hash, relative_filename) + + os.makedirs(os.path.dirname(blob_path), exist_ok=True) + os.makedirs(os.path.dirname(pointer_path), exist_ok=True) + + # if passed revision is not identical to commit_hash + # then revision has to be a branch name or tag name. + # In that case store a ref. + _cache_commit_hash_for_specific_revision(storage_folder, revision, commit_hash) + + # Prevent parallel downloads of the same file with a lock. + # etag could be duplicated across repos, + lock_path = os.path.join(locks_dir, repo_folder_name(repo_id=repo_id, repo_type=repo_type), f"{etag}.lock") + + # Some Windows versions do not allow for paths longer than 255 characters. + # In this case, we must specify it as an extended path by using the "\\?\" prefix. 
+ if ( + os.name == "nt" + and len(os.path.abspath(lock_path)) > 255 + and not os.path.abspath(lock_path).startswith("\\\\?\\") + ): + lock_path = "\\\\?\\" + os.path.abspath(lock_path) + + if ( + os.name == "nt" + and len(os.path.abspath(blob_path)) > 255 + and not os.path.abspath(blob_path).startswith("\\\\?\\") + ): + blob_path = "\\\\?\\" + os.path.abspath(blob_path) + + Path(lock_path).parent.mkdir(parents=True, exist_ok=True) + + # pointer already exists -> immediate return + if not force_download and os.path.exists(pointer_path): + return pointer_path + + # Blob exists but pointer must be (safely) created -> take the lock + if not force_download and os.path.exists(blob_path): + with WeakFileLock(lock_path): + if not os.path.exists(pointer_path): + _create_symlink(blob_path, pointer_path, new_blob=False) + return pointer_path + + # Local file doesn't exist or etag isn't a match => retrieve file from remote (or cache) + + with WeakFileLock(lock_path): + _download_to_tmp_and_move( + incomplete_path=Path(blob_path + ".incomplete"), + destination_path=Path(blob_path), + url_to_download=url_to_download, + proxies=proxies, + headers=headers, + expected_size=expected_size, + filename=filename, + force_download=force_download, + etag=etag, + xet_file_data=xet_file_data, + ) + if not os.path.exists(pointer_path): + _create_symlink(blob_path, pointer_path, new_blob=True) + + return pointer_path + + +def _hf_hub_download_to_local_dir( + *, + # Destination + local_dir: Union[str, Path], + # File info + repo_id: str, + repo_type: str, + filename: str, + revision: str, + # HTTP info + endpoint: Optional[str], + etag_timeout: float, + headers: Dict[str, str], + proxies: Optional[Dict], + token: Union[bool, str, None], + # Additional options + cache_dir: str, + force_download: bool, + local_files_only: bool, +) -> str: + """Download a given file to a local folder, if not already present. + + Method should not be called directly. Please use `hf_hub_download` instead. 
+ """ + # Some Windows versions do not allow for paths longer than 255 characters. + # In this case, we must specify it as an extended path by using the "\\?\" prefix. + if os.name == "nt" and len(os.path.abspath(local_dir)) > 255: + local_dir = "\\\\?\\" + os.path.abspath(local_dir) + local_dir = Path(local_dir) + paths = get_local_download_paths(local_dir=local_dir, filename=filename) + local_metadata = read_download_metadata(local_dir=local_dir, filename=filename) + + # Local file exists + metadata exists + commit_hash matches => return file + if ( + not force_download + and REGEX_COMMIT_HASH.match(revision) + and paths.file_path.is_file() + and local_metadata is not None + and local_metadata.commit_hash == revision + ): + return str(paths.file_path) + + # Local file doesn't exist or commit_hash doesn't match => we need the etag + (url_to_download, etag, commit_hash, expected_size, xet_file_data, head_call_error) = _get_metadata_or_catch_error( + repo_id=repo_id, + filename=filename, + repo_type=repo_type, + revision=revision, + endpoint=endpoint, + proxies=proxies, + etag_timeout=etag_timeout, + headers=headers, + token=token, + local_files_only=local_files_only, + ) + + if head_call_error is not None: + # No HEAD call but local file exists => default to local file + if not force_download and paths.file_path.is_file(): + logger.warning( + f"Couldn't access the Hub to check for update but local file already exists. Defaulting to existing file. (error: {head_call_error})" + ) + return str(paths.file_path) + # Otherwise => raise + _raise_on_head_call_error(head_call_error, force_download, local_files_only) + + # From now on, etag, commit_hash, url and size are not None. 
+ assert etag is not None, "etag must have been retrieved from server" + assert commit_hash is not None, "commit_hash must have been retrieved from server" + assert url_to_download is not None, "file location must have been retrieved from server" + assert expected_size is not None, "expected_size must have been retrieved from server" + + # Local file exists => check if it's up-to-date + if not force_download and paths.file_path.is_file(): + # etag matches => update metadata and return file + if local_metadata is not None and local_metadata.etag == etag: + write_download_metadata(local_dir=local_dir, filename=filename, commit_hash=commit_hash, etag=etag) + return str(paths.file_path) + + # metadata is outdated + etag is a sha256 + # => means it's an LFS file (large) + # => let's compute local hash and compare + # => if match, update metadata and return file + if local_metadata is None and REGEX_SHA256.match(etag) is not None: + with open(paths.file_path, "rb") as f: + file_hash = sha_fileobj(f).hex() + if file_hash == etag: + write_download_metadata(local_dir=local_dir, filename=filename, commit_hash=commit_hash, etag=etag) + return str(paths.file_path) + + # Local file doesn't exist or etag isn't a match => retrieve file from remote (or cache) + + # If we are lucky enough, the file is already in the cache => copy it + if not force_download: + cached_path = try_to_load_from_cache( + repo_id=repo_id, + filename=filename, + cache_dir=cache_dir, + revision=commit_hash, + repo_type=repo_type, + ) + if isinstance(cached_path, str): + with WeakFileLock(paths.lock_path): + paths.file_path.parent.mkdir(parents=True, exist_ok=True) + shutil.copyfile(cached_path, paths.file_path) + write_download_metadata(local_dir=local_dir, filename=filename, commit_hash=commit_hash, etag=etag) + return str(paths.file_path) + + # Otherwise, let's download the file! 
+ with WeakFileLock(paths.lock_path): + paths.file_path.unlink(missing_ok=True) # delete outdated file first + _download_to_tmp_and_move( + incomplete_path=paths.incomplete_path(etag), + destination_path=paths.file_path, + url_to_download=url_to_download, + proxies=proxies, + headers=headers, + expected_size=expected_size, + filename=filename, + force_download=force_download, + etag=etag, + xet_file_data=xet_file_data, + ) + + write_download_metadata(local_dir=local_dir, filename=filename, commit_hash=commit_hash, etag=etag) + return str(paths.file_path) + + +@validate_hf_hub_args +def try_to_load_from_cache( + repo_id: str, + filename: str, + cache_dir: Union[str, Path, None] = None, + revision: Optional[str] = None, + repo_type: Optional[str] = None, +) -> Union[str, _CACHED_NO_EXIST_T, None]: + """ + Explores the cache to return the latest cached file for a given revision if found. + + This function will not raise any exception if the file in not cached. + + Args: + cache_dir (`str` or `os.PathLike`): + The folder where the cached files lie. + repo_id (`str`): + The ID of the repo on huggingface.co. + filename (`str`): + The filename to look for inside `repo_id`. + revision (`str`, *optional*): + The specific model version to use. Will default to `"main"` if it's not provided and no `commit_hash` is + provided either. + repo_type (`str`, *optional*): + The type of the repository. Will default to `"model"`. + + Returns: + `Optional[str]` or `_CACHED_NO_EXIST`: + Will return `None` if the file was not cached. Otherwise: + - The exact path to the cached file if it's found in the cache + - A special value `_CACHED_NO_EXIST` if the file does not exist at the given commit hash and this fact was + cached. + + Example: + + ```python + from huggingface_hub import try_to_load_from_cache, _CACHED_NO_EXIST + + filepath = try_to_load_from_cache() + if isinstance(filepath, str): + # file exists and is cached + ... 
+ elif filepath is _CACHED_NO_EXIST: + # non-existence of file is cached + ... + else: + # file is not cached + ... + ``` + """ + if revision is None: + revision = "main" + if repo_type is None: + repo_type = "model" + if repo_type not in constants.REPO_TYPES: + raise ValueError(f"Invalid repo type: {repo_type}. Accepted repo types are: {str(constants.REPO_TYPES)}") + if cache_dir is None: + cache_dir = constants.HF_HUB_CACHE + + object_id = repo_id.replace("/", "--") + repo_cache = os.path.join(cache_dir, f"{repo_type}s--{object_id}") + if not os.path.isdir(repo_cache): + # No cache for this model + return None + + refs_dir = os.path.join(repo_cache, "refs") + snapshots_dir = os.path.join(repo_cache, "snapshots") + no_exist_dir = os.path.join(repo_cache, ".no_exist") + + # Resolve refs (for instance to convert main to the associated commit sha) + if os.path.isdir(refs_dir): + revision_file = os.path.join(refs_dir, revision) + if os.path.isfile(revision_file): + with open(revision_file) as f: + revision = f.read() + + # Check if file is cached as "no_exist" + if os.path.isfile(os.path.join(no_exist_dir, revision, filename)): + return _CACHED_NO_EXIST + + # Check if revision folder exists + if not os.path.exists(snapshots_dir): + return None + cached_shas = os.listdir(snapshots_dir) + if revision not in cached_shas: + # No cache for this revision and we won't try to return a random revision + return None + + # Check if file exists in cache + cached_file = os.path.join(snapshots_dir, revision, filename) + return cached_file if os.path.isfile(cached_file) else None + + +@validate_hf_hub_args +def get_hf_file_metadata( + url: str, + token: Union[bool, str, None] = None, + proxies: Optional[Dict] = None, + timeout: Optional[float] = constants.DEFAULT_REQUEST_TIMEOUT, + library_name: Optional[str] = None, + library_version: Optional[str] = None, + user_agent: Union[Dict, str, None] = None, + headers: Optional[Dict[str, str]] = None, + endpoint: Optional[str] = None, +) 
-> HfFileMetadata: + """Fetch metadata of a file versioned on the Hub for a given url. + + Args: + url (`str`): + File url, for example returned by [`hf_hub_url`]. + token (`str` or `bool`, *optional*): + A token to be used for the download. + - If `True`, the token is read from the HuggingFace config + folder. + - If `False` or `None`, no token is provided. + - If a string, it's used as the authentication token. + proxies (`dict`, *optional*): + Dictionary mapping protocol to the URL of the proxy passed to + `requests.request`. + timeout (`float`, *optional*, defaults to 10): + How many seconds to wait for the server to send metadata before giving up. + library_name (`str`, *optional*): + The name of the library to which the object corresponds. + library_version (`str`, *optional*): + The version of the library. + user_agent (`dict`, `str`, *optional*): + The user-agent info in the form of a dictionary or a string. + headers (`dict`, *optional*): + Additional headers to be sent with the request. + endpoint (`str`, *optional*): + Endpoint of the Hub. Defaults to . + + Returns: + A [`HfFileMetadata`] object containing metadata such as location, etag, size and + commit_hash. + """ + hf_headers = build_hf_headers( + token=token, + library_name=library_name, + library_version=library_version, + user_agent=user_agent, + headers=headers, + ) + hf_headers["Accept-Encoding"] = "identity" # prevent any compression => we want to know the real size of the file + + # Retrieve metadata + r = _request_wrapper( + method="HEAD", + url=url, + headers=hf_headers, + allow_redirects=False, + follow_relative_redirects=True, + proxies=proxies, + timeout=timeout, + ) + hf_raise_for_status(r) + + # Return + return HfFileMetadata( + commit_hash=r.headers.get(constants.HUGGINGFACE_HEADER_X_REPO_COMMIT), + # We favor a custom header indicating the etag of the linked resource, and + # we fallback to the regular etag header. 
+ etag=_normalize_etag(r.headers.get(constants.HUGGINGFACE_HEADER_X_LINKED_ETAG) or r.headers.get("ETag")), + # Either from response headers (if redirected) or defaults to request url + # Do not use directly `url`, as `_request_wrapper` might have followed relative + # redirects. + location=r.headers.get("Location") or r.request.url, # type: ignore + size=_int_or_none( + r.headers.get(constants.HUGGINGFACE_HEADER_X_LINKED_SIZE) or r.headers.get("Content-Length") + ), + xet_file_data=parse_xet_file_data_from_response(r, endpoint=endpoint), # type: ignore + ) + + +def _get_metadata_or_catch_error( + *, + repo_id: str, + filename: str, + repo_type: str, + revision: str, + endpoint: Optional[str], + proxies: Optional[Dict], + etag_timeout: Optional[float], + headers: Dict[str, str], # mutated inplace! + token: Union[bool, str, None], + local_files_only: bool, + relative_filename: Optional[str] = None, # only used to store `.no_exists` in cache + storage_folder: Optional[str] = None, # only used to store `.no_exists` in cache +) -> Union[ + # Either an exception is caught and returned + Tuple[None, None, None, None, None, Exception], + # Or the metadata is returned as + # `(url_to_download, etag, commit_hash, expected_size, xet_file_data, None)` + Tuple[str, str, str, int, Optional[XetFileData], None], +]: + """Get metadata for a file on the Hub, safely handling network issues. + + Returns either the etag, commit_hash and expected size of the file, or the error + raised while fetching the metadata. + + NOTE: This function mutates `headers` inplace! It removes the `authorization` header + if the file is a LFS blob and the domain of the url is different from the + domain of the location (typically an S3 bucket). + """ + if local_files_only: + return ( + None, + None, + None, + None, + None, + OfflineModeIsEnabled( + f"Cannot access file since 'local_files_only=True' as been set. 
(repo_id: {repo_id}, repo_type: {repo_type}, revision: {revision}, filename: {filename})" + ), + ) + + url = hf_hub_url(repo_id, filename, repo_type=repo_type, revision=revision, endpoint=endpoint) + url_to_download: str = url + etag: Optional[str] = None + commit_hash: Optional[str] = None + expected_size: Optional[int] = None + head_error_call: Optional[Exception] = None + xet_file_data: Optional[XetFileData] = None + + # Try to get metadata from the server. + # Do not raise yet if the file is not found or not accessible. + if not local_files_only: + try: + try: + metadata = get_hf_file_metadata( + url=url, proxies=proxies, timeout=etag_timeout, headers=headers, token=token, endpoint=endpoint + ) + except EntryNotFoundError as http_error: + if storage_folder is not None and relative_filename is not None: + # Cache the non-existence of the file + commit_hash = http_error.response.headers.get(constants.HUGGINGFACE_HEADER_X_REPO_COMMIT) + if commit_hash is not None: + no_exist_file_path = Path(storage_folder) / ".no_exist" / commit_hash / relative_filename + try: + no_exist_file_path.parent.mkdir(parents=True, exist_ok=True) + no_exist_file_path.touch() + except OSError as e: + logger.error( + f"Could not cache non-existence of file. Will ignore error and continue. Error: {e}" + ) + _cache_commit_hash_for_specific_revision(storage_folder, revision, commit_hash) + raise + + # Commit hash must exist + commit_hash = metadata.commit_hash + if commit_hash is None: + raise FileMetadataError( + "Distant resource does not seem to be on huggingface.co. It is possible that a configuration issue" + " prevents you from downloading resources from https://huggingface.co. Please check your firewall" + " and proxy settings and make sure your SSL certificates are updated." + ) + + # Etag must exist + # If we don't have any of those, raise an error. 
+ etag = metadata.etag + if etag is None: + raise FileMetadataError( + "Distant resource does not have an ETag, we won't be able to reliably ensure reproducibility." + ) + + # Size must exist + expected_size = metadata.size + if expected_size is None: + raise FileMetadataError("Distant resource does not have a Content-Length.") + + xet_file_data = metadata.xet_file_data + + # In case of a redirect, save an extra redirect on the request.get call, + # and ensure we download the exact atomic version even if it changed + # between the HEAD and the GET (unlikely, but hey). + # + # If url domain is different => we are downloading from a CDN => url is signed => don't send auth + # If url domain is the same => redirect due to repo rename AND downloading a regular file => keep auth + if xet_file_data is None and url != metadata.location: + url_to_download = metadata.location + if urlparse(url).netloc != urlparse(metadata.location).netloc: + # Remove authorization header when downloading a LFS blob + headers.pop("authorization", None) + except (requests.exceptions.SSLError, requests.exceptions.ProxyError): + # Actually raise for those subclasses of ConnectionError + raise + except ( + requests.exceptions.ConnectionError, + requests.exceptions.Timeout, + OfflineModeIsEnabled, + ) as error: + # Otherwise, our Internet connection is down. + # etag is None + head_error_call = error + except (RevisionNotFoundError, EntryNotFoundError): + # The repo was found but the revision or entry doesn't exist on the Hub (never existed or got deleted) + raise + except requests.HTTPError as error: + # Multiple reasons for an http error: + # - Repository is private and invalid/missing token sent + # - Repository is gated and invalid/missing token sent + # - Hub is down (error 500 or 504) + # => let's switch to 'local_files_only=True' to check if the files are already cached. 
+ # (if it's not the case, the error will be re-raised) + head_error_call = error + except FileMetadataError as error: + # Multiple reasons for a FileMetadataError: + # - Wrong network configuration (proxy, firewall, SSL certificates) + # - Inconsistency on the Hub + # => let's switch to 'local_files_only=True' to check if the files are already cached. + # (if it's not the case, the error will be re-raised) + head_error_call = error + + if not (local_files_only or etag is not None or head_error_call is not None): + raise RuntimeError("etag is empty due to uncovered problems") + + return (url_to_download, etag, commit_hash, expected_size, xet_file_data, head_error_call) # type: ignore [return-value] + + +def _raise_on_head_call_error(head_call_error: Exception, force_download: bool, local_files_only: bool) -> NoReturn: + """Raise an appropriate error when the HEAD call failed and we cannot locate a local file.""" + # No head call => we cannot force download. + if force_download: + if local_files_only: + raise ValueError("Cannot pass 'force_download=True' and 'local_files_only=True' at the same time.") + elif isinstance(head_call_error, OfflineModeIsEnabled): + raise ValueError("Cannot pass 'force_download=True' when offline mode is enabled.") from head_call_error + else: + raise ValueError("Force download failed due to the above error.") from head_call_error + + # No head call + couldn't find an appropriate file on disk => raise an error. + if local_files_only: + raise LocalEntryNotFoundError( + "Cannot find the requested files in the disk cache and outgoing traffic has been disabled. To enable" + " hf.co look-ups and downloads online, set 'local_files_only' to False." 
+ ) + elif isinstance(head_call_error, (RepositoryNotFoundError, GatedRepoError)) or ( + isinstance(head_call_error, HfHubHTTPError) and head_call_error.response.status_code == 401 + ): + # Repo not found or gated => let's raise the actual error + # Unauthorized => likely a token issue => let's raise the actual error + raise head_call_error + else: + # Otherwise: most likely a connection issue or Hub downtime => let's warn the user + raise LocalEntryNotFoundError( + "An error happened while trying to locate the file on the Hub and we cannot find the requested files" + " in the local cache. Please check your connection and try again or make sure your Internet connection" + " is on." + ) from head_call_error + + +def _download_to_tmp_and_move( + incomplete_path: Path, + destination_path: Path, + url_to_download: str, + proxies: Optional[Dict], + headers: Dict[str, str], + expected_size: Optional[int], + filename: str, + force_download: bool, + etag: Optional[str], + xet_file_data: Optional[XetFileData], +) -> None: + """Download content from a URL to a destination path. + + Internal logic: + - return early if file is already downloaded + - resume download if possible (from incomplete file) + - do not resume download if `force_download=True` or `HF_HUB_ENABLE_HF_TRANSFER=True` + - check disk space before downloading + - download content to a temporary file + - set correct permissions on temporary file + - move the temporary file to the destination path + + Both `incomplete_path` and `destination_path` must be on the same volume to avoid a local copy. + """ + if destination_path.exists() and not force_download: + # Do nothing if already exists (except if force_download=True) + return + + if incomplete_path.exists() and (force_download or (constants.HF_HUB_ENABLE_HF_TRANSFER and not proxies)): + # By default, we will try to resume the download if possible. 
+ # However, if the user has set `force_download=True` or if `hf_transfer` is enabled, then we should + # not resume the download => delete the incomplete file. + message = f"Removing incomplete file '{incomplete_path}'" + if force_download: + message += " (force_download=True)" + elif constants.HF_HUB_ENABLE_HF_TRANSFER and not proxies: + message += " (hf_transfer=True)" + logger.info(message) + incomplete_path.unlink(missing_ok=True) + + with incomplete_path.open("ab") as f: + resume_size = f.tell() + message = f"Downloading '{filename}' to '{incomplete_path}'" + if resume_size > 0 and expected_size is not None: + message += f" (resume from {resume_size}/{expected_size})" + logger.info(message) + + if expected_size is not None: # might be None if HTTP header not set correctly + # Check disk space in both tmp and destination path + _check_disk_space(expected_size, incomplete_path.parent) + _check_disk_space(expected_size, destination_path.parent) + + if xet_file_data is not None and is_xet_available(): + logger.debug("Xet Storage is enabled for this repo. Downloading file from Xet Storage..") + xet_get( + incomplete_path=incomplete_path, + xet_file_data=xet_file_data, + headers=headers, + expected_size=expected_size, + displayed_filename=filename, + ) + else: + if xet_file_data is not None and not constants.HF_HUB_DISABLE_XET: + logger.warning( + "Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. " + "Falling back to regular HTTP download. " + "For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`" + ) + + http_get( + url_to_download, + f, + proxies=proxies, + resume_size=resume_size, + headers=headers, + expected_size=expected_size, + ) + + logger.info(f"Download complete. 
Moving file to {destination_path}") + _chmod_and_move(incomplete_path, destination_path) + + +def _int_or_none(value: Optional[str]) -> Optional[int]: + try: + return int(value) # type: ignore + except (TypeError, ValueError): + return None + + +def _chmod_and_move(src: Path, dst: Path) -> None: + """Set correct permission before moving a blob from tmp directory to cache dir. + + Do not take into account the `umask` from the process as there is no convenient way + to get it that is thread-safe. + + See: + - About umask: https://docs.python.org/3/library/os.html#os.umask + - Thread-safety: https://stackoverflow.com/a/70343066 + - About solution: https://github.com/huggingface/huggingface_hub/pull/1220#issuecomment-1326211591 + - Fix issue: https://github.com/huggingface/huggingface_hub/issues/1141 + - Fix issue: https://github.com/huggingface/huggingface_hub/issues/1215 + """ + # Get umask by creating a temporary file in the cached repo folder. + tmp_file = dst.parent.parent / f"tmp_{uuid.uuid4()}" + try: + tmp_file.touch() + cache_dir_mode = Path(tmp_file).stat().st_mode + os.chmod(str(src), stat.S_IMODE(cache_dir_mode)) + except OSError as e: + logger.warning( + f"Could not set the permissions on the file '{src}'. Error: {e}.\nContinuing without setting permissions." + ) + finally: + try: + tmp_file.unlink() + except OSError: + # fails if `tmp_file.touch()` failed => do nothing + # See https://github.com/huggingface/huggingface_hub/issues/2359 + pass + + shutil.move(str(src), str(dst), copy_function=_copy_no_matter_what) + + +def _copy_no_matter_what(src: str, dst: str) -> None: + """Copy file from src to dst. + + If `shutil.copy2` fails, fallback to `shutil.copyfile`. + """ + try: + # Copy file with metadata and permission + # Can fail e.g. 
if dst is an S3 mount + shutil.copy2(src, dst) + except OSError: + # Copy only file content + shutil.copyfile(src, dst) + + +def _get_pointer_path(storage_folder: str, revision: str, relative_filename: str) -> str: + # Using `os.path.abspath` instead of `Path.resolve()` to avoid resolving symlinks + snapshot_path = os.path.join(storage_folder, "snapshots") + pointer_path = os.path.join(snapshot_path, revision, relative_filename) + if Path(os.path.abspath(snapshot_path)) not in Path(os.path.abspath(pointer_path)).parents: + raise ValueError( + "Invalid pointer path: cannot create pointer path in snapshot folder if" + f" `storage_folder='{storage_folder}'`, `revision='{revision}'` and" + f" `relative_filename='{relative_filename}'`." + ) + return pointer_path diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/hf_api.py b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/hf_api.py new file mode 100644 index 0000000000000000000000000000000000000000..8977e202da9631530dee3009427eae6ad26ec17f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/hf_api.py @@ -0,0 +1,11036 @@ +# coding=utf-8 +# Copyright 2019-present, the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import annotations + +import inspect +import json +import re +import struct +import time +import warnings +from collections import defaultdict +from concurrent.futures import Future, ThreadPoolExecutor +from dataclasses import asdict, dataclass, field +from datetime import datetime +from functools import wraps +from itertools import islice +from pathlib import Path +from textwrap import dedent +from typing import ( + TYPE_CHECKING, + Any, + BinaryIO, + Callable, + Dict, + Iterable, + Iterator, + List, + Literal, + Optional, + Tuple, + Type, + TypeVar, + Union, + overload, +) +from urllib.parse import quote + +import requests +from requests.exceptions import HTTPError +from tqdm.auto import tqdm as base_tqdm +from tqdm.contrib.concurrent import thread_map + +from . import constants +from ._commit_api import ( + CommitOperation, + CommitOperationAdd, + CommitOperationCopy, + CommitOperationDelete, + _fetch_files_to_copy, + _fetch_upload_modes, + _prepare_commit_payload, + _upload_files, + _warn_on_overwriting_operations, +) +from ._inference_endpoints import InferenceEndpoint, InferenceEndpointType +from ._jobs_api import JobInfo, JobSpec, ScheduledJobInfo, _create_job_spec +from ._space_api import SpaceHardware, SpaceRuntime, SpaceStorage, SpaceVariable +from ._upload_large_folder import upload_large_folder_internal +from .community import ( + Discussion, + DiscussionComment, + DiscussionStatusChange, + DiscussionTitleChange, + DiscussionWithDetails, + deserialize_event, +) +from .constants import ( + DEFAULT_ETAG_TIMEOUT, # noqa: F401 # kept for backward compatibility + DEFAULT_REQUEST_TIMEOUT, # noqa: F401 # kept for backward compatibility + DEFAULT_REVISION, # noqa: F401 # kept for backward compatibility + DISCUSSION_STATUS, # noqa: F401 # kept for backward compatibility + DISCUSSION_TYPES, # noqa: F401 # kept for backward compatibility + ENDPOINT, # noqa: F401 # kept for backward compatibility + INFERENCE_ENDPOINTS_ENDPOINT, # noqa: F401 # kept 
for backward compatibility + REGEX_COMMIT_OID, # noqa: F401 # kept for backward compatibility + REPO_TYPE_MODEL, # noqa: F401 # kept for backward compatibility + REPO_TYPES, # noqa: F401 # kept for backward compatibility + REPO_TYPES_MAPPING, # noqa: F401 # kept for backward compatibility + REPO_TYPES_URL_PREFIXES, # noqa: F401 # kept for backward compatibility + SAFETENSORS_INDEX_FILE, # noqa: F401 # kept for backward compatibility + SAFETENSORS_MAX_HEADER_LENGTH, # noqa: F401 # kept for backward compatibility + SAFETENSORS_SINGLE_FILE, # noqa: F401 # kept for backward compatibility + SPACES_SDK_TYPES, # noqa: F401 # kept for backward compatibility + WEBHOOK_DOMAIN_T, # noqa: F401 # kept for backward compatibility + DiscussionStatusFilter, # noqa: F401 # kept for backward compatibility + DiscussionTypeFilter, # noqa: F401 # kept for backward compatibility +) +from .errors import ( + BadRequestError, + EntryNotFoundError, + GatedRepoError, + HfHubHTTPError, + RepositoryNotFoundError, + RevisionNotFoundError, +) +from .file_download import HfFileMetadata, get_hf_file_metadata, hf_hub_url +from .repocard_data import DatasetCardData, ModelCardData, SpaceCardData +from .utils import ( + DEFAULT_IGNORE_PATTERNS, + HfFolder, # noqa: F401 # kept for backward compatibility + LocalTokenNotFoundError, + NotASafetensorsRepoError, + SafetensorsFileMetadata, + SafetensorsParsingError, + SafetensorsRepoMetadata, + TensorInfo, + build_hf_headers, + chunk_iterable, + experimental, + filter_repo_objects, + fix_hf_endpoint_in_url, + get_session, + get_token, + hf_raise_for_status, + logging, + paginate, + parse_datetime, + validate_hf_hub_args, +) +from .utils import tqdm as hf_tqdm +from .utils._auth import _get_token_from_environment, _get_token_from_file, _get_token_from_google_colab +from .utils._deprecation import _deprecate_arguments, _deprecate_method +from .utils._typing import CallableT +from .utils.endpoint_helpers import _is_emission_within_threshold + + +if 
TYPE_CHECKING: + from .inference._providers import PROVIDER_T + +R = TypeVar("R") # Return type +CollectionItemType_T = Literal["model", "dataset", "space", "paper", "collection"] + +ExpandModelProperty_T = Literal[ + "author", + "baseModels", + "cardData", + "childrenModelCount", + "config", + "createdAt", + "disabled", + "downloads", + "downloadsAllTime", + "gated", + "gguf", + "inference", + "inferenceProviderMapping", + "lastModified", + "library_name", + "likes", + "mask_token", + "model-index", + "pipeline_tag", + "private", + "resourceGroup", + "safetensors", + "sha", + "siblings", + "spaces", + "tags", + "transformersInfo", + "trendingScore", + "usedStorage", + "widgetData", + "xetEnabled", +] + +ExpandDatasetProperty_T = Literal[ + "author", + "cardData", + "citation", + "createdAt", + "description", + "disabled", + "downloads", + "downloadsAllTime", + "gated", + "lastModified", + "likes", + "paperswithcode_id", + "private", + "resourceGroup", + "sha", + "siblings", + "tags", + "trendingScore", + "usedStorage", + "xetEnabled", +] + +ExpandSpaceProperty_T = Literal[ + "author", + "cardData", + "createdAt", + "datasets", + "disabled", + "lastModified", + "likes", + "models", + "private", + "resourceGroup", + "runtime", + "sdk", + "sha", + "siblings", + "subdomain", + "tags", + "trendingScore", + "usedStorage", + "xetEnabled", +] + +USERNAME_PLACEHOLDER = "hf_user" +_REGEX_DISCUSSION_URL = re.compile(r".*/discussions/(\d+)$") + +_CREATE_COMMIT_NO_REPO_ERROR_MESSAGE = ( + "\nNote: Creating a commit assumes that the repo already exists on the" + " Huggingface Hub. Please use `create_repo` if it's not the case." +) +_AUTH_CHECK_NO_REPO_ERROR_MESSAGE = ( + "\nNote: The repository either does not exist or you do not have access rights." + " Please check the repository ID and your access permissions." + " If this is a private repository, ensure that your token is correct." 
+) +logger = logging.get_logger(__name__) + + +def repo_type_and_id_from_hf_id(hf_id: str, hub_url: Optional[str] = None) -> Tuple[Optional[str], Optional[str], str]: + """ + Returns the repo type and ID from a huggingface.co URL linking to a + repository + + Args: + hf_id (`str`): + A URL or ID of a repository on the HF hub. Accepted values are: + + - https://huggingface.co/<repo_type>/<namespace>/<repo_id> + - https://huggingface.co/<namespace>/<repo_id> + - hf://<repo_type>/<namespace>/<repo_id> + - hf://<namespace>/<repo_id> + - <repo_type>/<namespace>/<repo_id> + - <namespace>/<repo_id> + - <repo_id> + hub_url (`str`, *optional*): + The URL of the HuggingFace Hub, defaults to https://huggingface.co + + Returns: + A tuple with three items: repo_type (`str` or `None`), namespace (`str` or + `None`) and repo_id (`str`). + + Raises: + [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + If URL cannot be parsed. + [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + If `repo_type` is unknown. + """ + input_hf_id = hf_id + + hub_url = re.sub(r"https?://", "", hub_url if hub_url is not None else constants.ENDPOINT) + is_hf_url = hub_url in hf_id and "@" not in hf_id + + HFFS_PREFIX = "hf://" + if hf_id.startswith(HFFS_PREFIX): # Remove "hf://" prefix if exists + hf_id = hf_id[len(HFFS_PREFIX) :] + + url_segments = hf_id.split("/") + is_hf_id = len(url_segments) <= 3 + + namespace: Optional[str] + if is_hf_url: + namespace, repo_id = url_segments[-2:] + if namespace == hub_url: + namespace = None + if len(url_segments) > 2 and hub_url not in url_segments[-3]: + repo_type = url_segments[-3] + elif namespace in constants.REPO_TYPES_MAPPING: + # Means a canonical dataset or model + repo_type = constants.REPO_TYPES_MAPPING[namespace] + namespace = None + else: + repo_type = None + elif is_hf_id: + if len(url_segments) == 3: + # Passed <repo_type>/<user>/<model_id> or <repo_type>/<org>/<model_id> + repo_type, namespace, repo_id = url_segments[-3:] + elif len(url_segments) == 2: + if url_segments[0] in constants.REPO_TYPES_MAPPING: + # Passed '<model_id>' or 'datasets/<dataset_id>' for a canonical model or dataset + repo_type = 
constants.REPO_TYPES_MAPPING[url_segments[0]] + namespace = None + repo_id = hf_id.split("/")[-1] + else: + # Passed <user>/<model_id> or <org>/<model_id> + namespace, repo_id = hf_id.split("/")[-2:] + repo_type = None + else: + # Passed <model_id> + repo_id = url_segments[0] + namespace, repo_type = None, None + else: + raise ValueError(f"Unable to retrieve user and repo ID from the passed HF ID: {hf_id}") + + # Check if repo type is known (mapping "spaces" => "space" + empty value => `None`) + if repo_type in constants.REPO_TYPES_MAPPING: + repo_type = constants.REPO_TYPES_MAPPING[repo_type] + if repo_type == "": + repo_type = None + if repo_type not in constants.REPO_TYPES: + raise ValueError(f"Unknown `repo_type`: '{repo_type}' ('{input_hf_id}')") + + return repo_type, namespace, repo_id + + +@dataclass +class LastCommitInfo(dict): + oid: str + title: str + date: datetime + + def __post_init__(self): # hack to make LastCommitInfo backward compatible + self.update(asdict(self)) + + +@dataclass +class BlobLfsInfo(dict): + size: int + sha256: str + pointer_size: int + + def __post_init__(self): # hack to make BlobLfsInfo backward compatible + self.update(asdict(self)) + + +@dataclass +class BlobSecurityInfo(dict): + safe: bool # duplicate information with "status" field, keeping it for backward compatibility + status: str + av_scan: Optional[Dict] + pickle_import_scan: Optional[Dict] + + def __post_init__(self): # hack to make BlobSecurityInfo backward compatible + self.update(asdict(self)) + + +@dataclass +class TransformersInfo(dict): + auto_model: str + custom_class: Optional[str] = None + # possible `pipeline_tag` values: https://github.com/huggingface/huggingface.js/blob/3ee32554b8620644a6287e786b2a83bf5caf559c/packages/tasks/src/pipelines.ts#L72 + pipeline_tag: Optional[str] = None + processor: Optional[str] = None + + def __post_init__(self): # hack to make TransformersInfo backward compatible + self.update(asdict(self)) + + +@dataclass +class SafeTensorsInfo(dict): + parameters: Dict[str, int] + 
total: int + + def __post_init__(self): # hack to make SafeTensorsInfo backward compatible + self.update(asdict(self)) + + +@dataclass +class CommitInfo(str): + """Data structure containing information about a newly created commit. + + Returned by any method that creates a commit on the Hub: [`create_commit`], [`upload_file`], [`upload_folder`], + [`delete_file`], [`delete_folder`]. It inherits from `str` for backward compatibility but using methods specific + to `str` is deprecated. + + Attributes: + commit_url (`str`): + Url where to find the commit. + + commit_message (`str`): + The summary (first line) of the commit that has been created. + + commit_description (`str`): + Description of the commit that has been created. Can be empty. + + oid (`str`): + Commit hash id. Example: `"91c54ad1727ee830252e457677f467be0bfd8a57"`. + + pr_url (`str`, *optional*): + Url to the PR that has been created, if any. Populated when `create_pr=True` + is passed. + + pr_revision (`str`, *optional*): + Revision of the PR that has been created, if any. Populated when + `create_pr=True` is passed. Example: `"refs/pr/1"`. + + pr_num (`int`, *optional*): + Number of the PR discussion that has been created, if any. Populated when + `create_pr=True` is passed. Can be passed as `discussion_num` in + [`get_discussion_details`]. Example: `1`. + + repo_url (`RepoUrl`): + Repo URL of the commit containing info like repo_id, repo_type, etc. + + _url (`str`, *optional*): + Legacy url for `str` compatibility. Can be the url to the uploaded file on the Hub (if returned by + [`upload_file`]), to the uploaded folder on the Hub (if returned by [`upload_folder`]) or to the commit on + the Hub (if returned by [`create_commit`]). Defaults to `commit_url`. It is deprecated to use this + attribute. Please use `commit_url` instead. 
+ """ + + commit_url: str + commit_message: str + commit_description: str + oid: str + pr_url: Optional[str] = None + + # Computed from `commit_url` in `__post_init__` + repo_url: RepoUrl = field(init=False) + + # Computed from `pr_url` in `__post_init__` + pr_revision: Optional[str] = field(init=False) + pr_num: Optional[str] = field(init=False) + + # legacy url for `str` compatibility (ex: url to uploaded file, url to uploaded folder, url to PR, etc.) + _url: str = field(repr=False, default=None) # type: ignore # defaults to `commit_url` + + def __new__(cls, *args, commit_url: str, _url: Optional[str] = None, **kwargs): + return str.__new__(cls, _url or commit_url) + + def __post_init__(self): + """Populate pr-related fields after initialization. + + See https://docs.python.org/3.10/library/dataclasses.html#post-init-processing. + """ + # Repo info + self.repo_url = RepoUrl(self.commit_url.split("/commit/")[0]) + + # PR info + if self.pr_url is not None: + self.pr_revision = _parse_revision_from_pr_url(self.pr_url) + self.pr_num = int(self.pr_revision.split("/")[-1]) + else: + self.pr_revision = None + self.pr_num = None + + +@dataclass +class AccessRequest: + """Data structure containing information about a user access request. + + Attributes: + username (`str`): + Username of the user who requested access. + fullname (`str`): + Fullname of the user who requested access. + email (`Optional[str]`): + Email of the user who requested access. + Can only be `None` in the /accepted list if the user was granted access manually. + timestamp (`datetime`): + Timestamp of the request. + status (`Literal["pending", "accepted", "rejected"]`): + Status of the request. Can be one of `["pending", "accepted", "rejected"]`. + fields (`Dict[str, Any]`, *optional*): + Additional fields filled by the user in the gate form. 
+ """ + + username: str + fullname: str + email: Optional[str] + timestamp: datetime + status: Literal["pending", "accepted", "rejected"] + + # Additional fields filled by the user in the gate form + fields: Optional[Dict[str, Any]] = None + + +@dataclass +class WebhookWatchedItem: + """Data structure containing information about the items watched by a webhook. + + Attributes: + type (`Literal["dataset", "model", "org", "space", "user"]`): + Type of the item to be watched. Can be one of `["dataset", "model", "org", "space", "user"]`. + name (`str`): + Name of the item to be watched. Can be the username, organization name, model name, dataset name or space name. + """ + + type: Literal["dataset", "model", "org", "space", "user"] + name: str + + +@dataclass +class WebhookInfo: + """Data structure containing information about a webhook. + + One of `url` or `job` is specified, but not both. + + Attributes: + id (`str`): + ID of the webhook. + url (`str`, *optional*): + URL of the webhook. + job (`JobSpec`, *optional*): + Specifications of the Job to trigger. + watched (`List[WebhookWatchedItem]`): + List of items watched by the webhook, see [`WebhookWatchedItem`]. + domains (`List[WEBHOOK_DOMAIN_T]`): + List of domains the webhook is watching. Can be one of `["repo", "discussions"]`. + secret (`str`, *optional*): + Secret of the webhook. + disabled (`bool`): + Whether the webhook is disabled or not. + """ + + id: str + url: Optional[str] + job: Optional[JobSpec] + watched: List[WebhookWatchedItem] + domains: List[constants.WEBHOOK_DOMAIN_T] + secret: Optional[str] + disabled: bool + + +class RepoUrl(str): + """Subclass of `str` describing a repo URL on the Hub. + + `RepoUrl` is returned by `HfApi.create_repo`. It inherits from `str` for backward + compatibility. 
At initialization, the URL is parsed to populate properties: + - endpoint (`str`) + - namespace (`Optional[str]`) + - repo_name (`str`) + - repo_id (`str`) + - repo_type (`Literal["model", "dataset", "space"]`) + - url (`str`) + + Args: + url (`Any`): + String value of the repo url. + endpoint (`str`, *optional*): + Endpoint of the Hub. Defaults to <https://huggingface.co>. + + Example: + ```py + >>> RepoUrl('https://huggingface.co/gpt2') + RepoUrl('https://huggingface.co/gpt2', endpoint='https://huggingface.co', repo_type='model', repo_id='gpt2') + + >>> RepoUrl('https://hub-ci.huggingface.co/datasets/dummy_user/dummy_dataset', endpoint='https://hub-ci.huggingface.co') + RepoUrl('https://hub-ci.huggingface.co/datasets/dummy_user/dummy_dataset', endpoint='https://hub-ci.huggingface.co', repo_type='dataset', repo_id='dummy_user/dummy_dataset') + + >>> RepoUrl('hf://datasets/my-user/my-dataset') + RepoUrl('hf://datasets/my-user/my-dataset', endpoint='https://huggingface.co', repo_type='dataset', repo_id='my-user/my-dataset') + + >>> HfApi.create_repo("dummy_model") + RepoUrl('https://huggingface.co/Wauplin/dummy_model', endpoint='https://huggingface.co', repo_type='model', repo_id='Wauplin/dummy_model') + ``` + + Raises: + [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + If URL cannot be parsed. + [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + If `repo_type` is unknown. 
+ """ + + def __new__(cls, url: Any, endpoint: Optional[str] = None): + url = fix_hf_endpoint_in_url(url, endpoint=endpoint) + return super(RepoUrl, cls).__new__(cls, url) + + def __init__(self, url: Any, endpoint: Optional[str] = None) -> None: + super().__init__() + # Parse URL + self.endpoint = endpoint or constants.ENDPOINT + repo_type, namespace, repo_name = repo_type_and_id_from_hf_id(self, hub_url=self.endpoint) + + # Populate fields + self.namespace = namespace + self.repo_name = repo_name + self.repo_id = repo_name if namespace is None else f"{namespace}/{repo_name}" + self.repo_type = repo_type or constants.REPO_TYPE_MODEL + self.url = str(self) # just in case it's needed + + def __repr__(self) -> str: + return f"RepoUrl('{self}', endpoint='{self.endpoint}', repo_type='{self.repo_type}', repo_id='{self.repo_id}')" + + +@dataclass +class RepoSibling: + """ + Contains basic information about a repo file inside a repo on the Hub. + + > [!TIP] + > All attributes of this class are optional except `rfilename`. This is because only the file names are returned when + > listing repositories on the Hub (with [`list_models`], [`list_datasets`] or [`list_spaces`]). If you need more + > information like file size, blob id or lfs details, you must request them specifically from one repo at a time + > (using [`model_info`], [`dataset_info`] or [`space_info`]) as it adds more constraints on the backend server to + > retrieve these. + + Attributes: + rfilename (str): + file name, relative to the repo root. + size (`int`, *optional*): + The file's size, in bytes. This attribute is defined when `files_metadata` argument of [`repo_info`] is set + to `True`. It's `None` otherwise. + blob_id (`str`, *optional*): + The file's git OID. This attribute is defined when `files_metadata` argument of [`repo_info`] is set to + `True`. It's `None` otherwise. + lfs (`BlobLfsInfo`, *optional*): + The file's LFS metadata. 
This attribute is defined when `files_metadata` argument of [`repo_info`] is set to + `True` and the file is stored with Git LFS. It's `None` otherwise. + """ + + rfilename: str + size: Optional[int] = None + blob_id: Optional[str] = None + lfs: Optional[BlobLfsInfo] = None + + +@dataclass +class RepoFile: + """ + Contains information about a file on the Hub. + + Attributes: + path (str): + file path relative to the repo root. + size (`int`): + The file's size, in bytes. + blob_id (`str`): + The file's git OID. + lfs (`BlobLfsInfo`, *optional*): + The file's LFS metadata. + last_commit (`LastCommitInfo`, *optional*): + The file's last commit metadata. Only defined if [`list_repo_tree`] and [`get_paths_info`] + are called with `expand=True`. + security (`BlobSecurityInfo`, *optional*): + The file's security scan metadata. Only defined if [`list_repo_tree`] and [`get_paths_info`] + are called with `expand=True`. + """ + + path: str + size: int + blob_id: str + lfs: Optional[BlobLfsInfo] = None + last_commit: Optional[LastCommitInfo] = None + security: Optional[BlobSecurityInfo] = None + + def __init__(self, **kwargs): + self.path = kwargs.pop("path") + self.size = kwargs.pop("size") + self.blob_id = kwargs.pop("oid") + lfs = kwargs.pop("lfs", None) + if lfs is not None: + lfs = BlobLfsInfo(size=lfs["size"], sha256=lfs["oid"], pointer_size=lfs["pointerSize"]) + self.lfs = lfs + last_commit = kwargs.pop("lastCommit", None) or kwargs.pop("last_commit", None) + if last_commit is not None: + last_commit = LastCommitInfo( + oid=last_commit["id"], title=last_commit["title"], date=parse_datetime(last_commit["date"]) + ) + self.last_commit = last_commit + security = kwargs.pop("securityFileStatus", None) + if security is not None: + safe = security["status"] == "safe" + security = BlobSecurityInfo( + safe=safe, + status=security["status"], + av_scan=security["avScan"], + pickle_import_scan=security["pickleImportScan"], + ) + self.security = security + + # backwards compatibility + 
self.rfilename = self.path + self.lastCommit = self.last_commit + + +@dataclass +class RepoFolder: + """ + Contains information about a folder on the Hub. + + Attributes: + path (str): + folder path relative to the repo root. + tree_id (`str`): + The folder's git OID. + last_commit (`LastCommitInfo`, *optional*): + The folder's last commit metadata. Only defined if [`list_repo_tree`] and [`get_paths_info`] + are called with `expand=True`. + """ + + path: str + tree_id: str + last_commit: Optional[LastCommitInfo] = None + + def __init__(self, **kwargs): + self.path = kwargs.pop("path") + self.tree_id = kwargs.pop("oid") + last_commit = kwargs.pop("lastCommit", None) or kwargs.pop("last_commit", None) + if last_commit is not None: + last_commit = LastCommitInfo( + oid=last_commit["id"], title=last_commit["title"], date=parse_datetime(last_commit["date"]) + ) + self.last_commit = last_commit + + +@dataclass +class InferenceProviderMapping: + provider: "PROVIDER_T" # Provider name + hf_model_id: str # ID of the model on the Hugging Face Hub + provider_id: str # ID of the model on the provider's side + status: Literal["error", "live", "staging"] + task: str + + adapter: Optional[str] = None + adapter_weights_path: Optional[str] = None + type: Optional[Literal["single-model", "tag-filter"]] = None + + def __init__(self, **kwargs): + self.provider = kwargs.pop("provider") + self.hf_model_id = kwargs.pop("hf_model_id") + self.provider_id = kwargs.pop("providerId") + self.status = kwargs.pop("status") + self.task = kwargs.pop("task") + + self.adapter = kwargs.pop("adapter", None) + self.adapter_weights_path = kwargs.pop("adapterWeightsPath", None) + self.type = kwargs.pop("type", None) + self.__dict__.update(**kwargs) + + +@dataclass +class ModelInfo: + """ + Contains information about a model on the Hub. This object is returned by [`model_info`] and [`list_models`]. + + > [!TIP] + > Most attributes of this class are optional. 
This is because the data returned by the Hub depends on the query made. + > In general, the more specific the query, the more information is returned. On the contrary, when listing models + > using [`list_models`] only a subset of the attributes are returned. + + Attributes: + id (`str`): + ID of model. + author (`str`, *optional*): + Author of the model. + sha (`str`, *optional*): + Repo SHA at this particular revision. + created_at (`datetime`, *optional*): + Date of creation of the repo on the Hub. Note that the lowest value is `2022-03-02T23:29:04.000Z`, + corresponding to the date when we began to store creation dates. + last_modified (`datetime`, *optional*): + Date of last commit to the repo. + private (`bool`): + Is the repo private. + disabled (`bool`, *optional*): + Is the repo disabled. + downloads (`int`): + Number of downloads of the model over the last 30 days. + downloads_all_time (`int`): + Cumulated number of downloads of the model since its creation. + gated (`Literal["auto", "manual", False]`, *optional*): + Is the repo gated. + If so, whether there is manual or automatic approval. + gguf (`Dict`, *optional*): + GGUF information of the model. + inference (`Literal["warm"]`, *optional*): + Status of the model on Inference Providers. Warm if the model is served by at least one provider. + inference_provider_mapping (`List[InferenceProviderMapping]`, *optional*): + A list of [`InferenceProviderMapping`] ordered after the user's provider order. + likes (`int`): + Number of likes of the model. + library_name (`str`, *optional*): + Library associated with the model. + tags (`List[str]`): + List of tags of the model. Compared to `card_data.tags`, contains extra tags computed by the Hub + (e.g. supported libraries, model's arXiv). + pipeline_tag (`str`, *optional*): + Pipeline tag associated with the model. + mask_token (`str`, *optional*): + Mask token used by the model. + widget_data (`Any`, *optional*): + Widget data associated with the model. 
+ model_index (`Dict`, *optional*): + Model index for evaluation. + config (`Dict`, *optional*): + Model configuration. + transformers_info (`TransformersInfo`, *optional*): + Transformers-specific info (auto class, processor, etc.) associated with the model. + trending_score (`int`, *optional*): + Trending score of the model. + card_data (`ModelCardData`, *optional*): + Model Card Metadata as a [`huggingface_hub.repocard_data.ModelCardData`] object. + siblings (`List[RepoSibling]`): + List of [`huggingface_hub.hf_api.RepoSibling`] objects that constitute the model. + spaces (`List[str]`, *optional*): + List of spaces using the model. + safetensors (`SafeTensorsInfo`, *optional*): + Model's safetensors information. + security_repo_status (`Dict`, *optional*): + Model's security scan status. + """ + + id: str + author: Optional[str] + sha: Optional[str] + created_at: Optional[datetime] + last_modified: Optional[datetime] + private: Optional[bool] + disabled: Optional[bool] + downloads: Optional[int] + downloads_all_time: Optional[int] + gated: Optional[Literal["auto", "manual", False]] + gguf: Optional[Dict] + inference: Optional[Literal["warm"]] + inference_provider_mapping: Optional[List[InferenceProviderMapping]] + likes: Optional[int] + library_name: Optional[str] + tags: Optional[List[str]] + pipeline_tag: Optional[str] + mask_token: Optional[str] + card_data: Optional[ModelCardData] + widget_data: Optional[Any] + model_index: Optional[Dict] + config: Optional[Dict] + transformers_info: Optional[TransformersInfo] + trending_score: Optional[int] + siblings: Optional[List[RepoSibling]] + spaces: Optional[List[str]] + safetensors: Optional[SafeTensorsInfo] + security_repo_status: Optional[Dict] + xet_enabled: Optional[bool] + + def __init__(self, **kwargs): + self.id = kwargs.pop("id") + self.author = kwargs.pop("author", None) + self.sha = kwargs.pop("sha", None) + last_modified = kwargs.pop("lastModified", None) or kwargs.pop("last_modified", None) + 
self.last_modified = parse_datetime(last_modified) if last_modified else None + created_at = kwargs.pop("createdAt", None) or kwargs.pop("created_at", None) + self.created_at = parse_datetime(created_at) if created_at else None + self.private = kwargs.pop("private", None) + self.gated = kwargs.pop("gated", None) + self.disabled = kwargs.pop("disabled", None) + self.downloads = kwargs.pop("downloads", None) + self.downloads_all_time = kwargs.pop("downloadsAllTime", None) + self.likes = kwargs.pop("likes", None) + self.library_name = kwargs.pop("library_name", None) + self.gguf = kwargs.pop("gguf", None) + + self.inference = kwargs.pop("inference", None) + + # little hack to simplify Inference Providers logic and make it backward and forward compatible + # right now, API returns a dict on model_info and a list on list_models. Let's harmonize to list. + mapping = kwargs.pop("inferenceProviderMapping", None) + if isinstance(mapping, list): + self.inference_provider_mapping = [ + InferenceProviderMapping(**{**value, "hf_model_id": self.id}) for value in mapping + ] + elif isinstance(mapping, dict): + self.inference_provider_mapping = [ + InferenceProviderMapping(**{**value, "hf_model_id": self.id, "provider": provider}) + for provider, value in mapping.items() + ] + elif mapping is None: + self.inference_provider_mapping = None + else: + raise ValueError( + f"Unexpected type for `inferenceProviderMapping`. Expecting `dict` or `list`. Got {mapping}." 
@dataclass
class DatasetInfo:
    """
    Contains information about a dataset on the Hub. This object is returned by [`dataset_info`] and [`list_datasets`].

    > [!TIP]
    > Most attributes of this class are optional. This is because the data returned by the Hub depends on the query made.
    > In general, the more specific the query, the more information is returned. On the contrary, when listing datasets
    > using [`list_datasets`] only a subset of the attributes are returned.

    Keyword arguments that the constructor does not recognize are stored unchanged on the instance.

    Attributes:
        id (`str`):
            ID of dataset.
        author (`str`):
            Author of the dataset.
        sha (`str`):
            Repo SHA at this particular revision.
        created_at (`datetime`, *optional*):
            Date of creation of the repo on the Hub. Note that the lowest value is `2022-03-02T23:29:04.000Z`,
            corresponding to the date when we began to store creation dates.
        last_modified (`datetime`, *optional*):
            Date of last commit to the repo.
        private (`bool`):
            Is the repo private.
        disabled (`bool`, *optional*):
            Is the repo disabled.
        gated (`Literal["auto", "manual", False]`, *optional*):
            Is the repo gated.
            If so, whether there is manual or automatic approval.
        downloads (`int`):
            Number of downloads of the dataset over the last 30 days.
        downloads_all_time (`int`):
            Cumulated number of downloads of the dataset since its creation.
        likes (`int`):
            Number of likes of the dataset.
        tags (`List[str]`):
            List of tags of the dataset.
        card_data (`DatasetCardData`, *optional*):
            Dataset Card Metadata as a [`huggingface_hub.repocard_data.DatasetCardData`] object.
        siblings (`List[RepoSibling]`):
            List of [`huggingface_hub.hf_api.RepoSibling`] objects that constitute the dataset.
        paperswithcode_id (`str`, *optional*):
            Papers with code ID of the dataset.
        trending_score (`int`, *optional*):
            Trending score of the dataset.
        xet_enabled (`bool`, *optional*):
            Value of the `xetEnabled` key of the payload, when present.
    """

    id: str
    author: Optional[str]
    sha: Optional[str]
    created_at: Optional[datetime]
    last_modified: Optional[datetime]
    private: Optional[bool]
    gated: Optional[Literal["auto", "manual", False]]
    disabled: Optional[bool]
    downloads: Optional[int]
    downloads_all_time: Optional[int]
    likes: Optional[int]
    paperswithcode_id: Optional[str]
    tags: Optional[List[str]]
    trending_score: Optional[int]
    card_data: Optional[DatasetCardData]
    siblings: Optional[List[RepoSibling]]
    xet_enabled: Optional[bool]

    def __init__(self, **kwargs):
        self.id = kwargs.pop("id")  # the only mandatory key
        self.author = kwargs.pop("author", None)
        self.sha = kwargs.pop("sha", None)

        # Timestamps: the camelCase key is tried first; the snake_case key is only consulted
        # when the camelCase one is missing or falsy.
        creation_raw = kwargs.pop("createdAt", None) or kwargs.pop("created_at", None)
        self.created_at = None if not creation_raw else parse_datetime(creation_raw)
        modification_raw = kwargs.pop("lastModified", None) or kwargs.pop("last_modified", None)
        self.last_modified = None if not modification_raw else parse_datetime(modification_raw)

        # Values copied verbatim from the payload: (attribute name, payload key).
        for attribute, payload_key in (
            ("private", "private"),
            ("gated", "gated"),
            ("disabled", "disabled"),
            ("downloads", "downloads"),
            ("downloads_all_time", "downloadsAllTime"),
            ("likes", "likes"),
            ("paperswithcode_id", "paperswithcode_id"),
            ("tags", "tags"),
            ("trending_score", "trendingScore"),
        ):
            setattr(self, attribute, kwargs.pop(payload_key, None))

        # Only a dict is converted to `DatasetCardData`; any other value is stored untouched.
        card_payload = kwargs.pop("cardData", None) or kwargs.pop("card_data", None)
        if isinstance(card_payload, dict):
            self.card_data = DatasetCardData(**card_payload, ignore_metadata_errors=True)
        else:
            self.card_data = card_payload

        sibling_payloads = kwargs.pop("siblings", None)
        if sibling_payloads is None:
            self.siblings = None
        else:
            repo_files = []
            for entry in sibling_payloads:
                rfilename = entry["rfilename"]
                size = entry.get("size")
                blob_id = entry.get("blobId")
                lfs_info = None
                if entry.get("lfs"):
                    lfs_info = BlobLfsInfo(
                        size=entry["lfs"]["size"],
                        sha256=entry["lfs"]["sha256"],
                        pointer_size=entry["lfs"]["pointerSize"],
                    )
                repo_files.append(RepoSibling(rfilename=rfilename, size=size, blob_id=blob_id, lfs=lfs_info))
            self.siblings = repo_files

        self.xet_enabled = kwargs.pop("xetEnabled", None)

        # backwards compatibility: legacy camelCase aliases
        self.lastModified = self.last_modified
        self.cardData = self.card_data

        # whatever is left in the payload is stored as-is
        self.__dict__.update(**kwargs)
+ runtime (`SpaceRuntime`, *optional*): + Space runtime information as a [`huggingface_hub.hf_api.SpaceRuntime`] object. + sdk (`str`, *optional*): + SDK used by the Space. + models (`List[str]`, *optional*): + List of models used by the Space. + datasets (`List[str]`, *optional*): + List of datasets used by the Space. + trending_score (`int`, *optional*): + Trending score of the Space. + """ + + id: str + author: Optional[str] + sha: Optional[str] + created_at: Optional[datetime] + last_modified: Optional[datetime] + private: Optional[bool] + gated: Optional[Literal["auto", "manual", False]] + disabled: Optional[bool] + host: Optional[str] + subdomain: Optional[str] + likes: Optional[int] + sdk: Optional[str] + tags: Optional[List[str]] + siblings: Optional[List[RepoSibling]] + trending_score: Optional[int] + card_data: Optional[SpaceCardData] + runtime: Optional[SpaceRuntime] + models: Optional[List[str]] + datasets: Optional[List[str]] + xet_enabled: Optional[bool] + + def __init__(self, **kwargs): + self.id = kwargs.pop("id") + self.author = kwargs.pop("author", None) + self.sha = kwargs.pop("sha", None) + created_at = kwargs.pop("createdAt", None) or kwargs.pop("created_at", None) + self.created_at = parse_datetime(created_at) if created_at else None + last_modified = kwargs.pop("lastModified", None) or kwargs.pop("last_modified", None) + self.last_modified = parse_datetime(last_modified) if last_modified else None + self.private = kwargs.pop("private", None) + self.gated = kwargs.pop("gated", None) + self.disabled = kwargs.pop("disabled", None) + self.host = kwargs.pop("host", None) + self.subdomain = kwargs.pop("subdomain", None) + self.likes = kwargs.pop("likes", None) + self.sdk = kwargs.pop("sdk", None) + self.tags = kwargs.pop("tags", None) + self.trending_score = kwargs.pop("trendingScore", None) + card_data = kwargs.pop("cardData", None) or kwargs.pop("card_data", None) + self.card_data = ( + SpaceCardData(**card_data, ignore_metadata_errors=True) if 
@dataclass
class CollectionItem:
    """
    Contains information about an item of a Collection (model, dataset, Space, paper or collection).

    Attributes:
        item_object_id (`str`):
            Unique ID of the item in the collection.
        item_id (`str`):
            ID of the underlying object on the Hub. Can be either a repo_id, a paper id or a collection slug.
            e.g. `"jbilcke-hf/ai-comic-factory"`, `"2307.09288"`, `"celinah/cerebras-function-calling-682607169c35fbfa98b30b9a"`.
        item_type (`str`):
            Type of the underlying object. Can be one of `"model"`, `"dataset"`, `"space"`, `"paper"` or `"collection"`.
        position (`int`):
            Position of the item in the collection.
        note (`str`, *optional*):
            Note associated with the item, as plain text.
    """

    item_object_id: str  # id in database
    item_id: str  # repo_id or paper id
    item_type: str
    position: int
    note: Optional[str] = None

    def __init__(
        self,
        _id: str,
        id: str,
        type: CollectionItemType_T,
        position: int,
        note: Optional[Dict] = None,
        **kwargs,
    ) -> None:
        self.item_object_id = _id  # id in database

        # A "slug" keyword, when given and not None, takes precedence over `id`
        # (used when the item is itself a collection).
        collection_slug = kwargs.get("slug")
        self.item_id = id if collection_slug is None else collection_slug

        self.item_type = type
        self.position = position

        # The note arrives as a dict; only its "text" entry is kept.
        if note is None:
            self.note = None
        else:
            self.note = note["text"]
@dataclass
class GitRefInfo:
    """
    Contains information about a git reference for a repo on the Hub.

    Plain data holder: it defines no methods of its own, so construction, equality and
    representation come from the `@dataclass` decorator.

    Attributes:
        name (`str`):
            Name of the reference (e.g. tag name or branch name).
        ref (`str`):
            Full git ref on the Hub (e.g. `"refs/heads/main"` or `"refs/tags/v1.0"`).
        target_commit (`str`):
            OID of the target commit for the ref (e.g. `"e7da7f221d5bf496a48136c0cd264e630fe9fcc8"`)
    """

    name: str  # short name of the branch or tag
    ref: str  # fully-qualified git ref
    target_commit: str  # commit OID the ref points to
@dataclass
class Organization:
    """
    Contains information about an organization on the Hub.

    Built from a camelCase payload; keyword arguments that are not recognized are stored
    unchanged on the instance.

    Attributes:
        avatar_url (`str`):
            URL of the organization's avatar.
        name (`str`):
            Name of the organization on the Hub (unique).
        fullname (`str`):
            Organization's full name.
        details (`str`, *optional*):
            Organization's description.
        is_verified (`bool`, *optional*):
            Whether the organization is verified.
        is_following (`bool`, *optional*):
            Whether the authenticated user follows this organization.
        num_users (`int`, *optional*):
            Number of members in the organization.
        num_models (`int`, *optional*):
            Number of models owned by the organization.
        num_spaces (`int`, *optional*):
            Number of Spaces owned by the organization.
        num_datasets (`int`, *optional*):
            Number of datasets owned by the organization.
        num_followers (`int`, *optional*):
            Number of followers of the organization.
    """

    avatar_url: str
    name: str
    fullname: str
    details: Optional[str] = None
    is_verified: Optional[bool] = None
    is_following: Optional[bool] = None
    num_users: Optional[int] = None
    num_models: Optional[int] = None
    num_spaces: Optional[int] = None
    num_datasets: Optional[int] = None
    num_followers: Optional[int] = None

    def __init__(self, **kwargs) -> None:
        # Text fields fall back to an empty string when the payload omits them.
        for attribute, payload_key in (
            ("avatar_url", "avatarUrl"),
            ("name", "name"),
            ("fullname", "fullname"),
        ):
            setattr(self, attribute, kwargs.pop(payload_key, ""))

        # Every other known field falls back to None.
        for attribute, payload_key in (
            ("details", "details"),
            ("is_verified", "isVerified"),
            ("is_following", "isFollowing"),
            ("num_users", "numUsers"),
            ("num_models", "numModels"),
            ("num_spaces", "numSpaces"),
            ("num_datasets", "numDatasets"),
            ("num_followers", "numFollowers"),
        ):
            setattr(self, attribute, kwargs.pop(payload_key, None))

        # forward compatibility: keep whatever else the payload contained
        self.__dict__.update(**kwargs)
+ avatar_url (`str`): + URL of the user's avatar. + details (`str`, *optional*): + User's details. + is_following (`bool`, *optional*): + Whether the authenticated user is following this user. + is_pro (`bool`, *optional*): + Whether the user is a pro user. + num_models (`int`, *optional*): + Number of models created by the user. + num_datasets (`int`, *optional*): + Number of datasets created by the user. + num_spaces (`int`, *optional*): + Number of spaces created by the user. + num_discussions (`int`, *optional*): + Number of discussions initiated by the user. + num_papers (`int`, *optional*): + Number of papers authored by the user. + num_upvotes (`int`, *optional*): + Number of upvotes received by the user. + num_likes (`int`, *optional*): + Number of likes given by the user. + num_following (`int`, *optional*): + Number of users this user is following. + num_followers (`int`, *optional*): + Number of users following this user. + orgs (list of [`Organization`]): + List of organizations the user is part of. 
@dataclass
class PaperInfo:
    """
    Contains information about a paper on the Hub.

    The constructor accepts the raw payload as keyword arguments. Several values may live either at the
    top level or inside a nested `paper` dict; both places are consulted. Keyword arguments that are not
    consumed are stored unchanged on the instance.

    Attributes:
        id (`str`):
            arXiv paper ID.
        authors (`List[str]`, *optional*):
            Names of paper authors
        published_at (`datetime`, *optional*):
            Date paper published.
        title (`str`, *optional*):
            Title of the paper.
        summary (`str`, *optional*):
            Summary of the paper.
        upvotes (`int`, *optional*):
            Number of upvotes for the paper on the Hub.
        discussion_id (`str`, *optional*):
            Discussion ID for the paper on the Hub.
        source (`str`, *optional*):
            Source of the paper.
        comments (`int`, *optional*):
            Number of comments for the paper on the Hub.
        submitted_at (`datetime`, *optional*):
            Date paper appeared in daily papers on the Hub.
        submitted_by (`User`, *optional*):
            Information about who submitted the daily paper.
    """

    id: str
    authors: Optional[List[str]]
    published_at: Optional[datetime]
    title: Optional[str]
    summary: Optional[str]
    upvotes: Optional[int]
    discussion_id: Optional[str]
    source: Optional[str]
    comments: Optional[int]
    submitted_at: Optional[datetime]
    submitted_by: Optional[User]

    def __init__(self, **kwargs) -> None:
        # Work on a shallow copy of the nested payload: keys are popped below and the caller's dict
        # must stay intact. `or {}` also tolerates an explicit `paper=None`.
        paper = dict(kwargs.pop("paper", None) or {})

        self.id = kwargs.pop("id", None) or paper.pop("id", None)

        authors = paper.pop("authors", None) or kwargs.pop("authors", None)
        # `.get` rather than `.pop`: read the name without mutating the caller's author dicts.
        self.authors = [author.get("name") for author in authors] if authors else None

        published_at = paper.pop("publishedAt", None) or kwargs.pop("publishedAt", None)
        self.published_at = parse_datetime(published_at) if published_at else None

        self.title = kwargs.pop("title", None)
        self.source = kwargs.pop("source", None)
        self.summary = paper.pop("summary", None) or kwargs.pop("summary", None)

        # Explicit `None` check: a nested count of `0` is a real value and must not be
        # replaced by the top-level fallback (which would usually yield `None`).
        upvotes = paper.pop("upvotes", None)
        if upvotes is None:
            upvotes = kwargs.pop("upvotes", None)
        self.upvotes = upvotes

        self.discussion_id = paper.pop("discussionId", None) or kwargs.pop("discussionId", None)
        self.comments = kwargs.pop("numComments", 0)

        # A top-level `publishedAt` that is still present here (it was not consumed above because the
        # nested `paper` carried its own) is tried first, then `submittedOnDailyAt`.
        submitted_at = kwargs.pop("publishedAt", None) or kwargs.pop("submittedOnDailyAt", None)
        self.submitted_at = parse_datetime(submitted_at) if submitted_at else None

        submitted_by = kwargs.pop("submittedBy", None) or kwargs.pop("submittedOnDailyBy", None)
        self.submitted_by = User(**submitted_by) if submitted_by else None

        # forward compatibility
        self.__dict__.update(**kwargs)
stored as LFS on a repo on the Hub. + + Used in the context of listing and permanently deleting LFS files from a repo to free-up space. + See [`list_lfs_files`] and [`permanently_delete_lfs_files`] for more details. + + Git LFS files are tracked using SHA-256 object IDs, rather than file paths, to optimize performance + This approach is necessary because a single object can be referenced by multiple paths across different commits, + making it impractical to search and resolve these connections. Check out [our documentation](https://huggingface.co/docs/hub/storage-limits#advanced-track-lfs-file-references) + to learn how to know which filename(s) is(are) associated with each SHA. + + Attributes: + file_oid (`str`): + SHA-256 object ID of the file. This is the identifier to pass when permanently deleting the file. + filename (`str`): + Possible filename for the LFS object. See the note above for more information. + oid (`str`): + OID of the LFS object. + pushed_at (`datetime`): + Date the LFS object was pushed to the repo. + ref (`str`, *optional*): + Ref where the LFS object has been pushed (if any). + size (`int`): + Size of the LFS object. + + Example: + ```py + >>> from huggingface_hub import HfApi + >>> api = HfApi() + >>> lfs_files = api.list_lfs_files("username/my-cool-repo") + + # Filter files files to delete based on a combination of `filename`, `pushed_at`, `ref` or `size`. + # e.g. 
def future_compatible(fn: CallableT) -> CallableT:
    """Wrap a method of `HfApi` to handle `run_as_future=True`.

    The returned wrapper looks for a `run_as_future` argument, passed either by keyword or positionally.
    When it is truthy, the call is delegated to `self.run_as_future(fn, self, ...)` and the value returned
    by that method is handed back; otherwise `fn` is called directly and its result is returned.

    The wrapper carries an `is_future_compatible = True` attribute.
    """
    # Names of the parameters of `fn`, without the leading "self", used to locate a positional value.
    positional_names = list(inspect.signature(fn).parameters)[1:]

    @wraps(fn)
    def _inner(self, *args, **kwargs):
        try:
            in_background = kwargs["run_as_future"]
        except KeyError:
            # Not passed by keyword: look it up among the positional arguments (default to False).
            in_background = dict(zip(positional_names, args)).get("run_as_future", False)
        else:
            kwargs["run_as_future"] = False  # avoid recursion error

        if not in_background:
            # Regular, blocking call
            return fn(self, *args, **kwargs)

        # Hand the call over to the instance's background runner
        return self.run_as_future(fn, self, *args, **kwargs)

    _inner.is_future_compatible = True  # type: ignore
    return _inner  # type: ignore
+ + The client is initialized with some high-level settings used in all requests + made to the Hub (HF endpoint, authentication, user agents...). Using the `HfApi` + client is preferred but not mandatory as all of its public methods are exposed + directly at the root of `huggingface_hub`. + + Args: + endpoint (`str`, *optional*): + Endpoint of the Hub. Defaults to . + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + library_name (`str`, *optional*): + The name of the library that is making the HTTP request. Will be added to + the user-agent header. Example: `"transformers"`. + library_version (`str`, *optional*): + The version of the library that is making the HTTP request. Will be added + to the user-agent header. Example: `"4.24.0"`. + user_agent (`str`, `dict`, *optional*): + The user agent info in the form of a dictionary or a single string. It will + be completed with information about the installed packages. + headers (`dict`, *optional*): + Additional headers to be sent with each request. Example: `{"X-My-Header": "value"}`. + Headers passed here are taking precedence over the default headers. 
+ """ + + def __init__( + self, + endpoint: Optional[str] = None, + token: Union[str, bool, None] = None, + library_name: Optional[str] = None, + library_version: Optional[str] = None, + user_agent: Union[Dict, str, None] = None, + headers: Optional[Dict[str, str]] = None, + ) -> None: + self.endpoint = endpoint if endpoint is not None else constants.ENDPOINT + self.token = token + self.library_name = library_name + self.library_version = library_version + self.user_agent = user_agent + self.headers = headers + self._thread_pool: Optional[ThreadPoolExecutor] = None + + def run_as_future(self, fn: Callable[..., R], *args, **kwargs) -> Future[R]: + """ + Run a method in the background and return a Future instance. + + The main goal is to run methods without blocking the main thread (e.g. to push data during a training). + Background jobs are queued to preserve order but are not ran in parallel. If you need to speed-up your scripts + by parallelizing lots of call to the API, you must setup and use your own [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor). + + Note: Most-used methods like [`upload_file`], [`upload_folder`] and [`create_commit`] have a `run_as_future: bool` + argument to directly call them in the background. This is equivalent to calling `api.run_as_future(...)` on them + but less verbose. + + Args: + fn (`Callable`): + The method to run in the background. + *args, **kwargs: + Arguments with which the method will be called. + + Return: + `Future`: a [Future](https://docs.python.org/3/library/concurrent.futures.html#future-objects) instance to + get the result of the task. + + Example: + ```py + >>> from huggingface_hub import HfApi + >>> api = HfApi() + >>> future = api.run_as_future(api.whoami) # instant + >>> future.done() + False + >>> future.result() # wait until complete and return result + (...) 
+ >>> future.done() + True + ``` + """ + if self._thread_pool is None: + self._thread_pool = ThreadPoolExecutor(max_workers=1) + self._thread_pool + return self._thread_pool.submit(fn, *args, **kwargs) + + @validate_hf_hub_args + def whoami(self, token: Union[bool, str, None] = None) -> Dict: + """ + Call HF API to know "whoami". + + Args: + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + """ + # Get the effective token using the helper function get_token + effective_token = token or self.token or get_token() or True + r = get_session().get( + f"{self.endpoint}/api/whoami-v2", + headers=self._build_hf_headers(token=effective_token), + ) + try: + hf_raise_for_status(r) + except HTTPError as e: + if e.response.status_code == 401: + error_message = "Invalid user token." + # Check which token is the effective one and generate the error message accordingly + if effective_token == _get_token_from_google_colab(): + error_message += " The token from Google Colab vault is invalid. Please update it from the UI." + elif effective_token == _get_token_from_environment(): + error_message += ( + " The token from HF_TOKEN environment variable is invalid. " + "Note that HF_TOKEN takes precedence over `hf auth login`." + ) + elif effective_token == _get_token_from_file(): + error_message += " The token stored is invalid. Please run `hf auth login` to update it." + raise HTTPError(error_message, request=e.request, response=e.response) from e + raise + return r.json() + + @_deprecate_method( + version="1.0", + message=( + "Permissions are more complex than when `get_token_permission` was first introduced. " + "OAuth and fine-grain tokens allows for more detailed permissions. 
" + "If you need to know the permissions associated with a token, please use `whoami` and check the `'auth'` key." + ), + ) + def get_token_permission( + self, token: Union[bool, str, None] = None + ) -> Literal["read", "write", "fineGrained", None]: + """ + Check if a given `token` is valid and return its permissions. + + > [!WARNING] + > This method is deprecated and will be removed in version 1.0. Permissions are more complex than when + > `get_token_permission` was first introduced. OAuth and fine-grain tokens allows for more detailed permissions. + > If you need to know the permissions associated with a token, please use `whoami` and check the `'auth'` key. + + For more details about tokens, please refer to https://huggingface.co/docs/hub/security-tokens#what-are-user-access-tokens. + + Args: + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + `Literal["read", "write", "fineGrained", None]`: Permission granted by the token ("read" or "write"). Returns `None` if no + token passed, if token is invalid or if role is not returned by the server. This typically happens when the token is an OAuth token. + """ + try: + return self.whoami(token=token)["auth"]["accessToken"]["role"] + except (LocalTokenNotFoundError, HTTPError, KeyError): + return None + + def get_model_tags(self) -> Dict: + """ + List all valid model tags as a nested namespace object + """ + path = f"{self.endpoint}/api/models-tags-by-type" + r = get_session().get(path) + hf_raise_for_status(r) + return r.json() + + def get_dataset_tags(self) -> Dict: + """ + List all valid dataset tags as a nested namespace object. 
+ """ + path = f"{self.endpoint}/api/datasets-tags-by-type" + r = get_session().get(path) + hf_raise_for_status(r) + return r.json() + + @_deprecate_arguments( + version="1.0", deprecated_args=["language", "library", "task", "tags"], custom_message="Use `filter` instead." + ) + @validate_hf_hub_args + def list_models( + self, + *, + # Search-query parameter + filter: Union[str, Iterable[str], None] = None, + author: Optional[str] = None, + apps: Optional[Union[str, List[str]]] = None, + gated: Optional[bool] = None, + inference: Optional[Literal["warm"]] = None, + inference_provider: Optional[Union[Literal["all"], "PROVIDER_T", List["PROVIDER_T"]]] = None, + model_name: Optional[str] = None, + trained_dataset: Optional[Union[str, List[str]]] = None, + search: Optional[str] = None, + pipeline_tag: Optional[str] = None, + emissions_thresholds: Optional[Tuple[float, float]] = None, + # Sorting and pagination parameters + sort: Union[Literal["last_modified"], str, None] = None, + direction: Optional[Literal[-1]] = None, + limit: Optional[int] = None, + # Additional data to fetch + expand: Optional[List[ExpandModelProperty_T]] = None, + full: Optional[bool] = None, + cardData: bool = False, + fetch_config: bool = False, + token: Union[bool, str, None] = None, + # Deprecated arguments - use `filter` instead + language: Optional[Union[str, List[str]]] = None, + library: Optional[Union[str, List[str]]] = None, + tags: Optional[Union[str, List[str]]] = None, + task: Optional[Union[str, List[str]]] = None, + ) -> Iterable[ModelInfo]: + """ + List models hosted on the Huggingface Hub, given some filters. + + Args: + filter (`str` or `Iterable[str]`, *optional*): + A string or list of string to filter models on the Hub. + Models can be filtered by library, language, task, tags, and more. + author (`str`, *optional*): + A string which identify the author (user or organization) of the + returned models. 
+ apps (`str` or `List`, *optional*): + A string or list of strings to filter models on the Hub that + support the specified apps. Example values include `"ollama"` or `["ollama", "vllm"]`. + gated (`bool`, *optional*): + A boolean to filter models on the Hub that are gated or not. By default, all models are returned. + If `gated=True` is passed, only gated models are returned. + If `gated=False` is passed, only non-gated models are returned. + inference (`Literal["warm"]`, *optional*): + If "warm", filter models on the Hub currently served by at least one provider. + inference_provider (`Literal["all"]` or `str`, *optional*): + A string to filter models on the Hub that are served by a specific provider. + Pass `"all"` to get all models served by at least one provider. + library (`str` or `List`, *optional*): + Deprecated. Pass a library name in `filter` to filter models by library. + language (`str` or `List`, *optional*): + Deprecated. Pass a language in `filter` to filter models by language. + model_name (`str`, *optional*): + A string that contain complete or partial names for models on the + Hub, such as "bert" or "bert-base-cased" + task (`str` or `List`, *optional*): + Deprecated. Pass a task in `filter` to filter models by task. + trained_dataset (`str` or `List`, *optional*): + A string tag or a list of string tags of the trained dataset for a + model on the Hub. + tags (`str` or `List`, *optional*): + Deprecated. Pass tags in `filter` to filter models by tags. + search (`str`, *optional*): + A string that will be contained in the returned model ids. + pipeline_tag (`str`, *optional*): + A string pipeline tag to filter models on the Hub by, such as `summarization`. + emissions_thresholds (`Tuple`, *optional*): + A tuple of two ints or floats representing a minimum and maximum + carbon footprint to filter the resulting models with in grams. + sort (`Literal["last_modified"]` or `str`, *optional*): + The key with which to sort the resulting models. 
Possible values are "last_modified", "trending_score", + "created_at", "downloads" and "likes". + direction (`Literal[-1]` or `int`, *optional*): + Direction in which to sort. The value `-1` sorts by descending + order while all other values sort by ascending order. + limit (`int`, *optional*): + The limit on the number of models fetched. Leaving this option + to `None` fetches all models. + expand (`List[ExpandModelProperty_T]`, *optional*): + List properties to return in the response. When used, only the properties in the list will be returned. + This parameter cannot be used if `full`, `cardData` or `fetch_config` are passed. + Possible values are `"author"`, `"cardData"`, `"config"`, `"createdAt"`, `"disabled"`, `"downloads"`, `"downloadsAllTime"`, `"gated"`, `"gguf"`, `"inference"`, `"inferenceProviderMapping"`, `"lastModified"`, `"library_name"`, `"likes"`, `"mask_token"`, `"model-index"`, `"pipeline_tag"`, `"private"`, `"safetensors"`, `"sha"`, `"siblings"`, `"spaces"`, `"tags"`, `"transformersInfo"`, `"trendingScore"`, `"widgetData"`, `"resourceGroup"` and `"xetEnabled"`. + full (`bool`, *optional*): + Whether to fetch all model data, including the `last_modified`, + the `sha`, the files and the `tags`. This is set to `True` by + default when using a filter. + cardData (`bool`, *optional*): + Whether to grab the metadata for the model as well. Can contain + useful information such as carbon emissions, metrics, and + datasets trained on. + fetch_config (`bool`, *optional*): + Whether to fetch the model configs as well. This is not included + in `full` due to its size. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + + Returns: + `Iterable[ModelInfo]`: an iterable of [`huggingface_hub.hf_api.ModelInfo`] objects. 
+ + Example: + + ```python + >>> from huggingface_hub import HfApi + + >>> api = HfApi() + + # List all models + >>> api.list_models() + + # List text classification models + >>> api.list_models(filter="text-classification") + + # List models from the KerasHub library + >>> api.list_models(filter="keras-hub") + + # List models served by Cohere + >>> api.list_models(inference_provider="cohere") + + # List models with "bert" in their name + >>> api.list_models(search="bert") + + # List models with "bert" in their name and pushed by google + >>> api.list_models(search="bert", author="google") + ``` + """ + if expand and (full or cardData or fetch_config): + raise ValueError("`expand` cannot be used if `full`, `cardData` or `fetch_config` are passed.") + + if emissions_thresholds is not None and not cardData: + raise ValueError("`emissions_thresholds` were passed without setting `cardData=True`.") + + path = f"{self.endpoint}/api/models" + headers = self._build_hf_headers(token=token) + params: Dict[str, Any] = {} + + # Build the filter list + filter_list: List[str] = [] + if filter: + filter_list.extend([filter] if isinstance(filter, str) else filter) + if library: + filter_list.extend([library] if isinstance(library, str) else library) + if task: + filter_list.extend([task] if isinstance(task, str) else task) + if trained_dataset: + if isinstance(trained_dataset, str): + trained_dataset = [trained_dataset] + for dataset in trained_dataset: + if not dataset.startswith("dataset:"): + dataset = f"dataset:{dataset}" + filter_list.append(dataset) + if language: + filter_list.extend([language] if isinstance(language, str) else language) + if tags: + filter_list.extend([tags] if isinstance(tags, str) else tags) + if len(filter_list) > 0: + params["filter"] = filter_list + + # Handle other query params + if author: + params["author"] = author + if apps: + if isinstance(apps, str): + apps = [apps] + params["apps"] = apps + if gated is not None: + params["gated"] = gated + if 
@_deprecate_arguments(version="1.0", deprecated_args=["tags"], custom_message="Use `filter` instead.")
@validate_hf_hub_args
def list_datasets(
    self,
    *,
    # Search-query parameter
    filter: Union[str, Iterable[str], None] = None,
    author: Optional[str] = None,
    benchmark: Optional[Union[str, List[str]]] = None,
    dataset_name: Optional[str] = None,
    gated: Optional[bool] = None,
    language_creators: Optional[Union[str, List[str]]] = None,
    language: Optional[Union[str, List[str]]] = None,
    multilinguality: Optional[Union[str, List[str]]] = None,
    size_categories: Optional[Union[str, List[str]]] = None,
    task_categories: Optional[Union[str, List[str]]] = None,
    task_ids: Optional[Union[str, List[str]]] = None,
    search: Optional[str] = None,
    # Sorting and pagination parameters
    sort: Optional[Union[Literal["last_modified"], str]] = None,
    direction: Optional[Literal[-1]] = None,
    limit: Optional[int] = None,
    # Additional data to fetch
    expand: Optional[List[ExpandDatasetProperty_T]] = None,
    full: Optional[bool] = None,
    token: Union[bool, str, None] = None,
    # Deprecated arguments - use `filter` instead
    tags: Optional[Union[str, List[str]]] = None,
) -> Iterable[DatasetInfo]:
    """
    List datasets hosted on the Huggingface Hub, given some filters.

    Args:
        filter (`str` or `Iterable[str]`, *optional*):
            A string or list of string to filter datasets on the hub.
        author (`str`, *optional*):
            A string which identify the author of the returned datasets.
        benchmark (`str` or `List`, *optional*):
            A string or list of strings that can be used to identify datasets on
            the Hub by their official benchmark.
        dataset_name (`str`, *optional*):
            A string that can be used to identify datasets on the Hub by name,
            such as `SQAC` or `wikineural`. It is sent as a `search` term.
        gated (`bool`, *optional*):
            A boolean to filter datasets on the Hub that are gated or not. By default, all datasets are returned.
            If `gated=True` is passed, only gated datasets are returned.
            If `gated=False` is passed, only non-gated datasets are returned.
        language_creators (`str` or `List`, *optional*):
            A string or list of strings that can be used to identify datasets on
            the Hub with how the data was curated, such as `crowdsourced` or
            `machine_generated`.
        language (`str` or `List`, *optional*):
            A string or list of strings representing a two-character language to
            filter datasets by on the Hub.
        multilinguality (`str` or `List`, *optional*):
            A string or list of strings representing a filter for datasets that
            contain multiple languages.
        size_categories (`str` or `List`, *optional*):
            A string or list of strings that can be used to identify datasets on
            the Hub by the size of the dataset.
        task_categories (`str` or `List`, *optional*):
            A string or list of strings that can be used to identify datasets on
            the Hub by task category, such as `text-classification`.
        task_ids (`str` or `List`, *optional*):
            A string or list of strings that can be used to identify datasets on
            the Hub by specific task, such as `language-modeling`.
        search (`str`, *optional*):
            A string sent as a `search` term alongside `dataset_name`.
        sort (`Literal["last_modified"]` or `str`, *optional*):
            The key with which to sort the resulting datasets. `"last_modified"`,
            `"trending_score"` and `"created_at"` are translated to the Hub's
            `lastModified`, `trendingScore` and `createdAt`; any other value is
            forwarded unchanged.
        direction (`Literal[-1]` or `int`, *optional*):
            Direction in which to sort, forwarded as the `direction` query parameter.
        limit (`int`, *optional*):
            The limit on the number of datasets fetched. Leaving this option
            to `None` fetches all datasets.
        expand (`List[ExpandDatasetProperty_T]`, *optional*):
            List properties to return in the response.
            This parameter cannot be used if `full` is passed.
        full (`bool`, *optional*):
            Whether to request full dataset data (sent as `full=True`).
        token (`bool` or `str`, *optional*):
            A user access token, forwarded to the request header builder.
        tags (`str` or `List`, *optional*):
            Deprecated. Pass tags in `filter` to filter datasets by tags.

    For `benchmark`, `language_creators`, `language`, `multilinguality`,
    `size_categories`, `task_categories` and `task_ids`, each value is sent as a
    `"<name>:<value>"` filter. Values that already carry the `"<name>:"` prefix are
    sent as-is.

    Returns:
        `Iterable[DatasetInfo]`: an iterable of [`huggingface_hub.hf_api.DatasetInfo`] objects.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If both `expand` and `full` are passed.

    Example usage with the `filter` argument:

    ```python
    >>> from huggingface_hub import HfApi

    >>> api = HfApi()

    # List all datasets
    >>> api.list_datasets()


    # List only the text classification datasets
    >>> api.list_datasets(filter="task_categories:text-classification")


    # List only the datasets in russian for language modeling
    >>> api.list_datasets(
    ...     filter=("language:ru", "task_ids:language-modeling")
    ... )

    # List FiftyOne datasets (identified by the tag "fiftyone" in dataset card)
    >>> api.list_datasets(tags="fiftyone")
    ```

    Example usage with the `search` argument:

    ```python
    >>> from huggingface_hub import HfApi

    >>> api = HfApi()

    # List all datasets with "text" in their name
    >>> api.list_datasets(search="text")

    # List all datasets with "text" in their name made by google
    >>> api.list_datasets(search="text", author="google")
    ```
    """
    if expand and full:
        raise ValueError("`expand` cannot be used if `full` is passed.")

    path = f"{self.endpoint}/api/datasets"
    headers = self._build_hf_headers(token=token)
    params: Dict[str, Any] = {}

    # Build `filter` list
    filter_list: List[str] = []
    if filter is not None:
        if isinstance(filter, str):
            filter_list.append(filter)
        else:
            filter_list.extend(filter)
    for key, value in (
        ("benchmark", benchmark),
        ("language_creators", language_creators),
        ("language", language),
        ("multilinguality", multilinguality),
        ("size_categories", size_categories),
        ("task_categories", task_categories),
        ("task_ids", task_ids),
    ):
        if value:
            if isinstance(value, str):
                value = [value]
            for value_item in value:
                # Prefix only when needed and always append the current item.
                # The previous code appended a separate variable that was only
                # assigned for unprefixed items, so an already-prefixed value
                # either raised `UnboundLocalError` or repeated the previous entry.
                if not value_item.startswith(f"{key}:"):
                    value_item = f"{key}:{value_item}"
                filter_list.append(value_item)
    if tags is not None:
        filter_list.extend([tags] if isinstance(tags, str) else tags)
    if len(filter_list) > 0:
        params["filter"] = filter_list

    # Handle other query params
    if author:
        params["author"] = author
    if gated is not None:
        params["gated"] = gated
    search_list = []
    if dataset_name:
        search_list.append(dataset_name)
    if search:
        search_list.append(search)
    if len(search_list) > 0:
        params["search"] = search_list
    if sort is not None:
        # Translate the snake_case aliases to the names the Hub expects.
        params["sort"] = (
            "lastModified"
            if sort == "last_modified"
            else "trendingScore"
            if sort == "trending_score"
            else "createdAt"
            if sort == "created_at"
            else sort
        )
    if direction is not None:
        params["direction"] = direction
    if limit is not None:
        params["limit"] = limit

    # Request additional data
    if expand:
        params["expand"] = expand
    if full:
        params["full"] = True

    items = paginate(path, params=params, headers=headers)
    if limit is not None:
        items = islice(items, limit)  # Do not iterate over all pages
    for item in items:
        # Make sure the `siblings` key is always present before building the info object.
        if "siblings" not in item:
            item["siblings"] = None
        yield DatasetInfo(**item)
+ search (`str`, *optional*): + A string that will be contained in the returned Spaces. + datasets (`str` or `Iterable`, *optional*): + Whether to return Spaces that make use of a dataset. + The name of a specific dataset can be passed as a string. + models (`str` or `Iterable`, *optional*): + Whether to return Spaces that make use of a model. + The name of a specific model can be passed as a string. + linked (`bool`, *optional*): + Whether to return Spaces that make use of either a model or a dataset. + sort (`Literal["last_modified"]` or `str`, *optional*): + The key with which to sort the resulting models. Possible values are "last_modified", "trending_score", + "created_at" and "likes". + direction (`Literal[-1]` or `int`, *optional*): + Direction in which to sort. The value `-1` sorts by descending + order while all other values sort by ascending order. + limit (`int`, *optional*): + The limit on the number of Spaces fetched. Leaving this option + to `None` fetches all Spaces. + expand (`List[ExpandSpaceProperty_T]`, *optional*): + List properties to return in the response. When used, only the properties in the list will be returned. + This parameter cannot be used if `full` is passed. + Possible values are `"author"`, `"cardData"`, `"datasets"`, `"disabled"`, `"lastModified"`, `"createdAt"`, `"likes"`, `"models"`, `"private"`, `"runtime"`, `"sdk"`, `"siblings"`, `"sha"`, `"subdomain"`, `"tags"`, `"trendingScore"`, `"usedStorage"`, `"resourceGroup"` and `"xetEnabled"`. + full (`bool`, *optional*): + Whether to fetch all Spaces data, including the `last_modified`, `siblings` + and `card_data` fields. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. 
@validate_hf_hub_args
def unlike(
    self,
    repo_id: str,
    *,
    token: Union[bool, str, None] = None,
    repo_type: Optional[str] = None,
) -> None:
    """
    Remove the current user's like from a repo on the Hub.

    Sends a `DELETE` request to the repo's `like` endpoint.

    See also [`list_liked_repos`].

    Args:
        repo_id (`str`):
            The repository to unlike. Example: `"user/my-cool-model"`.
        token (`bool` or `str`, *optional*):
            A user access token, forwarded to the request header builder.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if unliking a dataset or space, `None` or
            `"model"` if unliking a model. Default is `None`.

    Raises:
        [`~utils.RepositoryNotFoundError`]:
            If repository is not found (error 404): wrong repo_id/repo_type, private
            but not authenticated or repo does not exist.

    Example:
    ```python
    >>> from huggingface_hub import list_liked_repos, unlike
    >>> "gpt2" in list_liked_repos().models # we assume you have already liked gpt2
    True
    >>> unlike("gpt2")
    >>> "gpt2" in list_liked_repos().models
    False
    ```
    """
    # A missing repo type means "model".
    kind = constants.REPO_TYPE_MODEL if repo_type is None else repo_type
    url = f"{self.endpoint}/api/{kind}s/{repo_id}/like"
    headers = self._build_hf_headers(token=token)
    hf_raise_for_status(get_session().delete(url=url, headers=headers))
@validate_hf_hub_args
def list_repo_likers(
    self,
    repo_id: str,
    *,
    repo_type: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> Iterable[User]:
    """
    Iterate over the users who liked a given repo on the Hugging Face Hub.

    See also [`list_liked_repos`].

    Args:
        repo_id (`str`):
            The repository whose likers are listed. Example: `"user/my-cool-model"`.
        token (`bool` or `str`, *optional*):
            A user access token, forwarded to the request header builder.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` for a dataset or a Space, `None` or
            `"model"` for a model. Default is `None`.

    Returns:
        `Iterable[User]`: an iterable of [`huggingface_hub.hf_api.User`] objects.
    """
    # A missing repo type means "model".
    kind = constants.REPO_TYPE_MODEL if repo_type is None else repo_type
    likers_url = f"{self.endpoint}/api/{kind}s/{repo_id}/likers"
    headers = self._build_hf_headers(token=token)

    # Each paginated entry is mapped to a `User`; results are produced lazily.
    for entry in paginate(likers_url, params={}, headers=headers):
        yield User(
            username=entry["user"],
            fullname=entry["fullname"],
            avatar_url=entry["avatarUrl"],
        )
+ Possible values are `"author"`, `"baseModels"`, `"cardData"`, `"childrenModelCount"`, `"config"`, `"createdAt"`, `"disabled"`, `"downloads"`, `"downloadsAllTime"`, `"gated"`, `"gguf"`, `"inference"`, `"inferenceProviderMapping"`, `"lastModified"`, `"library_name"`, `"likes"`, `"mask_token"`, `"model-index"`, `"pipeline_tag"`, `"private"`, `"safetensors"`, `"sha"`, `"siblings"`, `"spaces"`, `"tags"`, `"transformersInfo"`, `"trendingScore"`, `"widgetData"`, `"usedStorage"`, `"resourceGroup"` and `"xetEnabled"`. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + [`huggingface_hub.hf_api.ModelInfo`]: The model repository information. + + > [!TIP] + > Raises the following errors: + > + > - [`~utils.RepositoryNotFoundError`] + > If the repository to download from cannot be found. This may be because it doesn't exist, + > or because it is set to `private` and you do not have access. + > - [`~utils.RevisionNotFoundError`] + > If the revision to download from cannot be found. 
@validate_hf_hub_args
def dataset_info(
    self,
    repo_id: str,
    *,
    revision: Optional[str] = None,
    timeout: Optional[float] = None,
    files_metadata: bool = False,
    expand: Optional[List[ExpandDatasetProperty_T]] = None,
    token: Union[bool, str, None] = None,
) -> DatasetInfo:
    """
    Retrieve the information of a single dataset repository on huggingface.co.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        revision (`str`, *optional*):
            The revision of the dataset repository from which to get the
            information. It is URL-quoted before being added to the request path.
        timeout (`float`, *optional*):
            Timeout forwarded to the HTTP request.
        files_metadata (`bool`, *optional*):
            Whether or not to retrieve metadata for files in the repository
            (sent as `blobs=True`). Defaults to `False`.
        expand (`List[ExpandDatasetProperty_T]`, *optional*):
            List properties to return in the response.
            This parameter cannot be used if `files_metadata` is passed.
            Possible values are `"author"`, `"cardData"`, `"citation"`, `"createdAt"`, `"disabled"`, `"description"`, `"downloads"`, `"downloadsAllTime"`, `"gated"`, `"lastModified"`, `"likes"`, `"paperswithcode_id"`, `"private"`, `"siblings"`, `"sha"`, `"tags"`, `"trendingScore"`, `"usedStorage"`, `"resourceGroup"` and `"xetEnabled"`.
        token (`bool` or `str`, *optional*):
            A user access token, forwarded to the request header builder.

    Returns:
        [`hf_api.DatasetInfo`]: The dataset repository information.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If both `expand` and `files_metadata` are set.

    > [!TIP]
    > Also raises the following errors:
    >
    >     - [`~utils.RepositoryNotFoundError`]
    >       If the repository cannot be found. This may be because it doesn't exist,
    >       or because it is set to `private` and you do not have access.
    >     - [`~utils.RevisionNotFoundError`]
    >       If the revision cannot be found.
    """
    if expand and files_metadata:
        raise ValueError("`expand` cannot be used if `files_metadata` is set.")

    headers = self._build_hf_headers(token=token)

    # Target either the default revision or an explicit, URL-quoted one.
    repo_url = f"{self.endpoint}/api/datasets/{repo_id}"
    url = repo_url if revision is None else f"{repo_url}/revision/{quote(revision, safe='')}"

    query: Dict = {}
    if files_metadata:
        query["blobs"] = True
    if expand:
        query["expand"] = expand

    response = get_session().get(url, headers=headers, timeout=timeout, params=query)
    hf_raise_for_status(response)
    return DatasetInfo(**response.json())
+ + Space can be private if you pass an acceptable token. + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated + by a `/`. + revision (`str`, *optional*): + The revision of the space repository from which to get the + information. + timeout (`float`, *optional*): + Whether to set a timeout for the request to the Hub. + files_metadata (`bool`, *optional*): + Whether or not to retrieve metadata for files in the repository + (size, LFS metadata, etc). Defaults to `False`. + expand (`List[ExpandSpaceProperty_T]`, *optional*): + List properties to return in the response. When used, only the properties in the list will be returned. + This parameter cannot be used if `full` is passed. + Possible values are `"author"`, `"cardData"`, `"createdAt"`, `"datasets"`, `"disabled"`, `"lastModified"`, `"likes"`, `"models"`, `"private"`, `"runtime"`, `"sdk"`, `"siblings"`, `"sha"`, `"subdomain"`, `"tags"`, `"trendingScore"`, `"usedStorage"`, `"resourceGroup"` and `"xetEnabled"`. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + [`~hf_api.SpaceInfo`]: The space repository information. + + > [!TIP] + > Raises the following errors: + > + > - [`~utils.RepositoryNotFoundError`] + > If the repository to download from cannot be found. This may be because it doesn't exist, + > or because it is set to `private` and you do not have access. + > - [`~utils.RevisionNotFoundError`] + > If the revision to download from cannot be found. 
+ """ + if expand and files_metadata: + raise ValueError("`expand` cannot be used if `files_metadata` is set.") + + headers = self._build_hf_headers(token=token) + path = ( + f"{self.endpoint}/api/spaces/{repo_id}" + if revision is None + else (f"{self.endpoint}/api/spaces/{repo_id}/revision/{quote(revision, safe='')}") + ) + params: Dict = {} + if files_metadata: + params["blobs"] = True + if expand: + params["expand"] = expand + + r = get_session().get(path, headers=headers, timeout=timeout, params=params) + hf_raise_for_status(r) + data = r.json() + return SpaceInfo(**data) + + @validate_hf_hub_args + def repo_info( + self, + repo_id: str, + *, + revision: Optional[str] = None, + repo_type: Optional[str] = None, + timeout: Optional[float] = None, + files_metadata: bool = False, + expand: Optional[Union[ExpandModelProperty_T, ExpandDatasetProperty_T, ExpandSpaceProperty_T]] = None, + token: Union[bool, str, None] = None, + ) -> Union[ModelInfo, DatasetInfo, SpaceInfo]: + """ + Get the info object for a given repo of a given type. + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated + by a `/`. + revision (`str`, *optional*): + The revision of the repository from which to get the + information. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if getting repository info from a dataset or a space, + `None` or `"model"` if getting repository info from a model. Default is `None`. + timeout (`float`, *optional*): + Whether to set a timeout for the request to the Hub. + expand (`ExpandModelProperty_T` or `ExpandDatasetProperty_T` or `ExpandSpaceProperty_T`, *optional*): + List properties to return in the response. When used, only the properties in the list will be returned. + This parameter cannot be used if `files_metadata` is passed. + For an exhaustive list of available properties, check out [`model_info`], [`dataset_info`] or [`space_info`]. 
+ files_metadata (`bool`, *optional*): + Whether or not to retrieve metadata for files in the repository + (size, LFS metadata, etc). Defaults to `False`. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + `Union[SpaceInfo, DatasetInfo, ModelInfo]`: The repository information, as a + [`huggingface_hub.hf_api.DatasetInfo`], [`huggingface_hub.hf_api.ModelInfo`] + or [`huggingface_hub.hf_api.SpaceInfo`] object. + + > [!TIP] + > Raises the following errors: + > + > - [`~utils.RepositoryNotFoundError`] + > If the repository to download from cannot be found. This may be because it doesn't exist, + > or because it is set to `private` and you do not have access. + > - [`~utils.RevisionNotFoundError`] + > If the revision to download from cannot be found. + """ + if repo_type is None or repo_type == "model": + method = self.model_info + elif repo_type == "dataset": + method = self.dataset_info # type: ignore + elif repo_type == "space": + method = self.space_info # type: ignore + else: + raise ValueError("Unsupported repo type.") + return method( + repo_id, + revision=revision, + token=token, + timeout=timeout, + expand=expand, # type: ignore[arg-type] + files_metadata=files_metadata, + ) + + @validate_hf_hub_args + def repo_exists( + self, + repo_id: str, + *, + repo_type: Optional[str] = None, + token: Union[str, bool, None] = None, + ) -> bool: + """ + Checks if a repository exists on the Hugging Face Hub. + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated + by a `/`. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if getting repository info from a dataset or a space, + `None` or `"model"` if getting repository info from a model. Default is `None`. 
+ token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + True if the repository exists, False otherwise. + + Examples: + ```py + >>> from huggingface_hub import repo_exists + >>> repo_exists("google/gemma-7b") + True + >>> repo_exists("google/not-a-repo") + False + ``` + """ + try: + self.repo_info(repo_id=repo_id, repo_type=repo_type, token=token) + return True + except GatedRepoError: + return True # we don't have access but it exists + except RepositoryNotFoundError: + return False + + @validate_hf_hub_args + def revision_exists( + self, + repo_id: str, + revision: str, + *, + repo_type: Optional[str] = None, + token: Union[str, bool, None] = None, + ) -> bool: + """ + Checks if a specific revision exists on a repo on the Hugging Face Hub. + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated + by a `/`. + revision (`str`): + The revision of the repository to check. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if getting repository info from a dataset or a space, + `None` or `"model"` if getting repository info from a model. Default is `None`. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + True if the repository and the revision exists, False otherwise. 
+ + Examples: + ```py + >>> from huggingface_hub import revision_exists + >>> revision_exists("google/gemma-7b", "float16") + True + >>> revision_exists("google/gemma-7b", "not-a-revision") + False + ``` + """ + try: + self.repo_info(repo_id=repo_id, revision=revision, repo_type=repo_type, token=token) + return True + except RevisionNotFoundError: + return False + except RepositoryNotFoundError: + return False + + @validate_hf_hub_args + def file_exists( + self, + repo_id: str, + filename: str, + *, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + token: Union[str, bool, None] = None, + ) -> bool: + """ + Checks if a file exists in a repository on the Hugging Face Hub. + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated + by a `/`. + filename (`str`): + The name of the file to check, for example: + `"config.json"` + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if getting repository info from a dataset or a space, + `None` or `"model"` if getting repository info from a model. Default is `None`. + revision (`str`, *optional*): + The revision of the repository from which to get the information. Defaults to `"main"` branch. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + True if the file exists, False otherwise. 
+ + Examples: + ```py + >>> from huggingface_hub import file_exists + >>> file_exists("bigcode/starcoder", "config.json") + True + >>> file_exists("bigcode/starcoder", "not-a-file") + False + >>> file_exists("bigcode/not-a-repo", "config.json") + False + ``` + """ + url = hf_hub_url( + repo_id=repo_id, repo_type=repo_type, revision=revision, filename=filename, endpoint=self.endpoint + ) + try: + if token is None: + token = self.token + get_hf_file_metadata(url, token=token) + return True + except GatedRepoError: # raise specifically on gated repo + raise + except (RepositoryNotFoundError, EntryNotFoundError, RevisionNotFoundError): + return False + + @validate_hf_hub_args + def list_repo_files( + self, + repo_id: str, + *, + revision: Optional[str] = None, + repo_type: Optional[str] = None, + token: Union[str, bool, None] = None, + ) -> List[str]: + """ + Get the list of files in a given repo. + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated by a `/`. + revision (`str`, *optional*): + The revision of the repository from which to get the information. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if uploading to a dataset or space, `None` or `"model"` if uploading to + a model. Default is `None`. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + `List[str]`: the list of files in a given repository. 
+ """ + return [ + f.rfilename + for f in self.list_repo_tree( + repo_id=repo_id, recursive=True, revision=revision, repo_type=repo_type, token=token + ) + if isinstance(f, RepoFile) + ] + + @validate_hf_hub_args + def list_repo_tree( + self, + repo_id: str, + path_in_repo: Optional[str] = None, + *, + recursive: bool = False, + expand: bool = False, + revision: Optional[str] = None, + repo_type: Optional[str] = None, + token: Union[str, bool, None] = None, + ) -> Iterable[Union[RepoFile, RepoFolder]]: + """ + List a repo tree's files and folders and get information about them. + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated by a `/`. + path_in_repo (`str`, *optional*): + Relative path of the tree (folder) in the repo, for example: + `"checkpoints/1fec34a/results"`. Will default to the root tree (folder) of the repository. + recursive (`bool`, *optional*, defaults to `False`): + Whether to list tree's files and folders recursively. + expand (`bool`, *optional*, defaults to `False`): + Whether to fetch more information about the tree's files and folders (e.g. last commit and files' security scan results). This + operation is more expensive for the server so only 50 results are returned per page (instead of 1000). + As pagination is implemented in `huggingface_hub`, this is transparent for you except for the time it + takes to get the results. + revision (`str`, *optional*): + The revision of the repository from which to get the tree. Defaults to `"main"` branch. + repo_type (`str`, *optional*): + The type of the repository from which to get the tree (`"model"`, `"dataset"` or `"space"`. + Defaults to `"model"`. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. 
+ + Returns: + `Iterable[Union[RepoFile, RepoFolder]]`: + The information about the tree's files and folders, as an iterable of [`RepoFile`] and [`RepoFolder`] objects. The order of the files and folders is + not guaranteed. + + Raises: + [`~utils.RepositoryNotFoundError`]: + If repository is not found (error 404): wrong repo_id/repo_type, private but not authenticated or repo + does not exist. + [`~utils.RevisionNotFoundError`]: + If revision is not found (error 404) on the repo. + [`~utils.EntryNotFoundError`]: + If the tree (folder) does not exist (error 404) on the repo. + + Examples: + + Get information about a repo's tree. + ```py + >>> from huggingface_hub import list_repo_tree + >>> repo_tree = list_repo_tree("lysandre/arxiv-nlp") + >>> repo_tree + + >>> list(repo_tree) + [ + RepoFile(path='.gitattributes', size=391, blob_id='ae8c63daedbd4206d7d40126955d4e6ab1c80f8f', lfs=None, last_commit=None, security=None), + RepoFile(path='README.md', size=391, blob_id='43bd404b159de6fba7c2f4d3264347668d43af25', lfs=None, last_commit=None, security=None), + RepoFile(path='config.json', size=554, blob_id='2f9618c3a19b9a61add74f70bfb121335aeef666', lfs=None, last_commit=None, security=None), + RepoFile( + path='flax_model.msgpack', size=497764107, blob_id='8095a62ccb4d806da7666fcda07467e2d150218e', + lfs={'size': 497764107, 'sha256': 'd88b0d6a6ff9c3f8151f9d3228f57092aaea997f09af009eefd7373a77b5abb9', 'pointer_size': 134}, last_commit=None, security=None + ), + RepoFile(path='merges.txt', size=456318, blob_id='226b0752cac7789c48f0cb3ec53eda48b7be36cc', lfs=None, last_commit=None, security=None), + RepoFile( + path='pytorch_model.bin', size=548123560, blob_id='64eaa9c526867e404b68f2c5d66fd78e27026523', + lfs={'size': 548123560, 'sha256': '9be78edb5b928eba33aa88f431551348f7466ba9f5ef3daf1d552398722a5436', 'pointer_size': 134}, last_commit=None, security=None + ), + RepoFile(path='vocab.json', size=898669, blob_id='b00361fece0387ca34b4b8b8539ed830d644dbeb', lfs=None, 
last_commit=None, security=None)] + ] + ``` + + Get even more information about a repo's tree (last commit and files' security scan results) + ```py + >>> from huggingface_hub import list_repo_tree + >>> repo_tree = list_repo_tree("prompthero/openjourney-v4", expand=True) + >>> list(repo_tree) + [ + RepoFolder( + path='feature_extractor', + tree_id='aa536c4ea18073388b5b0bc791057a7296a00398', + last_commit={ + 'oid': '47b62b20b20e06b9de610e840282b7e6c3d51190', + 'title': 'Upload diffusers weights (#48)', + 'date': datetime.datetime(2023, 3, 21, 9, 5, 27, tzinfo=datetime.timezone.utc) + } + ), + RepoFolder( + path='safety_checker', + tree_id='65aef9d787e5557373fdf714d6c34d4fcdd70440', + last_commit={ + 'oid': '47b62b20b20e06b9de610e840282b7e6c3d51190', + 'title': 'Upload diffusers weights (#48)', + 'date': datetime.datetime(2023, 3, 21, 9, 5, 27, tzinfo=datetime.timezone.utc) + } + ), + RepoFile( + path='model_index.json', + size=582, + blob_id='d3d7c1e8c3e78eeb1640b8e2041ee256e24c9ee1', + lfs=None, + last_commit={ + 'oid': 'b195ed2d503f3eb29637050a886d77bd81d35f0e', + 'title': 'Fix deprecation warning by changing `CLIPFeatureExtractor` to `CLIPImageProcessor`. (#54)', + 'date': datetime.datetime(2023, 5, 15, 21, 41, 59, tzinfo=datetime.timezone.utc) + }, + security={ + 'safe': True, + 'av_scan': {'virusFound': False, 'virusNames': None}, + 'pickle_import_scan': None + } + ) + ... 
+ ] + ``` + """ + repo_type = repo_type or constants.REPO_TYPE_MODEL + revision = quote(revision, safe="") if revision is not None else constants.DEFAULT_REVISION + headers = self._build_hf_headers(token=token) + + encoded_path_in_repo = "/" + quote(path_in_repo, safe="") if path_in_repo else "" + tree_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/tree/{revision}{encoded_path_in_repo}" + for path_info in paginate(path=tree_url, headers=headers, params={"recursive": recursive, "expand": expand}): + yield (RepoFile(**path_info) if path_info["type"] == "file" else RepoFolder(**path_info)) + + @validate_hf_hub_args + def list_repo_refs( + self, + repo_id: str, + *, + repo_type: Optional[str] = None, + include_pull_requests: bool = False, + token: Union[str, bool, None] = None, + ) -> GitRefs: + """ + Get the list of refs of a given repo (both tags and branches). + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated + by a `/`. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if listing refs from a dataset or a Space, + `None` or `"model"` if listing from a model. Default is `None`. + include_pull_requests (`bool`, *optional*): + Whether to include refs from pull requests in the list. Defaults to `False`. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. 
+ + Example: + ```py + >>> from huggingface_hub import HfApi + >>> api = HfApi() + >>> api.list_repo_refs("gpt2") + GitRefs(branches=[GitRefInfo(name='main', ref='refs/heads/main', target_commit='e7da7f221d5bf496a48136c0cd264e630fe9fcc8')], converts=[], tags=[]) + + >>> api.list_repo_refs("bigcode/the-stack", repo_type='dataset') + GitRefs( + branches=[ + GitRefInfo(name='main', ref='refs/heads/main', target_commit='18edc1591d9ce72aa82f56c4431b3c969b210ae3'), + GitRefInfo(name='v1.1.a1', ref='refs/heads/v1.1.a1', target_commit='f9826b862d1567f3822d3d25649b0d6d22ace714') + ], + converts=[], + tags=[ + GitRefInfo(name='v1.0', ref='refs/tags/v1.0', target_commit='c37a8cd1e382064d8aced5e05543c5f7753834da') + ] + ) + ``` + + Returns: + [`GitRefs`]: object containing all information about branches and tags for a + repo on the Hub. + """ + repo_type = repo_type or constants.REPO_TYPE_MODEL + response = get_session().get( + f"{self.endpoint}/api/{repo_type}s/{repo_id}/refs", + headers=self._build_hf_headers(token=token), + params={"include_prs": 1} if include_pull_requests else {}, + ) + hf_raise_for_status(response) + data = response.json() + + def _format_as_git_ref_info(item: Dict) -> GitRefInfo: + return GitRefInfo(name=item["name"], ref=item["ref"], target_commit=item["targetCommit"]) + + return GitRefs( + branches=[_format_as_git_ref_info(item) for item in data["branches"]], + converts=[_format_as_git_ref_info(item) for item in data["converts"]], + tags=[_format_as_git_ref_info(item) for item in data["tags"]], + pull_requests=[_format_as_git_ref_info(item) for item in data["pullRequests"]] + if include_pull_requests + else None, + ) + + @validate_hf_hub_args + def list_repo_commits( + self, + repo_id: str, + *, + repo_type: Optional[str] = None, + token: Union[bool, str, None] = None, + revision: Optional[str] = None, + formatted: bool = False, + ) -> List[GitCommitInfo]: + """ + Get the list of commits of a given revision for a repo on the Hub. 
+ + Commits are sorted by date (last commit first). + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated by a `/`. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if listing commits from a dataset or a Space, `None` or `"model"` if + listing from a model. Default is `None`. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + revision (`str`, *optional*): + The git revision to commit from. Defaults to the head of the `"main"` branch. + formatted (`bool`): + Whether to return the HTML-formatted title and description of the commits. Defaults to False. + + Example: + ```py + >>> from huggingface_hub import HfApi + >>> api = HfApi() + + # Commits are sorted by date (last commit first) + >>> initial_commit = api.list_repo_commits("gpt2")[-1] + + # Initial commit is always a system commit containing the `.gitattributes` file. + >>> initial_commit + GitCommitInfo( + commit_id='9b865efde13a30c13e0a33e536cf3e4a5a9d71d8', + authors=['system'], + created_at=datetime.datetime(2019, 2, 18, 10, 36, 15, tzinfo=datetime.timezone.utc), + title='initial commit', + message='', + formatted_title=None, + formatted_message=None + ) + + # Create an empty branch by deriving from initial commit + >>> api.create_branch("gpt2", "new_empty_branch", revision=initial_commit.commit_id) + ``` + + Returns: + List[[`GitCommitInfo`]]: list of objects containing information about the commits for a repo on the Hub. + + Raises: + [`~utils.RepositoryNotFoundError`]: + If repository is not found (error 404): wrong repo_id/repo_type, private but not authenticated or repo + does not exist. + [`~utils.RevisionNotFoundError`]: + If revision is not found (error 404) on the repo. 
+ """ + repo_type = repo_type or constants.REPO_TYPE_MODEL + revision = quote(revision, safe="") if revision is not None else constants.DEFAULT_REVISION + + # Paginate over results and return the list of commits. + return [ + GitCommitInfo( + commit_id=item["id"], + authors=[author["user"] for author in item["authors"]], + created_at=parse_datetime(item["date"]), + title=item["title"], + message=item["message"], + formatted_title=item.get("formatted", {}).get("title"), + formatted_message=item.get("formatted", {}).get("message"), + ) + for item in paginate( + f"{self.endpoint}/api/{repo_type}s/{repo_id}/commits/{revision}", + headers=self._build_hf_headers(token=token), + params={"expand[]": "formatted"} if formatted else {}, + ) + ] + + @validate_hf_hub_args + def get_paths_info( + self, + repo_id: str, + paths: Union[List[str], str], + *, + expand: bool = False, + revision: Optional[str] = None, + repo_type: Optional[str] = None, + token: Union[str, bool, None] = None, + ) -> List[Union[RepoFile, RepoFolder]]: + """ + Get information about a repo's paths. + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated by a `/`. + paths (`Union[List[str], str]`, *optional*): + The paths to get information about. If a path do not exist, it is ignored without raising + an exception. + expand (`bool`, *optional*, defaults to `False`): + Whether to fetch more information about the paths (e.g. last commit and files' security scan results). This + operation is more expensive for the server so only 50 results are returned per page (instead of 1000). + As pagination is implemented in `huggingface_hub`, this is transparent for you except for the time it + takes to get the results. + revision (`str`, *optional*): + The revision of the repository from which to get the information. Defaults to `"main"` branch. + repo_type (`str`, *optional*): + The type of the repository from which to get the information (`"model"`, `"dataset"` or `"space"`. 
+ Defaults to `"model"`. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + `List[Union[RepoFile, RepoFolder]]`: + The information about the paths, as a list of [`RepoFile`] and [`RepoFolder`] objects. + + Raises: + [`~utils.RepositoryNotFoundError`]: + If repository is not found (error 404): wrong repo_id/repo_type, private but not authenticated or repo + does not exist. + [`~utils.RevisionNotFoundError`]: + If revision is not found (error 404) on the repo. + + Example: + ```py + >>> from huggingface_hub import get_paths_info + >>> paths_info = get_paths_info("allenai/c4", ["README.md", "en"], repo_type="dataset") + >>> paths_info + [ + RepoFile(path='README.md', size=2379, blob_id='f84cb4c97182890fc1dbdeaf1a6a468fd27b4fff', lfs=None, last_commit=None, security=None), + RepoFolder(path='en', tree_id='dc943c4c40f53d02b31ced1defa7e5f438d5862e', last_commit=None) + ] + ``` + """ + repo_type = repo_type or constants.REPO_TYPE_MODEL + revision = quote(revision, safe="") if revision is not None else constants.DEFAULT_REVISION + headers = self._build_hf_headers(token=token) + + response = get_session().post( + f"{self.endpoint}/api/{repo_type}s/{repo_id}/paths-info/{revision}", + data={ + "paths": paths if isinstance(paths, list) else [paths], + "expand": expand, + }, + headers=headers, + ) + hf_raise_for_status(response) + paths_info = response.json() + return [ + RepoFile(**path_info) if path_info["type"] == "file" else RepoFolder(**path_info) + for path_info in paths_info + ] + + @validate_hf_hub_args + def super_squash_history( + self, + repo_id: str, + *, + branch: Optional[str] = None, + commit_message: Optional[str] = None, + repo_type: Optional[str] = None, + token: Union[str, bool, None] = None, + ) -> None: + 
"""Squash commit history on a branch for a repo on the Hub. + + Squashing the repo history is useful when you know you'll make hundreds of commits and you don't want to + clutter the history. Squashing commits can only be performed from the head of a branch. + + > [!WARNING] + > Once squashed, the commit history cannot be retrieved. This is a non-revertible operation. + + > [!WARNING] + > Once the history of a branch has been squashed, it is not possible to merge it back into another branch since + > their history will have diverged. + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated by a `/`. + branch (`str`, *optional*): + The branch to squash. Defaults to the head of the `"main"` branch. + commit_message (`str`, *optional*): + The commit message to use for the squashed commit. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if listing commits from a dataset or a Space, `None` or `"model"` if + listing from a model. Default is `None`. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Raises: + [`~utils.RepositoryNotFoundError`]: + If repository is not found (error 404): wrong repo_id/repo_type, private but not authenticated or repo + does not exist. + [`~utils.RevisionNotFoundError`]: + If the branch to squash cannot be found. + [`~utils.BadRequestError`]: + If invalid reference for a branch. You cannot squash history on tags. + + Example: + ```py + >>> from huggingface_hub import HfApi + >>> api = HfApi() + + # Create repo + >>> repo_id = api.create_repo("test-squash").repo_id + + # Make a lot of commits. 
+ >>> api.upload_file(repo_id=repo_id, path_in_repo="file.txt", path_or_fileobj=b"content") + >>> api.upload_file(repo_id=repo_id, path_in_repo="lfs.bin", path_or_fileobj=b"content") + >>> api.upload_file(repo_id=repo_id, path_in_repo="file.txt", path_or_fileobj=b"another_content") + + # Squash history + >>> api.super_squash_history(repo_id=repo_id) + ``` + """ + if repo_type is None: + repo_type = constants.REPO_TYPE_MODEL + if repo_type not in constants.REPO_TYPES: + raise ValueError("Invalid repo type") + if branch is None: + branch = constants.DEFAULT_REVISION + + # Prepare request + url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/super-squash/{quote(branch, safe='')}" + headers = self._build_hf_headers(token=token) + commit_message = commit_message or f"Super-squash branch '{branch}' using huggingface_hub" + + # Super-squash + response = get_session().post(url=url, headers=headers, json={"message": commit_message}) + hf_raise_for_status(response) + + @validate_hf_hub_args + def list_lfs_files( + self, + repo_id: str, + *, + repo_type: Optional[str] = None, + token: Union[bool, str, None] = None, + ) -> Iterable[LFSFileInfo]: + """ + List all LFS files in a repo on the Hub. + + This is primarily useful to count how much storage a repo is using and to eventually clean up large files + with [`permanently_delete_lfs_files`]. Note that this would be a permanent action that will affect all commits + referencing this deleted files and that cannot be undone. + + Args: + repo_id (`str`): + The repository for which you are listing LFS files. + repo_type (`str`, *optional*): + Type of repository. Set to `"dataset"` or `"space"` if listing from a dataset or space, `None` or + `"model"` if listing from a model. Default is `None`. + token (`bool` or `str`, *optional*): + A valid user access token (string). 
Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + `Iterable[LFSFileInfo]`: An iterator of [`LFSFileInfo`] objects. + + Example: + ```py + >>> from huggingface_hub import HfApi + >>> api = HfApi() + >>> lfs_files = api.list_lfs_files("username/my-cool-repo") + + # Filter files files to delete based on a combination of `filename`, `pushed_at`, `ref` or `size`. + # e.g. select only LFS files in the "checkpoints" folder + >>> lfs_files_to_delete = (lfs_file for lfs_file in lfs_files if lfs_file.filename.startswith("checkpoints/")) + + # Permanently delete LFS files + >>> api.permanently_delete_lfs_files("username/my-cool-repo", lfs_files_to_delete) + ``` + """ + # Prepare request + if repo_type is None: + repo_type = constants.REPO_TYPE_MODEL + url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/lfs-files" + headers = self._build_hf_headers(token=token) + + # Paginate over LFS items + for item in paginate(url, params={}, headers=headers): + yield LFSFileInfo(**item) + + @validate_hf_hub_args + def permanently_delete_lfs_files( + self, + repo_id: str, + lfs_files: Iterable[LFSFileInfo], + *, + rewrite_history: bool = True, + repo_type: Optional[str] = None, + token: Union[bool, str, None] = None, + ) -> None: + """ + Permanently delete LFS files from a repo on the Hub. + + > [!WARNING] + > This is a permanent action that will affect all commits referencing the deleted files and might corrupt your + > repository. This is a non-revertible operation. Use it only if you know what you are doing. + + Args: + repo_id (`str`): + The repository for which you are listing LFS files. + lfs_files (`Iterable[LFSFileInfo]`): + An iterable of [`LFSFileInfo`] items to permanently delete from the repo. Use [`list_lfs_files`] to list + all LFS files from a repo. 
+ rewrite_history (`bool`, *optional*, default to `True`): + Whether to rewrite repository history to remove file pointers referencing the deleted LFS files (recommended). + repo_type (`str`, *optional*): + Type of repository. Set to `"dataset"` or `"space"` if listing from a dataset or space, `None` or + `"model"` if listing from a model. Default is `None`. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Example: + ```py + >>> from huggingface_hub import HfApi + >>> api = HfApi() + >>> lfs_files = api.list_lfs_files("username/my-cool-repo") + + # Filter files files to delete based on a combination of `filename`, `pushed_at`, `ref` or `size`. + # e.g. select only LFS files in the "checkpoints" folder + >>> lfs_files_to_delete = (lfs_file for lfs_file in lfs_files if lfs_file.filename.startswith("checkpoints/")) + + # Permanently delete LFS files + >>> api.permanently_delete_lfs_files("username/my-cool-repo", lfs_files_to_delete) + ``` + """ + # Prepare request + if repo_type is None: + repo_type = constants.REPO_TYPE_MODEL + url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/lfs-files/batch" + headers = self._build_hf_headers(token=token) + + # Delete LFS items by batches of 1000 + for batch in chunk_iterable(lfs_files, 1000): + shas = [item.file_oid for item in batch] + if len(shas) == 0: + return + payload = { + "deletions": { + "sha": shas, + "rewriteHistory": rewrite_history, + } + } + response = get_session().post(url, headers=headers, json=payload) + hf_raise_for_status(response) + + @validate_hf_hub_args + def create_repo( + self, + repo_id: str, + *, + token: Union[str, bool, None] = None, + private: Optional[bool] = None, + repo_type: Optional[str] = None, + exist_ok: bool = False, + resource_group_id: 
Optional[str] = None, + space_sdk: Optional[str] = None, + space_hardware: Optional[SpaceHardware] = None, + space_storage: Optional[SpaceStorage] = None, + space_sleep_time: Optional[int] = None, + space_secrets: Optional[List[Dict[str, str]]] = None, + space_variables: Optional[List[Dict[str, str]]] = None, + ) -> RepoUrl: + """Create an empty repo on the HuggingFace Hub. + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated + by a `/`. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + private (`bool`, *optional*): + Whether to make the repo private. If `None` (default), the repo will be public unless the organization's default is private. This value is ignored if the repo already exists. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if uploading to a dataset or + space, `None` or `"model"` if uploading to a model. Default is + `None`. + exist_ok (`bool`, *optional*, defaults to `False`): + If `True`, do not raise an error if repo already exists. + resource_group_id (`str`, *optional*): + Resource group in which to create the repo. Resource groups is only available for Enterprise Hub organizations and + allow to define which members of the organization can access the resource. The ID of a resource group + can be found in the URL of the resource's page on the Hub (e.g. `"66670e5163145ca562cb1988"`). + To learn more about resource groups, see https://huggingface.co/docs/hub/en/security-resource-groups. + space_sdk (`str`, *optional*): + Choice of SDK to use if repo_type is "space". Can be "streamlit", "gradio", "docker", or "static". + space_hardware (`SpaceHardware` or `str`, *optional*): + Choice of Hardware if repo_type is "space". 
See [`SpaceHardware`] for a complete list. + space_storage (`SpaceStorage` or `str`, *optional*): + Choice of persistent storage tier. Example: `"small"`. See [`SpaceStorage`] for a complete list. + space_sleep_time (`int`, *optional*): + Number of seconds of inactivity to wait before a Space is put to sleep. Set to `-1` if you don't want + your Space to sleep (default behavior for upgraded hardware). For free hardware, you can't configure + the sleep time (value is fixed to 48 hours of inactivity). + See https://huggingface.co/docs/hub/spaces-gpus#sleep-time for more details. + space_secrets (`List[Dict[str, str]]`, *optional*): + A list of secret keys to set in your Space. Each item is in the form `{"key": ..., "value": ..., "description": ...}` where description is optional. + For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets. + space_variables (`List[Dict[str, str]]`, *optional*): + A list of public environment variables to set in your Space. Each item is in the form `{"key": ..., "value": ..., "description": ...}` where description is optional. + For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets-and-environment-variables. + + Returns: + [`RepoUrl`]: URL to the newly created repo. Value is a subclass of `str` containing + attributes like `endpoint`, `repo_type` and `repo_id`. + """ + organization, name = repo_id.split("/") if "/" in repo_id else (None, repo_id) + + path = f"{self.endpoint}/api/repos/create" + + if repo_type not in constants.REPO_TYPES: + raise ValueError("Invalid repo type") + + json: Dict[str, Any] = {"name": name, "organization": organization} + if private is not None: + json["private"] = private + if repo_type is not None: + json["type"] = repo_type + if repo_type == "space": + if space_sdk is None: + raise ValueError( + "No space_sdk provided. 
`create_repo` expects space_sdk to be one" + f" of {constants.SPACES_SDK_TYPES} when repo_type is 'space'`" + ) + if space_sdk not in constants.SPACES_SDK_TYPES: + raise ValueError(f"Invalid space_sdk. Please choose one of {constants.SPACES_SDK_TYPES}.") + json["sdk"] = space_sdk + + if space_sdk is not None and repo_type != "space": + warnings.warn("Ignoring provided space_sdk because repo_type is not 'space'.") + + function_args = [ + "space_hardware", + "space_storage", + "space_sleep_time", + "space_secrets", + "space_variables", + ] + json_keys = ["hardware", "storageTier", "sleepTimeSeconds", "secrets", "variables"] + values = [space_hardware, space_storage, space_sleep_time, space_secrets, space_variables] + + if repo_type == "space": + json.update({k: v for k, v in zip(json_keys, values) if v is not None}) + else: + provided_space_args = [key for key, value in zip(function_args, values) if value is not None] + + if provided_space_args: + warnings.warn(f"Ignoring provided {', '.join(provided_space_args)} because repo_type is not 'space'.") + + if getattr(self, "_lfsmultipartthresh", None): + # Testing purposes only. + # See https://github.com/huggingface/huggingface_hub/pull/733/files#r820604472 + json["lfsmultipartthresh"] = self._lfsmultipartthresh # type: ignore + + if resource_group_id is not None: + json["resourceGroupId"] = resource_group_id + + headers = self._build_hf_headers(token=token) + while True: + r = get_session().post(path, headers=headers, json=json) + if r.status_code == 409 and "Cannot create repo: another conflicting operation is in progress" in r.text: + # Since https://github.com/huggingface/moon-landing/pull/7272 (private repo), it is not possible to + # concurrently create repos on the Hub for a same user. This is rarely an issue, except when running + # tests. To avoid any inconvenience, we retry to create the repo for this specific error. 
+ # NOTE: This could have being fixed directly in the tests but adding it here should fixed CIs for all + # dependent libraries. + # NOTE: If a fix is implemented server-side, we should be able to remove this retry mechanism. + logger.debug("Create repo failed due to a concurrency issue. Retrying...") + continue + break + + try: + hf_raise_for_status(r) + except HTTPError as err: + if exist_ok and err.response.status_code == 409: + # Repo already exists and `exist_ok=True` + pass + elif exist_ok and err.response.status_code == 403: + # No write permission on the namespace but repo might already exist + try: + self.repo_info(repo_id=repo_id, repo_type=repo_type, token=token) + if repo_type is None or repo_type == constants.REPO_TYPE_MODEL: + return RepoUrl(f"{self.endpoint}/{repo_id}") + return RepoUrl(f"{self.endpoint}/{repo_type}/{repo_id}") + except HfHubHTTPError: + raise err + else: + raise + + d = r.json() + return RepoUrl(d["url"], endpoint=self.endpoint) + + @validate_hf_hub_args + def delete_repo( + self, + repo_id: str, + *, + token: Union[str, bool, None] = None, + repo_type: Optional[str] = None, + missing_ok: bool = False, + ) -> None: + """ + Delete a repo from the HuggingFace Hub. CAUTION: this is irreversible. + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated + by a `/`. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if uploading to a dataset or + space, `None` or `"model"` if uploading to a model. + missing_ok (`bool`, *optional*, defaults to `False`): + If `True`, do not raise an error if repo does not exist. 
+ + Raises: + [`~utils.RepositoryNotFoundError`] + If the repository to delete from cannot be found and `missing_ok` is set to False (default). + """ + organization, name = repo_id.split("/") if "/" in repo_id else (None, repo_id) + + path = f"{self.endpoint}/api/repos/delete" + + if repo_type not in constants.REPO_TYPES: + raise ValueError("Invalid repo type") + + json = {"name": name, "organization": organization} + if repo_type is not None: + json["type"] = repo_type + + headers = self._build_hf_headers(token=token) + r = get_session().delete(path, headers=headers, json=json) + try: + hf_raise_for_status(r) + except RepositoryNotFoundError: + if not missing_ok: + raise + + @_deprecate_method(version="0.32", message="Please use `update_repo_settings` instead.") + @validate_hf_hub_args + def update_repo_visibility( + self, + repo_id: str, + private: bool = False, + *, + token: Union[str, bool, None] = None, + repo_type: Optional[str] = None, + ) -> Dict[str, bool]: + """Update the visibility setting of a repository. + + Deprecated. Use `update_repo_settings` instead. + + Args: + repo_id (`str`, *optional*): + A namespace (user or an organization) and a repo name separated by a `/`. + private (`bool`, *optional*, defaults to `False`): + Whether the repository should be private. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if uploading to a dataset or + space, `None` or `"model"` if uploading to a model. Default is + `None`. + + Returns: + The HTTP response in json. + + > [!TIP] + > Raises the following errors: + > + > - [`~utils.RepositoryNotFoundError`] + > If the repository to download from cannot be found. 
This may be because it doesn't exist, + > or because it is set to `private` and you do not have access. + """ + if repo_type not in constants.REPO_TYPES: + raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}") + if repo_type is None: + repo_type = constants.REPO_TYPE_MODEL # default repo type + + r = get_session().put( + url=f"{self.endpoint}/api/{repo_type}s/{repo_id}/settings", + headers=self._build_hf_headers(token=token), + json={"private": private}, + ) + hf_raise_for_status(r) + return r.json() + + @validate_hf_hub_args + def update_repo_settings( + self, + repo_id: str, + *, + gated: Optional[Literal["auto", "manual", False]] = None, + private: Optional[bool] = None, + token: Union[str, bool, None] = None, + repo_type: Optional[str] = None, + xet_enabled: Optional[bool] = None, + ) -> None: + """ + Update the settings of a repository, including gated access and visibility. + + To give more control over how repos are used, the Hub allows repo authors to enable + access requests for their repos, and also to set the visibility of the repo to private. + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated by a /. + gated (`Literal["auto", "manual", False]`, *optional*): + The gated status for the repository. If set to `None` (default), the `gated` setting of the repository won't be updated. + * "auto": The repository is gated, and access requests are automatically approved or denied based on predefined criteria. + * "manual": The repository is gated, and access requests require manual approval. + * False : The repository is not gated, and anyone can access it. + private (`bool`, *optional*): + Whether the repository should be private. + token (`Union[str, bool, None]`, *optional*): + A valid user access token (string). Defaults to the locally saved token, + which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). 
+ To disable authentication, pass False. + repo_type (`str`, *optional*): + The type of the repository to update settings from (`"model"`, `"dataset"` or `"space"`). + Defaults to `"model"`. + xet_enabled (`bool`, *optional*): + Whether the repository should be enabled for Xet Storage. + Raises: + [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + If gated is not one of "auto", "manual", or False. + [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + If repo_type is not one of the values in constants.REPO_TYPES. + [`~utils.HfHubHTTPError`]: + If the request to the Hugging Face Hub API fails. + [`~utils.RepositoryNotFoundError`] + If the repository to download from cannot be found. This may be because it doesn't exist, + or because it is set to `private` and you do not have access. + """ + + if repo_type not in constants.REPO_TYPES: + raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}") + if repo_type is None: + repo_type = constants.REPO_TYPE_MODEL # default repo type + + # Prepare the JSON payload for the PUT request + payload: Dict = {} + + if gated is not None: + if gated not in ["auto", "manual", False]: + raise ValueError(f"Invalid gated status, must be one of 'auto', 'manual', or False. 
Got '{gated}'.") + payload["gated"] = gated + + if private is not None: + payload["private"] = private + + if xet_enabled is not None: + payload["xetEnabled"] = xet_enabled + + if len(payload) == 0: + raise ValueError("At least one setting must be updated.") + + # Build headers + headers = self._build_hf_headers(token=token) + + r = get_session().put( + url=f"{self.endpoint}/api/{repo_type}s/{repo_id}/settings", + headers=headers, + json=payload, + ) + hf_raise_for_status(r) + + def move_repo( + self, + from_id: str, + to_id: str, + *, + repo_type: Optional[str] = None, + token: Union[str, bool, None] = None, + ): + """ + Moving a repository from namespace1/repo_name1 to namespace2/repo_name2 + + Note there are certain limitations. For more information about moving + repositories, please see + https://hf.co/docs/hub/repositories-settings#renaming-or-transferring-a-repo. + + Args: + from_id (`str`): + A namespace (user or an organization) and a repo name separated + by a `/`. Original repository identifier. + to_id (`str`): + A namespace (user or an organization) and a repo name separated + by a `/`. Final repository identifier. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if uploading to a dataset or + space, `None` or `"model"` if uploading to a model. Default is + `None`. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + > [!TIP] + > Raises the following errors: + > + > - [`~utils.RepositoryNotFoundError`] + > If the repository to download from cannot be found. This may be because it doesn't exist, + > or because it is set to `private` and you do not have access. + """ + if len(from_id.split("/")) != 2: + raise ValueError(f"Invalid repo_id: {from_id}. 
It should have a namespace (:namespace:/:repo_name:)") + + if len(to_id.split("/")) != 2: + raise ValueError(f"Invalid repo_id: {to_id}. It should have a namespace (:namespace:/:repo_name:)") + + if repo_type is None: + repo_type = constants.REPO_TYPE_MODEL # Hub won't accept `None`. + + json = {"fromRepo": from_id, "toRepo": to_id, "type": repo_type} + + path = f"{self.endpoint}/api/repos/move" + headers = self._build_hf_headers(token=token) + r = get_session().post(path, headers=headers, json=json) + try: + hf_raise_for_status(r) + except HfHubHTTPError as e: + e.append_to_message( + "\nFor additional documentation please see" + " https://hf.co/docs/hub/repositories-settings#renaming-or-transferring-a-repo." + ) + raise + + @overload + def create_commit( # type: ignore + self, + repo_id: str, + operations: Iterable[CommitOperation], + *, + commit_message: str, + commit_description: Optional[str] = None, + token: Union[str, bool, None] = None, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + create_pr: Optional[bool] = None, + num_threads: int = 5, + parent_commit: Optional[str] = None, + run_as_future: Literal[False] = ..., + ) -> CommitInfo: ... + + @overload + def create_commit( + self, + repo_id: str, + operations: Iterable[CommitOperation], + *, + commit_message: str, + commit_description: Optional[str] = None, + token: Union[str, bool, None] = None, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + create_pr: Optional[bool] = None, + num_threads: int = 5, + parent_commit: Optional[str] = None, + run_as_future: Literal[True] = ..., + ) -> Future[CommitInfo]: ... 
    @validate_hf_hub_args
    @future_compatible
    def create_commit(
        self,
        repo_id: str,
        operations: Iterable[CommitOperation],
        *,
        commit_message: str,
        commit_description: Optional[str] = None,
        token: Union[str, bool, None] = None,
        repo_type: Optional[str] = None,
        revision: Optional[str] = None,
        create_pr: Optional[bool] = None,
        num_threads: int = 5,
        parent_commit: Optional[str] = None,
        run_as_future: bool = False,
    ) -> Union[CommitInfo, Future[CommitInfo]]:
        """
        Creates a commit in the given repo, deleting & uploading files as needed.

        > [!WARNING]
        > The input list of `CommitOperation` will be mutated during the commit process. Do not reuse the same objects
        > for multiple commits.

        > [!WARNING]
        > `create_commit` assumes that the repo already exists on the Hub. If you get a
        > Client error 404, please make sure you are authenticated and that `repo_id` and
        > `repo_type` are set correctly. If repo does not exist, create it first using
        > [`~hf_api.create_repo`].

        > [!WARNING]
        > `create_commit` is limited to 25k LFS files and a 1GB payload for regular files.

        Args:
            repo_id (`str`):
                The repository in which the commit will be created, for example:
                `"username/custom_transformers"`

            operations (`Iterable` of [`~hf_api.CommitOperation`]):
                An iterable of operations to include in the commit, either:

                    - [`~hf_api.CommitOperationAdd`] to upload a file
                    - [`~hf_api.CommitOperationDelete`] to delete a file
                    - [`~hf_api.CommitOperationCopy`] to copy a file

                Operation objects will be mutated to include information relative to the upload. Do not reuse the
                same objects for multiple commits.

            commit_message (`str`):
                The summary (first line) of the commit that will be created.

            commit_description (`str`, *optional*):
                The description of the commit that will be created

            token (`bool` or `str`, *optional*):
                A valid user access token (string). Defaults to the locally saved
                token, which is the recommended method for authentication (see
                https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
                To disable authentication, pass `False`.

            repo_type (`str`, *optional*):
                Set to `"dataset"` or `"space"` if uploading to a dataset or
                space, `None` or `"model"` if uploading to a model. Default is
                `None`.

            revision (`str`, *optional*):
                The git revision to commit from. Defaults to the head of the `"main"` branch.

            create_pr (`boolean`, *optional*):
                Whether or not to create a Pull Request with that commit. Defaults to `False`.
                If `revision` is not set, PR is opened against the `"main"` branch. If
                `revision` is set and is a branch, PR is opened against this branch. If
                `revision` is set and is not a branch name (example: a commit oid), a
                `RevisionNotFoundError` is returned by the server.

            num_threads (`int`, *optional*):
                Number of concurrent threads for uploading files. Defaults to 5.
                Setting it to 2 means at most 2 files will be uploaded concurrently.

            parent_commit (`str`, *optional*):
                The OID / SHA of the parent commit, as a hexadecimal string.
                Shorthands (7 first characters) are also supported. If specified and `create_pr` is `False`,
                the commit will fail if `revision` does not point to `parent_commit`. If specified and `create_pr`
                is `True`, the pull request will be created from `parent_commit`. Specifying `parent_commit`
                ensures the repo has not changed before committing the changes, and can be especially useful
                if the repo is updated / committed to concurrently.
            run_as_future (`bool`, *optional*):
                Whether or not to run this method in the background. Background jobs are run sequentially without
                blocking the main thread. Passing `run_as_future=True` will return a [Future](https://docs.python.org/3/library/concurrent.futures.html#future-objects)
                object. Defaults to `False`.

        Returns:
            [`CommitInfo`] or `Future`:
                Instance of [`CommitInfo`] containing information about the newly created commit (commit hash, commit
                url, pr url, commit message,...). If `run_as_future=True` is passed, returns a Future object which will
                contain the result when executed.

                If every operation turns out to be a no-op (unchanged file, or copy whose source and destination
                are identical), no commit is created: a warning is logged and the returned [`CommitInfo`] is built
                from the latest commit of `revision` instead.

        Raises:
            [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
                If commit message is empty.
            [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
                If parent commit is not a valid commit OID.
            [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
                If `repo_type` is not one of `constants.REPO_TYPES`.
            [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
                If one of the [`~hf_api.CommitOperationAdd`] objects has already been used in a previous commit.
            [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
                If a README.md file with an invalid metadata section is committed. In this case, the commit will fail
                early, before trying to upload any file.
            [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
                If `create_pr` is `True` and revision is neither `None` nor `"main"`.
            [`~utils.RepositoryNotFoundError`]:
                If repository is not found (error 404): wrong repo_id/repo_type, private
                but not authenticated or repo does not exist.
        """
        # --- Input validation (done before any network call) ---
        if parent_commit is not None and not constants.REGEX_COMMIT_OID.fullmatch(parent_commit):
            raise ValueError(
                f"`parent_commit` is not a valid commit OID. It must match the following regex: {constants.REGEX_COMMIT_OID}"
            )

        if commit_message is None or len(commit_message) == 0:
            raise ValueError("`commit_message` can't be empty, please pass a value.")

        # --- Normalize optional arguments to concrete values ---
        commit_description = commit_description if commit_description is not None else ""
        repo_type = repo_type if repo_type is not None else constants.REPO_TYPE_MODEL
        if repo_type not in constants.REPO_TYPES:
            raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}")
        # Two forms of the revision are kept: the URL-quoted one is interpolated into the commit URL
        # below, while the unquoted one is what gets passed to the other methods/helpers called here.
        unquoted_revision = revision or constants.DEFAULT_REVISION
        revision = quote(unquoted_revision, safe="")
        create_pr = create_pr if create_pr is not None else False

        headers = self._build_hf_headers(token=token)

        # Materialize the iterable: it is traversed several times below.
        operations = list(operations)
        additions = [op for op in operations if isinstance(op, CommitOperationAdd)]
        copies = [op for op in operations if isinstance(op, CommitOperationCopy)]
        nb_additions = len(additions)
        nb_copies = len(copies)
        # Anything that is neither an addition nor a copy is counted as a deletion.
        nb_deletions = len(operations) - nb_additions - nb_copies

        # Refuse to reuse an addition that a previous successful commit has already flagged
        # (the flag is set at the end of this method).
        for addition in additions:
            if addition._is_committed:
                raise ValueError(
                    f"CommitOperationAdd {addition} has already being committed and cannot be reused. Please create a"
                    " new CommitOperationAdd object if you want to create a new commit."
                )

        # Warn (without failing) when data-file extensions are pushed to a non-dataset repo.
        if repo_type != "dataset":
            for addition in additions:
                if addition.path_in_repo.endswith((".arrow", ".parquet")):
                    warnings.warn(
                        f"It seems that you are about to commit a data file ({addition.path_in_repo}) to a {repo_type}"
                        " repository. You are sure this is intended? If you are trying to upload a dataset, please"
                        " set `repo_type='dataset'` or `--repo-type=dataset` in a CLI."
                    )

        logger.debug(
            f"About to commit to the hub: {len(additions)} addition(s), {len(copies)} copie(s) and"
            f" {nb_deletions} deletion(s)."
        )

        # If updating a README.md file, make sure the metadata format is valid
        # It's better to fail early than to fail after all the files have been uploaded.
        for addition in additions:
            if addition.path_in_repo == "README.md":
                with addition.as_file() as file:
                    content = file.read().decode()
                self._validate_yaml(content, repo_type=repo_type, token=token)
                # Skip other additions after `README.md` has been processed
                break

        # If updating twice the same file or update then delete a file in a single commit
        _warn_on_overwriting_operations(operations)

        # Upload LFS content before the commit itself is created.
        self.preupload_lfs_files(
            repo_id=repo_id,
            additions=additions,
            token=token,
            repo_type=repo_type,
            revision=unquoted_revision,  # first-class methods take unquoted revision
            create_pr=create_pr,
            num_threads=num_threads,
            free_memory=False,  # do not remove `CommitOperationAdd.path_or_fileobj` on LFS files for "normal" users
        )

        files_to_copy = _fetch_files_to_copy(
            copies=copies,
            repo_type=repo_type,
            repo_id=repo_id,
            headers=headers,
            revision=unquoted_revision,
            endpoint=self.endpoint,
        )
        # Remove no-op operations (files that have not changed)
        # NOTE: the filtered list is only used for logging and for the empty-commit check below;
        # the payload is still built from the full `operations` list.
        operations_without_no_op = []
        for operation in operations:
            if (
                isinstance(operation, CommitOperationAdd)
                and operation._remote_oid is not None
                and operation._remote_oid == operation._local_oid
            ):
                # File already exists on the Hub and has not changed: we can skip it.
                logger.debug(f"Skipping upload for '{operation.path_in_repo}' as the file has not changed.")
                continue
            if (
                isinstance(operation, CommitOperationCopy)
                and operation._dest_oid is not None
                and operation._dest_oid == operation._src_oid
            ):
                # Source and destination files are identical - skip
                logger.debug(
                    f"Skipping copy for '{operation.src_path_in_repo}' -> '{operation.path_in_repo}' as the content of the source file is the same as the destination file."
                )
                continue
            operations_without_no_op.append(operation)
        if len(operations) != len(operations_without_no_op):
            logger.info(
                f"Removing {len(operations) - len(operations_without_no_op)} file(s) from commit that have not changed."
            )

        # Return early if empty commit
        if len(operations_without_no_op) == 0:
            logger.warning("No files have been modified since last commit. Skipping to prevent empty commit.")

            # Get latest commit info
            try:
                info = self.repo_info(repo_id=repo_id, repo_type=repo_type, revision=unquoted_revision, token=token)
            except RepositoryNotFoundError as e:
                e.append_to_message(_CREATE_COMMIT_NO_REPO_ERROR_MESSAGE)
                raise

            # Return commit info based on latest commit
            # Non-model repos get a pluralized type segment in the URL (`{repo_type}s`).
            url_prefix = self.endpoint
            if repo_type is not None and repo_type != constants.REPO_TYPE_MODEL:
                url_prefix = f"{url_prefix}/{repo_type}s"
            return CommitInfo(
                commit_url=f"{url_prefix}/{repo_id}/commit/{info.sha}",
                commit_message=commit_message,
                commit_description=commit_description,
                oid=info.sha,  # type: ignore[arg-type]
            )

        commit_payload = _prepare_commit_payload(
            operations=operations,
            files_to_copy=files_to_copy,
            commit_message=commit_message,
            commit_description=commit_description,
            parent_commit=parent_commit,
        )
        # The URL-quoted revision is used here since it is embedded in the request path.
        commit_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/commit/{revision}"

        def _payload_as_ndjson() -> Iterable[bytes]:
            # Serialize each payload item as one JSON document followed by a newline.
            for item in commit_payload:
                yield json.dumps(item).encode()
                yield b"\n"

        # `**headers` is unpacked last, so entries already present in `headers` take precedence
        # over the default Content-Type on key collision.
        headers = {
            # See https://github.com/huggingface/huggingface_hub/issues/1085#issuecomment-1265208073
            "Content-Type": "application/x-ndjson",
            **headers,
        }
        # The whole payload is joined into a single bytes object before being sent.
        data = b"".join(_payload_as_ndjson())
        # `create_pr` is sent as a query parameter, and only when requested.
        params = {"create_pr": "1"} if create_pr else None

        try:
            commit_resp = get_session().post(url=commit_url, headers=headers, data=data, params=params)
            hf_raise_for_status(commit_resp, endpoint_name="commit")
        except RepositoryNotFoundError as e:
            e.append_to_message(_CREATE_COMMIT_NO_REPO_ERROR_MESSAGE)
            raise
        except EntryNotFoundError as e:
            # Add a hint about file-vs-folder deletes, then re-raise the same error in all cases.
            if nb_deletions > 0 and "A file with this name doesn't exist" in str(e):
                e.append_to_message(
                    "\nMake sure to differentiate file and folder paths in delete"
                    " operations with a trailing '/' or using `is_folder=True/False`."
                )
            raise

        # Mark additions as committed (cannot be reused in another commit)
        for addition in additions:
            addition._is_committed = True

        commit_data = commit_resp.json()
        return CommitInfo(
            commit_url=commit_data["commitUrl"],
            commit_message=commit_message,
            commit_description=commit_description,
            oid=commit_data["commitOid"],
            # The PR URL is only read from the response when a PR was requested.
            pr_url=commit_data["pullRequestUrl"] if create_pr else None,
        )

    def preupload_lfs_files(
        self,
        repo_id: str,
        additions: Iterable[CommitOperationAdd],
        *,
        token: Union[str, bool, None] = None,
        repo_type: Optional[str] = None,
        revision: Optional[str] = None,
        create_pr: Optional[bool] = None,
        num_threads: int = 5,
        free_memory: bool = True,
        gitignore_content: Optional[str] = None,
    ):
        """Pre-upload LFS files to S3 in preparation on a future commit.

        This method is useful if you are generating the files to upload on-the-fly and you don't want to store them
        in memory before uploading them all at once.

        > [!WARNING]
        > This is a power-user method. You shouldn't need to call it directly to make a normal commit.
        > Use [`create_commit`] directly instead.

        > [!WARNING]
        > Commit operations will be mutated during the process. In particular, the attached `path_or_fileobj` will be
        > removed after the upload to save memory (and replaced by an empty `bytes` object). Do not reuse the same
        > objects except to pass them to [`create_commit`]. If you don't want to remove the attached content from the
        > commit operation object, pass `free_memory=False`.

        Args:
            repo_id (`str`):
                The repository in which you will commit the files, for example: `"username/custom_transformers"`.

            additions (`Iterable` of [`CommitOperationAdd`]):
                The list of files to upload. Warning: the objects in this list will be mutated to include information
                relative to the upload. Do not reuse the same objects for multiple commits.

            token (`bool` or `str`, *optional*):
                A valid user access token (string). Defaults to the locally saved
                token, which is the recommended method for authentication (see
                https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
                To disable authentication, pass `False`.

            repo_type (`str`, *optional*):
                The type of repository to upload to (e.g. `"model"` -default-, `"dataset"` or `"space"`).

            revision (`str`, *optional*):
                The git revision to commit from. Defaults to the head of the `"main"` branch.

            create_pr (`boolean`, *optional*):
                Whether or not you plan to create a Pull Request with that commit. Defaults to `False`.

            num_threads (`int`, *optional*):
                Number of concurrent threads for uploading files. Defaults to 5.
                Setting it to 2 means at most 2 files will be uploaded concurrently.

            free_memory (`bool`, *optional*):
                Whether to replace the `path_or_fileobj` of each uploaded LFS addition with an empty `bytes` object
                once the upload is done. Defaults to `True`.

            gitignore_content (`str`, *optional*):
                The content of the `.gitignore` file to know which files should be ignored. The order of priority
                is to first check if `gitignore_content` is passed, then check if the `.gitignore` file is present
                in the list of files to commit and finally default to the `.gitignore` file already hosted on the Hub
                (if any).

        Raises:
            [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
                If `repo_type` is not one of `constants.REPO_TYPES`.
            [`~utils.RepositoryNotFoundError`]:
                Re-raised, with an extra hint appended to its message, if fetching the upload modes fails because
                the repository cannot be found.

        Example:
        ```py
        >>> from huggingface_hub import CommitOperationAdd, preupload_lfs_files, create_commit, create_repo

        >>> repo_id = create_repo("test_preupload").repo_id

        # Generate and preupload LFS files one by one
        >>> operations = [] # List of all `CommitOperationAdd` objects that will be generated
        >>> for i in range(5):
        ...     content = ... # generate binary content
        ...     addition = CommitOperationAdd(path_in_repo=f"shard_{i}_of_5.bin", path_or_fileobj=content)
        ...     preupload_lfs_files(repo_id, additions=[addition]) # upload + free memory
        ...     operations.append(addition)

        # Create commit
        >>> create_commit(repo_id, operations=operations, commit_message="Commit all shards")
        ```
        """
        # Normalize optional arguments to concrete values.
        repo_type = repo_type if repo_type is not None else constants.REPO_TYPE_MODEL
        if repo_type not in constants.REPO_TYPES:
            raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}")
        # The caller passes an unquoted revision; it is URL-quoted here before being handed to the helpers.
        revision = quote(revision, safe="") if revision is not None else constants.DEFAULT_REVISION
        create_pr = create_pr if create_pr is not None else False
        headers = self._build_hf_headers(token=token)

        # Check if a `gitignore` file is being committed to the Hub.
        # Materialize the iterable first: it is traversed several times below.
        additions = list(additions)
        if gitignore_content is None:
            # An explicitly passed `gitignore_content` wins; otherwise use the first `.gitignore` being added.
            for addition in additions:
                if addition.path_in_repo == ".gitignore":
                    with addition.as_file() as f:
                        gitignore_content = f.read().decode()
                        break

        # Filter out already uploaded files
        new_additions = [addition for addition in additions if not addition._is_uploaded]

        # Check which new files are LFS
        # For some items, we might have already fetched the upload mode (in case of upload_large_folder)
        additions_no_upload_mode = [addition for addition in new_additions if addition._upload_mode is None]
        if len(additions_no_upload_mode) > 0:
            try:
                _fetch_upload_modes(
                    additions=additions_no_upload_mode,
                    repo_type=repo_type,
                    repo_id=repo_id,
                    headers=headers,
                    revision=revision,
                    endpoint=self.endpoint,
                    create_pr=create_pr or False,
                    gitignore_content=gitignore_content,
                )
            except RepositoryNotFoundError as e:
                e.append_to_message(_CREATE_COMMIT_NO_REPO_ERROR_MESSAGE)
                raise

        # Filter out regular files
        new_lfs_additions = [addition for addition in new_additions if addition._upload_mode == "lfs"]

        # Filter out files listed in .gitignore
        new_lfs_additions_to_upload = []
        for addition in new_lfs_additions:
            if addition._should_ignore:
                logger.debug(f"Skipping upload for LFS file '{addition.path_in_repo}' (ignored by gitignore file).")
            else:
                new_lfs_additions_to_upload.append(addition)
        if len(new_lfs_additions) != len(new_lfs_additions_to_upload):
            logger.info(
                f"Skipped upload for {len(new_lfs_additions) - len(new_lfs_additions_to_upload)} LFS file(s) "
                "(ignored by gitignore file)."
            )
        # If no LFS files remain to upload, keep previous behavior and log explicitly
        if len(new_lfs_additions_to_upload) == 0:
            logger.debug("No LFS files to upload.")
            # Nothing to do: return `None` without calling `_upload_files`.
            return
        # Prepare upload parameters
        upload_kwargs = {
            "additions": new_lfs_additions_to_upload,
            "repo_type": repo_type,
            "repo_id": repo_id,
            "headers": headers,
            "endpoint": self.endpoint,
            # If `create_pr`, we don't want to check user permission on the revision as users with read permission
            # should still be able to create PRs even if they don't have write permission on the target branch of the
            # PR (i.e. `revision`).
            "revision": revision if not create_pr else None,
        }
        _upload_files(**upload_kwargs, num_threads=num_threads, create_pr=create_pr)  # type: ignore [arg-type]
        # Flag each uploaded addition so a later call filters it out (see `_is_uploaded` check above).
        for addition in new_lfs_additions_to_upload:
            addition._is_uploaded = True
            if free_memory:
                # Drop the attached content, replacing it with an empty bytes object.
                addition.path_or_fileobj = b""

    # Typing-only overload: `run_as_future=False` (the default) returns a `CommitInfo` directly.
    @overload
    def upload_file(  # type: ignore
        self,
        *,
        path_or_fileobj: Union[str, Path, bytes, BinaryIO],
        path_in_repo: str,
        repo_id: str,
        token: Union[str, bool, None] = None,
        repo_type: Optional[str] = None,
        revision: Optional[str] = None,
        commit_message: Optional[str] = None,
        commit_description: Optional[str] = None,
        create_pr: Optional[bool] = None,
        parent_commit: Optional[str] = None,
        run_as_future: Literal[False] = ...,
    ) -> CommitInfo: ...
@validate_hf_hub_args
@future_compatible
def upload_file(
    self,
    *,
    path_or_fileobj: Union[str, Path, bytes, BinaryIO],
    path_in_repo: str,
    repo_id: str,
    token: Union[str, bool, None] = None,
    repo_type: Optional[str] = None,
    revision: Optional[str] = None,
    commit_message: Optional[str] = None,
    commit_description: Optional[str] = None,
    create_pr: Optional[bool] = None,
    parent_commit: Optional[str] = None,
    run_as_future: bool = False,
) -> Union[CommitInfo, Future[CommitInfo]]:
    """
    Upload a local file (up to 50 GB) to the given repo. The upload is done
    through an HTTP post request, and doesn't require git or git-lfs to be
    installed.

    The file is wrapped in a single [`CommitOperationAdd`] and committed with
    `create_commit`.

    Args:
        path_or_fileobj (`str`, `Path`, `bytes`, or `IO`):
            Path to a file on the local machine or binary data stream /
            fileobj / buffer.
        path_in_repo (`str`):
            Relative filepath in the repo, for example:
            `"checkpoints/1fec34a/weights.bin"`
        repo_id (`str`):
            The repository to which the file will be uploaded, for example:
            `"username/custom_transformers"`
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if uploading to a dataset or
            space, `None` or `"model"` if uploading to a model. Default is
            `None`.
        revision (`str`, *optional*):
            The git revision to commit from. Defaults to the head of the `"main"` branch.
        commit_message (`str`, *optional*):
            The summary / title / first line of the generated commit. Defaults to
            `f"Upload {path_in_repo} with huggingface_hub"`.
        commit_description (`str`, *optional*):
            The description of the generated commit.
        create_pr (`boolean`, *optional*):
            Whether or not to create a Pull Request with that commit. Defaults to `False`.
            If `revision` is not set, PR is opened against the `"main"` branch. If
            `revision` is set and is a branch, PR is opened against this branch. If
            `revision` is set and is not a branch name (example: a commit oid), an
            `RevisionNotFoundError` is returned by the server.
        parent_commit (`str`, *optional*):
            The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported.
            If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`.
            If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`.
            Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be
            especially useful if the repo is updated / committed to concurrently.
        run_as_future (`bool`, *optional*):
            Whether or not to run this method in the background. Background jobs are run sequentially without
            blocking the main thread. Passing `run_as_future=True` will return a [Future](https://docs.python.org/3/library/concurrent.futures.html#future-objects)
            object. Defaults to `False`.

    Returns:
        [`CommitInfo`] or `Future`:
            Instance of [`CommitInfo`] containing information about the newly created commit (commit hash, commit
            url, pr url, commit message,...). If `run_as_future=True` is passed, returns a Future object which will
            contain the result when executed.

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
    >       if the HuggingFace API returned an error
    >     - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
    >       if some parameter value is invalid
    >     - [`~utils.RepositoryNotFoundError`]
    >       If the repository to upload to cannot be found. This may be because it doesn't exist,
    >       or because it is set to `private` and you do not have access.
    >     - [`~utils.RevisionNotFoundError`]
    >       If the revision to commit to cannot be found.

    > [!WARNING]
    > `upload_file` assumes that the repo already exists on the Hub. If you get a
    > Client error 404, please make sure you are authenticated and that `repo_id` and
    > `repo_type` are set correctly. If repo does not exist, create it first using
    > [`~hf_api.create_repo`].

    Example:

    ```python
    >>> from huggingface_hub import upload_file

    >>> with open("./local/filepath", "rb") as fobj:
    ...     upload_file(
    ...         path_or_fileobj=fobj,
    ...         path_in_repo="remote/file/path.h5",
    ...         repo_id="username/my-dataset",
    ...         repo_type="dataset",
    ...         token="my_token",
    ...     )
    "https://huggingface.co/datasets/username/my-dataset/blob/main/remote/file/path.h5"

    >>> upload_file(
    ...     path_or_fileobj=".\\\\local\\\\file\\\\path",
    ...     path_in_repo="remote/file/path.h5",
    ...     repo_id="username/my-model",
    ...     token="my_token",
    ... )
    "https://huggingface.co/username/my-model/blob/main/remote/file/path.h5"

    >>> upload_file(
    ...     path_or_fileobj=".\\\\local\\\\file\\\\path",
    ...     path_in_repo="remote/file/path.h5",
    ...     repo_id="username/my-model",
    ...     token="my_token",
    ...     create_pr=True,
    ... )
    "https://huggingface.co/username/my-model/blob/refs%2Fpr%2F1/remote/file/path.h5"
    ```
    """
    if repo_type not in constants.REPO_TYPES:
        raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}")

    if commit_message is None:
        commit_message = f"Upload {path_in_repo} with huggingface_hub"

    operation = CommitOperationAdd(
        path_or_fileobj=path_or_fileobj,
        path_in_repo=path_in_repo,
    )

    commit_info = self.create_commit(
        repo_id=repo_id,
        repo_type=repo_type,
        operations=[operation],
        commit_message=commit_message,
        commit_description=commit_description,
        token=token,
        revision=revision,
        create_pr=create_pr,
        parent_commit=parent_commit,
    )

    # Build the legacy "blob" URL without rebinding the `revision` / `repo_id` parameters.
    if commit_info.pr_url is not None:
        # The commit landed on a PR ref: point the URL at that ref (URL-quoted).
        url_revision = quote(_parse_revision_from_pr_url(commit_info.pr_url), safe="")
    elif revision is not None:
        # NOTE(review): a user-supplied `revision` is inserted as-is (not URL-quoted),
        # unlike the PR case above -- confirm this is intended for revisions containing "/".
        url_revision = revision
    else:
        url_revision = constants.DEFAULT_REVISION

    url_repo_id = repo_id
    if repo_type in constants.REPO_TYPES_URL_PREFIXES:
        url_repo_id = constants.REPO_TYPES_URL_PREFIXES[repo_type] + repo_id

    return CommitInfo(
        commit_url=commit_info.commit_url,
        commit_message=commit_info.commit_message,
        commit_description=commit_info.commit_description,
        oid=commit_info.oid,
        pr_url=commit_info.pr_url,
        # Similar to `hf_hub_url` but it's "blob" instead of "resolve"
        # TODO: remove this in v1.0
        _url=f"{self.endpoint}/{url_repo_id}/blob/{url_revision}/{path_in_repo}",
    )
@validate_hf_hub_args
@future_compatible
def upload_folder(
    self,
    *,
    repo_id: str,
    folder_path: Union[str, Path],
    path_in_repo: Optional[str] = None,
    commit_message: Optional[str] = None,
    commit_description: Optional[str] = None,
    token: Union[str, bool, None] = None,
    repo_type: Optional[str] = None,
    revision: Optional[str] = None,
    create_pr: Optional[bool] = None,
    parent_commit: Optional[str] = None,
    allow_patterns: Optional[Union[List[str], str]] = None,
    ignore_patterns: Optional[Union[List[str], str]] = None,
    delete_patterns: Optional[Union[List[str], str]] = None,
    run_as_future: bool = False,
) -> Union[CommitInfo, Future[CommitInfo]]:
    """
    Upload a local folder to the given repo. The upload is done through HTTP requests, and doesn't require git or
    git-lfs to be installed.

    The structure of the folder will be preserved. Files with the same name already present in the repository will
    be overwritten. Others will be left untouched.

    Use the `allow_patterns` and `ignore_patterns` arguments to specify which files to upload. These parameters
    accept either a single pattern or a list of patterns. Patterns are Standard Wildcards (globbing patterns) as
    documented [here](https://tldp.org/LDP/GNU-Linux-Tools-Summary/html/x11655.htm). If both `allow_patterns` and
    `ignore_patterns` are provided, both constraints apply. By default, all files from the folder are uploaded.

    Use the `delete_patterns` argument to specify remote files you want to delete. Input type is the same as for
    `allow_patterns` (see above). If `path_in_repo` is also provided, the patterns are matched against paths
    relative to this folder. For example, `upload_folder(..., path_in_repo="experiment", delete_patterns="logs/*")`
    will delete any remote file under `./experiment/logs/`. Note that the `.gitattributes` file will not be deleted
    even if it matches the patterns.

    Any `.git/` folder present in any subdirectory will be ignored. However, please be aware that the `.gitignore`
    file is not taken into account.

    Uses `HfApi.create_commit` under the hood.

    Args:
        repo_id (`str`):
            The repository to which the file will be uploaded, for example:
            `"username/custom_transformers"`
        folder_path (`str` or `Path`):
            Path to the folder to upload on the local file system
        path_in_repo (`str`, *optional*):
            Relative path of the directory in the repo, for example:
            `"checkpoints/1fec34a/results"`. Will default to the root folder of the repository.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if uploading to a dataset or
            space, `None` or `"model"` if uploading to a model. Default is
            `None`.
        revision (`str`, *optional*):
            The git revision to commit from. Defaults to the head of the `"main"` branch.
        commit_message (`str`, *optional*):
            The summary / title / first line of the generated commit. Defaults to:
            `"Upload folder using huggingface_hub"`
        commit_description (`str`, *optional*):
            The description of the generated commit
        create_pr (`boolean`, *optional*):
            Whether or not to create a Pull Request with that commit. Defaults to `False`. If `revision` is not
            set, PR is opened against the `"main"` branch. If `revision` is set and is a branch, PR is opened
            against this branch. If `revision` is set and is not a branch name (example: a commit oid), an
            `RevisionNotFoundError` is returned by the server.
        parent_commit (`str`, *optional*):
            The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported.
            If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`.
            If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`.
            Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be
            especially useful if the repo is updated / committed to concurrently.
        allow_patterns (`List[str]` or `str`, *optional*):
            If provided, only files matching at least one pattern are uploaded.
        ignore_patterns (`List[str]` or `str`, *optional*):
            If provided, files matching any of the patterns are not uploaded. A list passed here is not
            modified by this method.
        delete_patterns (`List[str]` or `str`, *optional*):
            If provided, remote files matching any of the patterns will be deleted from the repo while committing
            new files. This is useful if you don't know which files have already been uploaded.
            Note: to avoid discrepancies the `.gitattributes` file is not deleted even if it matches the pattern.
        run_as_future (`bool`, *optional*):
            Whether or not to run this method in the background. Background jobs are run sequentially without
            blocking the main thread. Passing `run_as_future=True` will return a [Future](https://docs.python.org/3/library/concurrent.futures.html#future-objects)
            object. Defaults to `False`.

    Returns:
        [`CommitInfo`] or `Future`:
            Instance of [`CommitInfo`] containing information about the newly created commit (commit hash, commit
            url, pr url, commit message,...). If `run_as_future=True` is passed, returns a Future object which will
            contain the result when executed.

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
    >     if the HuggingFace API returned an error
    >     - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
    >     if some parameter value is invalid

    > [!WARNING]
    > `upload_folder` assumes that the repo already exists on the Hub. If you get a Client error 404, please make
    > sure you are authenticated and that `repo_id` and `repo_type` are set correctly. If repo does not exist, create
    > it first using [`~hf_api.create_repo`].

    > [!TIP]
    > When dealing with a large folder (thousands of files or hundreds of GB), we recommend using [`~hf_api.upload_large_folder`] instead.

    Example:

    ```python
    # Upload checkpoints folder except the log files
    >>> upload_folder(
    ...     folder_path="local/checkpoints",
    ...     path_in_repo="remote/experiment/checkpoints",
    ...     repo_id="username/my-dataset",
    ...     repo_type="dataset",
    ...     token="my_token",
    ...     ignore_patterns="**/logs/*.txt",
    ... )
    "https://huggingface.co/datasets/username/my-dataset/tree/main/remote/experiment/checkpoints"

    # Upload checkpoints folder including logs while deleting existing logs from the repo
    # Useful if you don't know exactly which log files have already been pushed
    >>> upload_folder(
    ...     folder_path="local/checkpoints",
    ...     path_in_repo="remote/experiment/checkpoints",
    ...     repo_id="username/my-dataset",
    ...     repo_type="dataset",
    ...     token="my_token",
    ...     delete_patterns="**/logs/*.txt",
    ... )
    "https://huggingface.co/datasets/username/my-dataset/tree/main/remote/experiment/checkpoints"

    # Upload checkpoints folder while creating a PR
    >>> upload_folder(
    ...     folder_path="local/checkpoints",
    ...     path_in_repo="remote/experiment/checkpoints",
    ...     repo_id="username/my-dataset",
    ...     repo_type="dataset",
    ...     token="my_token",
    ...     create_pr=True,
    ... )
    "https://huggingface.co/datasets/username/my-dataset/tree/refs%2Fpr%2F1/remote/experiment/checkpoints"

    ```
    """
    if repo_type not in constants.REPO_TYPES:
        raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}")

    # By default, upload folder to the root directory in repo.
    if path_in_repo is None:
        path_in_repo = ""

    # Do not upload .git folder.
    # Build a NEW list instead of using `+=`: in-place extension would append the default
    # patterns to the caller's own list each time the method is called.
    if ignore_patterns is None:
        user_ignore_patterns: List[str] = []
    elif isinstance(ignore_patterns, str):
        user_ignore_patterns = [ignore_patterns]
    else:
        user_ignore_patterns = list(ignore_patterns)
    ignore_patterns = user_ignore_patterns + list(DEFAULT_IGNORE_PATTERNS)

    delete_operations = self._prepare_folder_deletions(
        repo_id=repo_id,
        repo_type=repo_type,
        # When a PR is requested, remote files are listed from the default revision.
        revision=constants.DEFAULT_REVISION if create_pr else revision,
        token=token,
        path_in_repo=path_in_repo,
        delete_patterns=delete_patterns,
    )
    add_operations = self._prepare_upload_folder_additions(
        folder_path,
        path_in_repo,
        allow_patterns=allow_patterns,
        ignore_patterns=ignore_patterns,
        token=token,
        repo_type=repo_type,
    )

    # Optimize operations: if some files will be overwritten, we don't need to delete them first
    if len(add_operations) > 0:
        added_paths = {op.path_in_repo for op in add_operations}
        delete_operations = [
            delete_op for delete_op in delete_operations if delete_op.path_in_repo not in added_paths
        ]
    commit_operations = delete_operations + add_operations

    commit_message = commit_message or "Upload folder using huggingface_hub"

    commit_info = self.create_commit(
        repo_type=repo_type,
        repo_id=repo_id,
        operations=commit_operations,
        commit_message=commit_message,
        commit_description=commit_description,
        token=token,
        revision=revision,
        create_pr=create_pr,
        parent_commit=parent_commit,
    )

    # Create url to uploaded folder (for legacy return value)
    if create_pr and commit_info.pr_url is not None:
        revision = quote(_parse_revision_from_pr_url(commit_info.pr_url), safe="")
    if repo_type in constants.REPO_TYPES_URL_PREFIXES:
        repo_id = constants.REPO_TYPES_URL_PREFIXES[repo_type] + repo_id
    revision = revision if revision is not None else constants.DEFAULT_REVISION

    return CommitInfo(
        commit_url=commit_info.commit_url,
        commit_message=commit_info.commit_message,
        commit_description=commit_info.commit_description,
        oid=commit_info.oid,
        pr_url=commit_info.pr_url,
        # Similar to `hf_hub_url` but it's "tree" instead of "resolve"
        # TODO: remove this in v1.0
        _url=f"{self.endpoint}/{repo_id}/tree/{revision}/{path_in_repo}",
    )
+ repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if the file is in a dataset or + space, `None` or `"model"` if in a model. Default is `None`. + revision (`str`, *optional*): + The git revision to commit from. Defaults to the head of the `"main"` branch. + commit_message (`str`, *optional*): + The summary / title / first line of the generated commit. Defaults to + `f"Delete {path_in_repo} with huggingface_hub"`. + commit_description (`str` *optional*) + The description of the generated commit + create_pr (`boolean`, *optional*): + Whether or not to create a Pull Request with that commit. Defaults to `False`. + If `revision` is not set, PR is opened against the `"main"` branch. If + `revision` is set and is a branch, PR is opened against this branch. If + `revision` is set and is not a branch name (example: a commit oid), an + `RevisionNotFoundError` is returned by the server. + parent_commit (`str`, *optional*): + The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported. + If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`. + If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`. + Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be + especially useful if the repo is updated / committed to concurrently. + + + > [!TIP] + > Raises the following errors: + > + > - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError) + > if the HuggingFace API returned an error + > - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + > if some parameter value is invalid + > - [`~utils.RepositoryNotFoundError`] + > If the repository to download from cannot be found. This may be because it doesn't exist, + > or because it is set to `private` and you do not have access. 
@validate_hf_hub_args
def delete_files(
    self,
    repo_id: str,
    delete_patterns: List[str],
    *,
    token: Union[bool, str, None] = None,
    repo_type: Optional[str] = None,
    revision: Optional[str] = None,
    commit_message: Optional[str] = None,
    commit_description: Optional[str] = None,
    create_pr: Optional[bool] = None,
    parent_commit: Optional[str] = None,
) -> CommitInfo:
    """
    Delete files from a repository on the Hub.

    If a folder path is provided, the entire folder is deleted as well as
    all files it contained.

    Args:
        repo_id (`str`):
            The repository from which the folder will be deleted, for example:
            `"username/custom_transformers"`
        delete_patterns (`List[str]`):
            List of files or folders to delete. Each string can either be
            a file path, a folder path or a Unix shell-style wildcard.
            E.g. `["file.txt", "folder/", "data/*.parquet"]`
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        repo_type (`str`, *optional*):
            Type of the repo to delete files from. Can be `"model"`,
            `"dataset"` or `"space"`. Defaults to `"model"`.
        revision (`str`, *optional*):
            The git revision to commit from. Defaults to the head of the `"main"` branch.
        commit_message (`str`, *optional*):
            The summary (first line) of the generated commit. Defaults to
            `f"Delete files {' '.join(delete_patterns)} with huggingface_hub"`.
        commit_description (`str`, *optional*):
            The description of the generated commit.
        create_pr (`boolean`, *optional*):
            Whether or not to create a Pull Request with that commit. Defaults to `False`.
            If `revision` is not set, PR is opened against the `"main"` branch. If
            `revision` is set and is a branch, PR is opened against this branch. If
            `revision` is set and is not a branch name (example: a commit oid), an
            `RevisionNotFoundError` is returned by the server.
        parent_commit (`str`, *optional*):
            The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported.
            If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`.
            If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`.
            Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be
            especially useful if the repo is updated / committed to concurrently.

    Returns:
        [`CommitInfo`]: the value returned by `create_commit` for the deletion commit.
    """
    # Resolve the patterns into delete operations. `token` must be forwarded here as well:
    # this step uses the same helper as `upload_folder`, and the explicit token would
    # otherwise be ignored for it.
    operations = self._prepare_folder_deletions(
        repo_id=repo_id,
        repo_type=repo_type,
        delete_patterns=delete_patterns,
        path_in_repo="",
        revision=revision,
        token=token,
    )

    if commit_message is None:
        commit_message = f"Delete files {' '.join(delete_patterns)} with huggingface_hub"

    return self.create_commit(
        repo_id=repo_id,
        repo_type=repo_type,
        token=token,
        operations=operations,
        revision=revision,
        commit_message=commit_message,
        commit_description=commit_description,
        create_pr=create_pr,
        parent_commit=parent_commit,
    )
def upload_large_folder(
    self,
    repo_id: str,
    folder_path: Union[str, Path],
    *,
    repo_type: str,  # Repo type is required!
    revision: Optional[str] = None,
    private: Optional[bool] = None,
    allow_patterns: Optional[Union[List[str], str]] = None,
    ignore_patterns: Optional[Union[List[str], str]] = None,
    num_workers: Optional[int] = None,
    print_report: bool = True,
    print_report_every: int = 60,
) -> None:
    """Upload a large folder to the Hub in the most resilient way possible.

    Several workers are started to upload files in an optimized way. Before being committed to a repo, files must be
    hashed and be pre-uploaded if they are LFS files. Workers will perform these tasks for each file in the folder.
    At each step, some metadata information about the upload process is saved in the folder under `.cache/.huggingface/`
    to be able to resume the process if interrupted. The whole process might result in several commits.

    This method only forwards its arguments to `upload_large_folder_internal`.

    Args:
        repo_id (`str`):
            The repository to which the file will be uploaded.
            E.g. `"HuggingFaceTB/smollm-corpus"`.
        folder_path (`str` or `Path`):
            Path to the folder to upload on the local file system.
        repo_type (`str`):
            Type of the repository. Must be one of `"model"`, `"dataset"` or `"space"`.
            Unlike in all other `HfApi` methods, `repo_type` is explicitly required here. This is to avoid
            any mistake when uploading a large folder to the Hub, and therefore prevent from having to re-upload
            everything.
        revision (`str`, *optional*):
            The branch to commit to. If not provided, the `main` branch will be used.
        private (`bool`, *optional*):
            Whether the repository should be private.
            If `None` (default), the repo will be public unless the organization's default is private.
        allow_patterns (`List[str]` or `str`, *optional*):
            If provided, only files matching at least one pattern are uploaded.
        ignore_patterns (`List[str]` or `str`, *optional*):
            If provided, files matching any of the patterns are not uploaded.
        num_workers (`int`, *optional*):
            Number of workers to start. Defaults to `os.cpu_count() - 2` (minimum 2).
            A higher number of workers may speed up the process if your machine allows it. However, on machines with a
            slower connection, it is recommended to keep the number of workers low to ensure better resumability.
            Indeed, partially uploaded files will have to be completely re-uploaded if the process is interrupted.
        print_report (`bool`, *optional*):
            Whether to print a report of the upload progress. Defaults to True.
            Report is printed to `sys.stdout` every X seconds (60 by default) and overwrites the previous report.
        print_report_every (`int`, *optional*):
            Frequency at which the report is printed. Defaults to 60 seconds.

    > [!TIP]
    > A few things to keep in mind:
    >     - Repository limits still apply: https://huggingface.co/docs/hub/repositories-recommendations
    >     - Do not start several processes in parallel.
    >     - You can interrupt and resume the process at any time.
    >     - Do not upload the same folder to several repositories. If you need to do so, you must delete the local `.cache/.huggingface/` folder first.

    > [!WARNING]
    > While being much more robust to upload large folders, `upload_large_folder` is more limited than [`upload_folder`] feature-wise. In practice:
    >     - you cannot set a custom `path_in_repo`. If you want to upload to a subfolder, you need to set the proper structure locally.
    >     - you cannot set a custom `commit_message` and `commit_description` since multiple commits are created.
    >     - you cannot delete from the repo while uploading. Please make a separate commit first.
    >     - you cannot create a PR directly. Please create a PR first (from the UI or using [`create_pull_request`]) and then commit to it by passing `revision`.

    **Technical details:**

    The `upload_large_folder` process is as follows:
        1. (Check parameters and setup.)
        2. Create repo if missing.
        3. List local files to upload.
        4. Run validation checks and display warnings if repository limits might be exceeded:
            - Warns if the total number of files exceeds 100k (recommended limit).
            - Warns if any folder contains more than 10k files (recommended limit).
            - Warns about files larger than 20GB (recommended) or 50GB (hard limit).
        5. Start workers. Workers can perform the following tasks:
            - Hash a file.
            - Get upload mode (regular or LFS) for a list of files.
            - Pre-upload an LFS file.
            - Commit a bunch of files.
        Once a worker finishes a task, it will move on to the next task based on the priority list (see below) until
        all files are uploaded and committed.
        6. While workers are up, regularly print a report to sys.stdout.

    Order of priority:
        1. Commit if more than 5 minutes since last commit attempt (and at least 1 file).
        2. Commit if at least 150 files are ready to commit.
        3. Get upload mode if at least 10 files have been hashed.
        4. Pre-upload LFS file if at least 1 file and no worker is pre-uploading.
        5. Hash file if at least 1 file and no worker is hashing.
        6. Get upload mode if at least 1 file and no worker is getting upload mode.
        7. Pre-upload LFS file if at least 1 file (exception: if hf_transfer is enabled, only 1 worker can preupload LFS at a time).
        8. Hash file if at least 1 file to hash.
        9. Get upload mode if at least 1 file to get upload mode.
        10. Commit if at least 1 file to commit and at least 1 min since last commit attempt.
        11. Commit if at least 1 file to commit and all other queues are empty.

    Special rules:
        - If `hf_transfer` is enabled, only 1 LFS uploader at a time. Otherwise the CPU would be overloaded by `hf_transfer`.
        - Only one worker can commit at a time.
        - If no tasks are available, the worker waits for 10 seconds before checking again.
    """
    # Gather every option once, then delegate; `self` is passed positionally as the API client.
    forwarded_options = {
        "repo_id": repo_id,
        "folder_path": folder_path,
        "repo_type": repo_type,
        "revision": revision,
        "private": private,
        "allow_patterns": allow_patterns,
        "ignore_patterns": ignore_patterns,
        "num_workers": num_workers,
        "print_report": print_report,
        "print_report_every": print_report_every,
    }
    return upload_large_folder_internal(self, **forwarded_options)
+ token = self.token + + return get_hf_file_metadata( + url=url, + token=token, + proxies=proxies, + timeout=timeout, + library_name=self.library_name, + library_version=self.library_version, + user_agent=self.user_agent, + endpoint=self.endpoint, + ) + + @validate_hf_hub_args + def hf_hub_download( + self, + repo_id: str, + filename: str, + *, + subfolder: Optional[str] = None, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + cache_dir: Union[str, Path, None] = None, + local_dir: Union[str, Path, None] = None, + force_download: bool = False, + proxies: Optional[Dict] = None, + etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT, + token: Union[bool, str, None] = None, + local_files_only: bool = False, + # Deprecated args + resume_download: Optional[bool] = None, + force_filename: Optional[str] = None, + local_dir_use_symlinks: Union[bool, Literal["auto"]] = "auto", + ) -> str: + """Download a given file if it's not already present in the local cache. + + The new cache file layout looks like this: + - The cache directory contains one subfolder per repo_id (namespaced by repo type) + - inside each repo folder: + - refs is a list of the latest known revision => commit_hash pairs + - blobs contains the actual file blobs (identified by their git-sha or sha256, depending on + whether they're LFS files or not) + - snapshots contains one subfolder per commit, each "commit" contains the subset of the files + that have been resolved at that particular commit. Each filename is a symlink to the blob + at that particular commit. + + ``` + [ 96] . 
+ └── [ 160] models--julien-c--EsperBERTo-small + ├── [ 160] blobs + │ ├── [321M] 403450e234d65943a7dcf7e05a771ce3c92faa84dd07db4ac20f592037a1e4bd + │ ├── [ 398] 7cb18dc9bafbfcf74629a4b760af1b160957a83e + │ └── [1.4K] d7edf6bd2a681fb0175f7735299831ee1b22b812 + ├── [ 96] refs + │ └── [ 40] main + └── [ 128] snapshots + ├── [ 128] 2439f60ef33a0d46d85da5001d52aeda5b00ce9f + │ ├── [ 52] README.md -> ../../blobs/d7edf6bd2a681fb0175f7735299831ee1b22b812 + │ └── [ 76] pytorch_model.bin -> ../../blobs/403450e234d65943a7dcf7e05a771ce3c92faa84dd07db4ac20f592037a1e4bd + └── [ 128] bbc77c8132af1cc5cf678da3f1ddf2de43606d48 + ├── [ 52] README.md -> ../../blobs/7cb18dc9bafbfcf74629a4b760af1b160957a83e + └── [ 76] pytorch_model.bin -> ../../blobs/403450e234d65943a7dcf7e05a771ce3c92faa84dd07db4ac20f592037a1e4bd + ``` + + If `local_dir` is provided, the file structure from the repo will be replicated in this location. When using this + option, the `cache_dir` will not be used and a `.cache/huggingface/` folder will be created at the root of `local_dir` + to store some metadata related to the downloaded files. While this mechanism is not as robust as the main + cache-system, it's optimized for regularly pulling the latest version of a repository. + + Args: + repo_id (`str`): + A user or an organization name and a repo name separated by a `/`. + filename (`str`): + The name of the file in the repo. + subfolder (`str`, *optional*): + An optional value corresponding to a folder inside the repository. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if downloading from a dataset or space, + `None` or `"model"` if downloading from a model. Default is `None`. + revision (`str`, *optional*): + An optional Git revision id which can be a branch name, a tag, or a + commit hash. + cache_dir (`str`, `Path`, *optional*): + Path to the folder where cached files are stored. 
+ local_dir (`str` or `Path`, *optional*): + If provided, the downloaded file will be placed under this directory. + force_download (`bool`, *optional*, defaults to `False`): + Whether the file should be downloaded even if it already exists in + the local cache. + proxies (`dict`, *optional*): + Dictionary mapping protocol to the URL of the proxy passed to + `requests.request`. + etag_timeout (`float`, *optional*, defaults to `10`): + When fetching ETag, how many seconds to wait for the server to send + data before giving up which is passed to `requests.request`. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + local_files_only (`bool`, *optional*, defaults to `False`): + If `True`, avoid downloading the file and return the path to the + local cached file if it exists. + + Returns: + `str`: Local path of file or if networking is off, last version of file cached on disk. + + Raises: + [`~utils.RepositoryNotFoundError`] + If the repository to download from cannot be found. This may be because it doesn't exist, + or because it is set to `private` and you do not have access. + [`~utils.RevisionNotFoundError`] + If the revision to download from cannot be found. + [`~utils.EntryNotFoundError`] + If the file to download cannot be found. + [`~utils.LocalEntryNotFoundError`] + If network is disabled or unavailable and file is not found in cache. + [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError) + If `token=True` but the token cannot be found. + [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError) + If ETag cannot be determined. + [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + If some parameter value is invalid. 
+ """ + from .file_download import hf_hub_download + + if token is None: + # Cannot do `token = token or self.token` as token can be `False`. + token = self.token + + return hf_hub_download( + repo_id=repo_id, + filename=filename, + subfolder=subfolder, + repo_type=repo_type, + revision=revision, + endpoint=self.endpoint, + library_name=self.library_name, + library_version=self.library_version, + cache_dir=cache_dir, + local_dir=local_dir, + local_dir_use_symlinks=local_dir_use_symlinks, + user_agent=self.user_agent, + force_download=force_download, + force_filename=force_filename, + proxies=proxies, + etag_timeout=etag_timeout, + resume_download=resume_download, + token=token, + headers=self.headers, + local_files_only=local_files_only, + ) + + @validate_hf_hub_args + def snapshot_download( + self, + repo_id: str, + *, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + cache_dir: Union[str, Path, None] = None, + local_dir: Union[str, Path, None] = None, + proxies: Optional[Dict] = None, + etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT, + force_download: bool = False, + token: Union[bool, str, None] = None, + local_files_only: bool = False, + allow_patterns: Optional[Union[List[str], str]] = None, + ignore_patterns: Optional[Union[List[str], str]] = None, + max_workers: int = 8, + tqdm_class: Optional[Type[base_tqdm]] = None, + # Deprecated args + local_dir_use_symlinks: Union[bool, Literal["auto"]] = "auto", + resume_download: Optional[bool] = None, + ) -> str: + """Download repo files. + + Download a whole snapshot of a repo's files at the specified revision. This is useful when you want all files from + a repo, because you don't know which ones you will need a priori. All files are nested inside a folder in order + to keep their actual filename relative to that folder. You can also filter which files to download using + `allow_patterns` and `ignore_patterns`. 
+ + If `local_dir` is provided, the file structure from the repo will be replicated in this location. When using this + option, the `cache_dir` will not be used and a `.cache/huggingface/` folder will be created at the root of `local_dir` + to store some metadata related to the downloaded files. While this mechanism is not as robust as the main + cache-system, it's optimized for regularly pulling the latest version of a repository. + + An alternative would be to clone the repo but this requires git and git-lfs to be installed and properly + configured. It is also not possible to filter which files to download when cloning a repository using git. + + Args: + repo_id (`str`): + A user or an organization name and a repo name separated by a `/`. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if downloading from a dataset or space, + `None` or `"model"` if downloading from a model. Default is `None`. + revision (`str`, *optional*): + An optional Git revision id which can be a branch name, a tag, or a + commit hash. + cache_dir (`str`, `Path`, *optional*): + Path to the folder where cached files are stored. + local_dir (`str` or `Path`, *optional*): + If provided, the downloaded files will be placed under this directory. + proxies (`dict`, *optional*): + Dictionary mapping protocol to the URL of the proxy passed to + `requests.request`. + etag_timeout (`float`, *optional*, defaults to `10`): + When fetching ETag, how many seconds to wait for the server to send + data before giving up which is passed to `requests.request`. + force_download (`bool`, *optional*, defaults to `False`): + Whether the file should be downloaded even if it already exists in the local cache. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. 
+ local_files_only (`bool`, *optional*, defaults to `False`): + If `True`, avoid downloading the file and return the path to the + local cached file if it exists. + allow_patterns (`List[str]` or `str`, *optional*): + If provided, only files matching at least one pattern are downloaded. + ignore_patterns (`List[str]` or `str`, *optional*): + If provided, files matching any of the patterns are not downloaded. + max_workers (`int`, *optional*): + Number of concurrent threads to download files (1 thread = 1 file download). + Defaults to 8. + tqdm_class (`tqdm`, *optional*): + If provided, overwrites the default behavior for the progress bar. Passed + argument must inherit from `tqdm.auto.tqdm` or at least mimic its behavior. + Note that the `tqdm_class` is not passed to each individual download. + Defaults to the custom HF progress bar that can be disabled by setting + `HF_HUB_DISABLE_PROGRESS_BARS` environment variable. + + Returns: + `str`: folder path of the repo snapshot. + + Raises: + [`~utils.RepositoryNotFoundError`] + If the repository to download from cannot be found. This may be because it doesn't exist, + or because it is set to `private` and you do not have access. + [`~utils.RevisionNotFoundError`] + If the revision to download from cannot be found. + [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError) + If `token=True` and the token cannot be found. + [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError) if + ETag cannot be determined. + [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + if some parameter value is invalid. + """ + from ._snapshot_download import snapshot_download + + if token is None: + # Cannot do `token = token or self.token` as token can be `False`. 
+ token = self.token + + return snapshot_download( + repo_id=repo_id, + repo_type=repo_type, + revision=revision, + endpoint=self.endpoint, + cache_dir=cache_dir, + local_dir=local_dir, + local_dir_use_symlinks=local_dir_use_symlinks, + library_name=self.library_name, + library_version=self.library_version, + user_agent=self.user_agent, + proxies=proxies, + etag_timeout=etag_timeout, + resume_download=resume_download, + force_download=force_download, + token=token, + local_files_only=local_files_only, + allow_patterns=allow_patterns, + ignore_patterns=ignore_patterns, + max_workers=max_workers, + tqdm_class=tqdm_class, + ) + + def get_safetensors_metadata( + self, + repo_id: str, + *, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + token: Union[bool, str, None] = None, + ) -> SafetensorsRepoMetadata: + """ + Parse metadata for a safetensors repo on the Hub. + + We first check if the repo has a single safetensors file or a sharded safetensors repo. If it's a single + safetensors file, we parse the metadata from this file. If it's a sharded safetensors repo, we parse the + metadata from the index file and then parse the metadata from each shard. + + To parse metadata from a single safetensors file, use [`parse_safetensors_file_metadata`]. + + For more details regarding the safetensors format, check out https://huggingface.co/docs/safetensors/index#format. + + Args: + repo_id (`str`): + A user or an organization name and a repo name separated by a `/`. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if the file is in a dataset or space, `None` or `"model"` if in a + model. Default is `None`. + revision (`str`, *optional*): + The git revision to fetch the file from. Can be a branch name, a tag, or a commit hash. Defaults to the + head of the `"main"` branch. + token (`bool` or `str`, *optional*): + A valid user access token (string). 
Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + [`SafetensorsRepoMetadata`]: information related to safetensors repo. + + Raises: + [`NotASafetensorsRepoError`] + If the repo is not a safetensors repo i.e. doesn't have either a + `model.safetensors` or a `model.safetensors.index.json` file. + [`SafetensorsParsingError`] + If a safetensors file header couldn't be parsed correctly. + + Example: + ```py + # Parse repo with single weights file + >>> metadata = get_safetensors_metadata("bigscience/bloomz-560m") + >>> metadata + SafetensorsRepoMetadata( + metadata=None, + sharded=False, + weight_map={'h.0.input_layernorm.bias': 'model.safetensors', ...}, + files_metadata={'model.safetensors': SafetensorsFileMetadata(...)} + ) + >>> metadata.files_metadata["model.safetensors"].metadata + {'format': 'pt'} + + # Parse repo with sharded model + >>> metadata = get_safetensors_metadata("bigscience/bloom") + Parse safetensors files: 100%|██████████████████████████████████████████| 72/72 [00:12<00:00, 5.78it/s] + >>> metadata + SafetensorsRepoMetadata(metadata={'total_size': 352494542848}, sharded=True, weight_map={...}, files_metadata={...}) + >>> len(metadata.files_metadata) + 72 # All safetensors files have been fetched + + # Parse repo that is not a safetensors repo + >>> get_safetensors_metadata("runwayml/stable-diffusion-v1-5") + NotASafetensorsRepoError: 'runwayml/stable-diffusion-v1-5' is not a safetensors repo. Couldn't find 'model.safetensors.index.json' or 'model.safetensors' files. 
+ ``` + """ + if self.file_exists( # Single safetensors file => non-sharded model + repo_id=repo_id, + filename=constants.SAFETENSORS_SINGLE_FILE, + repo_type=repo_type, + revision=revision, + token=token, + ): + file_metadata = self.parse_safetensors_file_metadata( + repo_id=repo_id, + filename=constants.SAFETENSORS_SINGLE_FILE, + repo_type=repo_type, + revision=revision, + token=token, + ) + return SafetensorsRepoMetadata( + metadata=None, + sharded=False, + weight_map={ + tensor_name: constants.SAFETENSORS_SINGLE_FILE for tensor_name in file_metadata.tensors.keys() + }, + files_metadata={constants.SAFETENSORS_SINGLE_FILE: file_metadata}, + ) + elif self.file_exists( # Multiple safetensors files => sharded with index + repo_id=repo_id, + filename=constants.SAFETENSORS_INDEX_FILE, + repo_type=repo_type, + revision=revision, + token=token, + ): + # Fetch index + index_file = self.hf_hub_download( + repo_id=repo_id, + filename=constants.SAFETENSORS_INDEX_FILE, + repo_type=repo_type, + revision=revision, + token=token, + ) + with open(index_file) as f: + index = json.load(f) + + weight_map = index.get("weight_map", {}) + + # Fetch metadata per shard + files_metadata = {} + + def _parse(filename: str) -> None: + files_metadata[filename] = self.parse_safetensors_file_metadata( + repo_id=repo_id, filename=filename, repo_type=repo_type, revision=revision, token=token + ) + + thread_map( + _parse, + set(weight_map.values()), + desc="Parse safetensors files", + tqdm_class=hf_tqdm, + ) + + return SafetensorsRepoMetadata( + metadata=index.get("metadata", None), + sharded=True, + weight_map=weight_map, + files_metadata=files_metadata, + ) + else: + # Not a safetensors repo + raise NotASafetensorsRepoError( + f"'{repo_id}' is not a safetensors repo. Couldn't find '{constants.SAFETENSORS_INDEX_FILE}' or '{constants.SAFETENSORS_SINGLE_FILE}' files." 
+ ) + + def parse_safetensors_file_metadata( + self, + repo_id: str, + filename: str, + *, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + token: Union[bool, str, None] = None, + ) -> SafetensorsFileMetadata: + """ + Parse metadata from a safetensors file on the Hub. + + To parse metadata from all safetensors files in a repo at once, use [`get_safetensors_metadata`]. + + For more details regarding the safetensors format, check out https://huggingface.co/docs/safetensors/index#format. + + Args: + repo_id (`str`): + A user or an organization name and a repo name separated by a `/`. + filename (`str`): + The name of the file in the repo. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if the file is in a dataset or space, `None` or `"model"` if in a + model. Default is `None`. + revision (`str`, *optional*): + The git revision to fetch the file from. Can be a branch name, a tag, or a commit hash. Defaults to the + head of the `"main"` branch. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + [`SafetensorsFileMetadata`]: information related to a safetensors file. + + Raises: + [`NotASafetensorsRepoError`]: + If the repo is not a safetensors repo i.e. doesn't have either a + `model.safetensors` or a `model.safetensors.index.json` file. + [`SafetensorsParsingError`]: + If a safetensors file header couldn't be parsed correctly. + """ + url = hf_hub_url( + repo_id=repo_id, filename=filename, repo_type=repo_type, revision=revision, endpoint=self.endpoint + ) + _headers = self._build_hf_headers(token=token) + + # 1. Fetch first 100kb + # Empirically, 97% of safetensors files have a metadata size < 100kb (over the top 1000 models on the Hub). 
+ # We assume fetching 100kb is faster than making 2 GET requests. Therefore we always fetch the first 100kb to + # avoid the 2nd GET in most cases. + # See https://github.com/huggingface/huggingface_hub/pull/1855#discussion_r1404286419. + response = get_session().get(url, headers={**_headers, "range": "bytes=0-100000"}) + hf_raise_for_status(response) + + # 2. Parse metadata size + metadata_size = struct.unpack("<Q", response.content[:8])[0] + if metadata_size > constants.SAFETENSORS_MAX_HEADER_LENGTH: + raise SafetensorsParsingError( + f"Failed to parse safetensors header for '{filename}' (repo '{repo_id}', revision " + f"'{revision or constants.DEFAULT_REVISION}'): safetensors header is too big. Maximum supported size is " + f"{constants.SAFETENSORS_MAX_HEADER_LENGTH} bytes (got {metadata_size})." + ) + + # 3.a. Get metadata from payload + if metadata_size <= 100000: + metadata_as_bytes = response.content[8 : 8 + metadata_size] + else: # 3.b. Request full metadata + response = get_session().get(url, headers={**_headers, "range": f"bytes=8-{metadata_size + 7}"}) + hf_raise_for_status(response) + metadata_as_bytes = response.content + + # 4. Parse json header + try: + metadata_as_dict = json.loads(metadata_as_bytes.decode(errors="ignore")) + except json.JSONDecodeError as e: + raise SafetensorsParsingError( + f"Failed to parse safetensors header for '{filename}' (repo '{repo_id}', revision " + f"'{revision or constants.DEFAULT_REVISION}'): header is not json-encoded string. Please make sure this is a " + "correctly formatted safetensors file." 
+ ) from e + + try: + return SafetensorsFileMetadata( + metadata=metadata_as_dict.get("__metadata__", {}), + tensors={ + key: TensorInfo( + dtype=tensor["dtype"], + shape=tensor["shape"], + data_offsets=tuple(tensor["data_offsets"]), # type: ignore + ) + for key, tensor in metadata_as_dict.items() + if key != "__metadata__" + }, + ) + except (KeyError, IndexError) as e: + raise SafetensorsParsingError( + f"Failed to parse safetensors header for '{filename}' (repo '{repo_id}', revision " + f"'{revision or constants.DEFAULT_REVISION}'): header format not recognized. Please make sure this is a correctly" + " formatted safetensors file." + ) from e + + @validate_hf_hub_args + def create_branch( + self, + repo_id: str, + *, + branch: str, + revision: Optional[str] = None, + token: Union[bool, str, None] = None, + repo_type: Optional[str] = None, + exist_ok: bool = False, + ) -> None: + """ + Create a new branch for a repo on the Hub, starting from the specified revision (defaults to `main`). + To find a revision suiting your needs, you can use [`list_repo_refs`] or [`list_repo_commits`]. + + Args: + repo_id (`str`): + The repository in which the branch will be created. + Example: `"user/my-cool-model"`. + + branch (`str`): + The name of the branch to create. + + revision (`str`, *optional*): + The git revision to create the branch from. It can be a branch name or + the OID/SHA of a commit, as a hexadecimal string. Defaults to the head + of the `"main"` branch. + + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if creating a branch on a dataset or + space, `None` or `"model"` if tagging a model. Default is `None`. 
+ + exist_ok (`bool`, *optional*, defaults to `False`): + If `True`, do not raise an error if branch already exists. + + Raises: + [`~utils.RepositoryNotFoundError`]: + If repository is not found (error 404): wrong repo_id/repo_type, private + but not authenticated or repo does not exist. + [`~utils.BadRequestError`]: + If invalid reference for a branch. Ex: `refs/pr/5` or 'refs/foo/bar'. + [`~utils.HfHubHTTPError`]: + If the branch already exists on the repo (error 409) and `exist_ok` is + set to `False`. + """ + if repo_type is None: + repo_type = constants.REPO_TYPE_MODEL + branch = quote(branch, safe="") + + # Prepare request + branch_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/branch/{branch}" + headers = self._build_hf_headers(token=token) + payload = {} + if revision is not None: + payload["startingPoint"] = revision + + # Create branch + response = get_session().post(url=branch_url, headers=headers, json=payload) + try: + hf_raise_for_status(response) + except HfHubHTTPError as e: + if exist_ok and e.response.status_code == 409: + return + elif exist_ok and e.response.status_code == 403: + # No write permission on the namespace but branch might already exist + try: + refs = self.list_repo_refs(repo_id=repo_id, repo_type=repo_type, token=token) + for branch_ref in refs.branches: + if branch_ref.name == branch: + return # Branch already exists => do not raise + except HfHubHTTPError: + pass # We raise the original error if the branch does not exist + raise + + @validate_hf_hub_args + def delete_branch( + self, + repo_id: str, + *, + branch: str, + token: Union[bool, str, None] = None, + repo_type: Optional[str] = None, + ) -> None: + """ + Delete a branch from a repo on the Hub. + + Args: + repo_id (`str`): + The repository in which a branch will be deleted. + Example: `"user/my-cool-model"`. + + branch (`str`): + The name of the branch to delete. + + token (`bool` or `str`, *optional*): + A valid user access token (string). 
Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if deleting a branch from a dataset or + space, `None` or `"model"` if deleting a branch from a model. Default is `None`. + + Raises: + [`~utils.RepositoryNotFoundError`]: + If repository is not found (error 404): wrong repo_id/repo_type, private + but not authenticated or repo does not exist. + [`~utils.HfHubHTTPError`]: + If trying to delete a protected branch. Ex: `main` cannot be deleted. + [`~utils.HfHubHTTPError`]: + If trying to delete a branch that does not exist. + + """ + if repo_type is None: + repo_type = constants.REPO_TYPE_MODEL + branch = quote(branch, safe="") + + # Prepare request + branch_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/branch/{branch}" + headers = self._build_hf_headers(token=token) + + # Delete branch + response = get_session().delete(url=branch_url, headers=headers) + hf_raise_for_status(response) + + @validate_hf_hub_args + def create_tag( + self, + repo_id: str, + *, + tag: str, + tag_message: Optional[str] = None, + revision: Optional[str] = None, + token: Union[bool, str, None] = None, + repo_type: Optional[str] = None, + exist_ok: bool = False, + ) -> None: + """ + Tag a given commit of a repo on the Hub. + + Args: + repo_id (`str`): + The repository in which a commit will be tagged. + Example: `"user/my-cool-model"`. + + tag (`str`): + The name of the tag to create. + + tag_message (`str`, *optional*): + The description of the tag to create. + + revision (`str`, *optional*): + The git revision to tag. It can be a branch name or the OID/SHA of a + commit, as a hexadecimal string. Shorthands (7 first characters) are + also supported. Defaults to the head of the `"main"` branch. + + token (`bool` or `str`, *optional*): + A valid user access token (string). 
Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if tagging a dataset or + space, `None` or `"model"` if tagging a model. Default is + `None`. + + exist_ok (`bool`, *optional*, defaults to `False`): + If `True`, do not raise an error if tag already exists. + + Raises: + [`~utils.RepositoryNotFoundError`]: + If repository is not found (error 404): wrong repo_id/repo_type, private + but not authenticated or repo does not exist. + [`~utils.RevisionNotFoundError`]: + If revision is not found (error 404) on the repo. + [`~utils.HfHubHTTPError`]: + If the tag already exists on the repo (error 409) and `exist_ok` is + set to `False`. + """ + if repo_type is None: + repo_type = constants.REPO_TYPE_MODEL + revision = quote(revision, safe="") if revision is not None else constants.DEFAULT_REVISION + + # Prepare request + tag_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/tag/{revision}" + headers = self._build_hf_headers(token=token) + payload = {"tag": tag} + if tag_message is not None: + payload["message"] = tag_message + + # Tag + response = get_session().post(url=tag_url, headers=headers, json=payload) + try: + hf_raise_for_status(response) + except HfHubHTTPError as e: + if not (e.response.status_code == 409 and exist_ok): + raise + + @validate_hf_hub_args + def delete_tag( + self, + repo_id: str, + *, + tag: str, + token: Union[bool, str, None] = None, + repo_type: Optional[str] = None, + ) -> None: + """ + Delete a tag from a repo on the Hub. + + Args: + repo_id (`str`): + The repository in which a tag will be deleted. + Example: `"user/my-cool-model"`. + + tag (`str`): + The name of the tag to delete. + + token (`bool` or `str`, *optional*): + A valid user access token (string). 
Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if tagging a dataset or space, `None` or + `"model"` if tagging a model. Default is `None`. + + Raises: + [`~utils.RepositoryNotFoundError`]: + If repository is not found (error 404): wrong repo_id/repo_type, private + but not authenticated or repo does not exist. + [`~utils.RevisionNotFoundError`]: + If tag is not found. + """ + if repo_type is None: + repo_type = constants.REPO_TYPE_MODEL + tag = quote(tag, safe="") + + # Prepare request + tag_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/tag/{tag}" + headers = self._build_hf_headers(token=token) + + # Un-tag + response = get_session().delete(url=tag_url, headers=headers) + hf_raise_for_status(response) + + @validate_hf_hub_args + def get_full_repo_name( + self, + model_id: str, + *, + organization: Optional[str] = None, + token: Union[bool, str, None] = None, + ): + """ + Returns the repository name for a given model ID and optional + organization. + + Args: + model_id (`str`): + The name of the model. + organization (`str`, *optional*): + If passed, the repository name will be in the organization + namespace instead of the user namespace. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + `str`: The repository name in the user's namespace + ({username}/{model_id}) if no organization is passed, and under the + organization namespace ({organization}/{model_id}) otherwise. 
@validate_hf_hub_args
def get_repo_discussions(
    self,
    repo_id: str,
    *,
    author: Optional[str] = None,
    discussion_type: Optional[constants.DiscussionTypeFilter] = None,
    discussion_status: Optional[constants.DiscussionStatusFilter] = None,
    repo_type: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> Iterator[Discussion]:
    """
    Lazily iterates over the Discussions and Pull Requests of a repo.

    This is a generator: result pages are requested from the Hub one at a time,
    only as the iterator is consumed, and the argument checks in this function
    body run when iteration starts.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        author (`str`, *optional*):
            Only return discussions opened by this author. `None` (default)
            disables the filter.
        discussion_type (`str`, *optional*):
            `"pull_request"` to fetch only pull requests, `"discussion"` to
            fetch only discussions, `"all"` or `None` (default) for both.
        discussion_status (`str`, *optional*):
            `"open"` or `"closed"` to fetch only discussions in that state,
            `"all"` or `None` (default) for both.
        repo_type (`str`, *optional*):
            `"dataset"` or `"space"` when the repo is a dataset or a space,
            `None` (default) or `"model"` for a model.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Iterator[Discussion]`: An iterator of [`Discussion`] objects.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError):
            If `repo_type`, `discussion_type` or `discussion_status` is not
            one of the accepted values.

    Example:
        Collecting all discussions of a repo in a list:

        ```python
        >>> from huggingface_hub import get_repo_discussions
        >>> discussions_list = list(get_repo_discussions(repo_id="bert-base-uncased"))
        ```

        Iterating over discussions of a repo:

        ```python
        >>> from huggingface_hub import get_repo_discussions
        >>> for discussion in get_repo_discussions(repo_id="bert-base-uncased"):
        ...     print(discussion.num, discussion.title)
        ```
    """
    if repo_type not in constants.REPO_TYPES:
        raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}")
    if repo_type is None:
        repo_type = constants.REPO_TYPE_MODEL
    if not (discussion_type is None or discussion_type in constants.DISCUSSION_TYPES):
        raise ValueError(f"Invalid discussion_type, must be one of {constants.DISCUSSION_TYPES}")
    if not (discussion_status is None or discussion_status in constants.DISCUSSION_STATUS):
        raise ValueError(f"Invalid discussion_status, must be one of {constants.DISCUSSION_STATUS}")

    headers = self._build_hf_headers(token=token)
    path = f"{self.endpoint}/api/{repo_type}s/{repo_id}/discussions"

    # Only the filters that were actually provided are sent to the server.
    query: Dict[str, Union[str, int]] = {}
    for name, value in (("type", discussion_type), ("status", discussion_status), ("author", author)):
        if value is not None:
            query[name] = value

    page = 0
    while True:
        query["p"] = page
        response = get_session().get(path, headers=headers, params=query)
        hf_raise_for_status(response)

        payload = response.json()
        total = payload["count"]
        start = payload["start"]
        batch = payload["discussions"]
        # More pages remain as long as the items seen so far do not reach the reported total.
        more_pages = (start + len(batch)) < total

        for item in batch:
            yield Discussion(
                title=item["title"],
                num=item["num"],
                # Falls back to "deleted" when the payload carries no author name.
                author=item.get("author", {}).get("name", "deleted"),
                created_at=parse_datetime(item["createdAt"]),
                status=item["status"],
                repo_id=item["repo"]["name"],
                repo_type=item["repo"]["type"],
                is_pull_request=item["isPullRequest"],
                endpoint=self.endpoint,
            )

        if not more_pages:
            return
        page += 1
@validate_hf_hub_args
def create_discussion(
    self,
    repo_id: str,
    title: str,
    *,
    token: Union[bool, str, None] = None,
    description: Optional[str] = None,
    repo_type: Optional[str] = None,
    pull_request: bool = False,
) -> DiscussionWithDetails:
    """Opens a new Discussion or Pull Request on a repo.

    Pull Requests created programmatically will be in `"draft"` status.

    Creating a Pull Request with changes can also be done at once with [`HfApi.create_commit`].

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        title (`str`):
            The title of the discussion. It can be up to 200 characters long,
            and must be at least 3 characters long. Leading and trailing whitespaces
            will be stripped.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        description (`str`, *optional*):
            Body of the Discussion or Pull Request. Surrounding whitespace is
            stripped. When omitted or blank, a default text is used:
            `"Pull Request opened with the [huggingface_hub Python library](...)"`
            or `"Discussion opened with the [huggingface_hub Python library](...)"`,
            depending on `pull_request`.
        pull_request (`bool`, *optional*):
            If `True`, creates a Pull Request. If `False` (default), creates a
            discussion.
        repo_type (`str`, *optional*):
            `"dataset"` or `"space"` when the repo is a dataset or a space,
            `None` (default) or `"model"` for a model.

    Returns: [`DiscussionWithDetails`], fetched with a follow-up call to
        [`HfApi.get_discussion_details`] once the creation request succeeded.

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
    >       if the HuggingFace API returned an error
    >     - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
    >       if some parameter value is invalid
    >     - [`~utils.RepositoryNotFoundError`]
    >       If the repository cannot be found. This may be because it doesn't exist,
    >       or because it is set to `private` and you do not have access."""
    if repo_type not in constants.REPO_TYPES:
        raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}")
    if repo_type is None:
        repo_type = constants.REPO_TYPE_MODEL

    text = description.strip() if description is not None else None
    if not text:
        # Missing or whitespace-only description: fall back to the generated default.
        kind = "Pull Request" if pull_request else "Discussion"
        text = (
            f"{kind} opened with the"
            " [huggingface_hub Python"
            " library](https://huggingface.co/docs/huggingface_hub)"
        )

    headers = self._build_hf_headers(token=token)
    session = get_session()
    response = session.post(
        f"{self.endpoint}/api/{repo_type}s/{repo_id}/discussions",
        json={
            "title": title.strip(),
            "description": text,
            "pullRequest": pull_request,
        },
        headers=headers,
    )
    hf_raise_for_status(response)

    # The creation response only provides the number; load the full details separately.
    created_num = response.json()["num"]
    return self.get_discussion_details(
        repo_id=repo_id,
        repo_type=repo_type,
        discussion_num=created_num,
        token=token,
    )
def _post_discussion_changes(
    self,
    *,
    repo_id: str,
    discussion_num: int,
    resource: str,
    body: Optional[dict] = None,
    token: Union[bool, str, None] = None,
    repo_type: Optional[str] = None,
) -> requests.Response:
    """Internal utility to POST changes to a Discussion or Pull Request.

    Builds the URL `{endpoint}/api/{repo_type}s/{repo_id}/discussions/{discussion_num}/{resource}`
    and sends `body` as the JSON payload.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        discussion_num (`int`):
            The number of the Discussion or Pull Request. Must be a strictly positive integer.
        resource (`str`):
            Trailing path segment identifying the change to apply (the callers in
            this class use e.g. `"comment"`, `"title"`, `"status"`, `"merge"`).
        body (`dict`, *optional*):
            JSON payload of the request. `None` sends no JSON body.
        token (`bool` or `str`, *optional*):
            Forwarded to `_build_hf_headers` to build the request headers.
        repo_type (`str`, *optional*):
            `"dataset"` or `"space"` when the repo is a dataset or a space,
            `None` (default) or `"model"` for a model.

    Returns:
        `requests.Response`: the response, after `hf_raise_for_status` accepted it.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError):
            If `discussion_num` is not a strictly positive integer or
            `repo_type` is not one of `constants.REPO_TYPES`.
    """
    if not isinstance(discussion_num, int) or discussion_num <= 0:
        raise ValueError("Invalid discussion_num, must be a positive integer")
    if repo_type not in constants.REPO_TYPES:
        raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}")
    if repo_type is None:
        repo_type = constants.REPO_TYPE_MODEL
    repo_id = f"{repo_type}s/{repo_id}"

    path = f"{self.endpoint}/api/{repo_id}/discussions/{discussion_num}/{resource}"

    headers = self._build_hf_headers(token=token)
    # Go through the shared session like every other request in this class,
    # instead of calling `requests.post` directly, so that the library-wide
    # HTTP session configuration also applies to discussion changes.
    resp = get_session().post(path, headers=headers, json=body)
    hf_raise_for_status(resp)
    return resp
@validate_hf_hub_args
def rename_discussion(
    self,
    repo_id: str,
    discussion_num: int,
    new_title: str,
    *,
    token: Union[bool, str, None] = None,
    repo_type: Optional[str] = None,
) -> DiscussionTitleChange:
    """Changes the title of a Discussion or Pull Request.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        discussion_num (`int`):
            The number of the Discussion or Pull Request. Must be a strictly positive integer.
        new_title (`str`):
            The new title for the discussion.
        repo_type (`str`, *optional*):
            `"dataset"` or `"space"` when the repo is a dataset or a space,
            `None` (default) or `"model"` for a model.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`DiscussionTitleChange`]: the title change event

    Examples:
        ```python
        >>> new_title = "New title, fixing a typo"
        >>> HfApi().rename_discussion(
        ...     repo_id="username/repo_name",
        ...     discussion_num=34,
        ...     new_title=new_title,
        ... )
        # DiscussionTitleChange(id='deadbeef0000000', type='title-change', ...)

        ```

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
    >       if the HuggingFace API returned an error
    >     - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
    >       if some parameter value is invalid
    >     - [`~utils.RepositoryNotFoundError`]
    >       If the repository cannot be found. This may be because it doesn't exist,
    >       or because it is set to `private` and you do not have access.
    """
    payload = {"title": new_title}
    response = self._post_discussion_changes(
        repo_id=repo_id,
        repo_type=repo_type,
        discussion_num=discussion_num,
        token=token,
        resource="title",
        body=payload,
    )
    # The title-change event is returned under the "newTitle" key of the response.
    title_event = response.json()["newTitle"]
    return deserialize_event(title_event)  # type: ignore
@validate_hf_hub_args
def merge_pull_request(
    self,
    repo_id: str,
    discussion_num: int,
    *,
    token: Union[bool, str, None] = None,
    comment: Optional[str] = None,
    repo_type: Optional[str] = None,
):
    """Merges a Pull Request.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        discussion_num (`int`):
            The number of the Discussion or Pull Request. Must be a strictly positive integer.
        comment (`str`, *optional*):
            An optional comment to post with the merge. Surrounding whitespace
            is stripped; an empty or whitespace-only comment is not sent.
        repo_type (`str`, *optional*):
            `"dataset"` or `"space"` when the repo is a dataset or a space,
            `None` (default) or `"model"` for a model.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `None`: the response of the merge request is not parsed or returned.

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
    >       if the HuggingFace API returned an error
    >     - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
    >       if some parameter value is invalid
    >     - [`~utils.RepositoryNotFoundError`]
    >       If the repository cannot be found. This may be because it doesn't exist,
    >       or because it is set to `private` and you do not have access.
    """
    # Only attach a payload when there is a non-blank comment to post.
    note = comment.strip() if comment else ""
    payload = {"comment": note} if note else None

    self._post_discussion_changes(
        repo_id=repo_id,
        repo_type=repo_type,
        discussion_num=discussion_num,
        token=token,
        resource="merge",
        body=payload,
    )
@validate_hf_hub_args
def hide_discussion_comment(
    self,
    repo_id: str,
    discussion_num: int,
    comment_id: str,
    *,
    token: Union[bool, str, None] = None,
    repo_type: Optional[str] = None,
) -> DiscussionComment:
    """Hides a comment on a Discussion / Pull Request.

    > [!WARNING]
    > Hidden comments' content cannot be retrieved anymore. Hiding a comment is irreversible.

    A `UserWarning` carrying the same message is emitted on every call, before
    the request is sent.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        discussion_num (`int`):
            The number of the Discussion or Pull Request. Must be a strictly positive integer.
        comment_id (`str`):
            The ID of the comment to hide. It is lower-cased before being
            inserted in the request URL.
        repo_type (`str`, *optional*):
            `"dataset"` or `"space"` when the repo is a dataset or a space,
            `None` (default) or `"model"` for a model.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`DiscussionComment`]: the hidden comment

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
    >       if the HuggingFace API returned an error
    >     - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
    >       if some parameter value is invalid
    >     - [`~utils.RepositoryNotFoundError`]
    >       If the repository cannot be found. This may be because it doesn't exist,
    >       or because it is set to `private` and you do not have access.
    """
    warnings.warn(
        "Hidden comments' content cannot be retrieved anymore. Hiding a comment is irreversible.",
        UserWarning,
    )
    hide_resource = f"comment/{comment_id.lower()}/hide"
    response = self._post_discussion_changes(
        repo_id=repo_id,
        repo_type=repo_type,
        discussion_num=discussion_num,
        token=token,
        resource=hide_resource,
    )
    # The updated (now hidden) comment is returned under the "updatedComment" key.
    updated = response.json()["updatedComment"]
    return deserialize_event(updated)  # type: ignore
@validate_hf_hub_args
def delete_space_secret(self, repo_id: str, key: str, *, token: Union[bool, str, None] = None) -> None:
    """Removes a secret from a Space.

    Secrets allow to set secret keys or tokens to a Space without hardcoding them.
    For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets.

    Args:
        repo_id (`str`):
            ID of the repo to update. Example: `"bigcode/in-the-stack"`.
        key (`str`):
            Secret key. Example: `"GITHUB_API_KEY"`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
    """
    session = get_session()
    secrets_url = f"{self.endpoint}/api/spaces/{repo_id}/secrets"
    auth_headers = self._build_hf_headers(token=token)
    # The key to remove travels in the JSON body of the DELETE request.
    response = session.delete(secrets_url, headers=auth_headers, json={"key": key})
    hf_raise_for_status(response)
@validate_hf_hub_args
def add_space_variable(
    self,
    repo_id: str,
    key: str,
    value: str,
    *,
    description: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> Dict[str, SpaceVariable]:
    """Creates or updates a variable in a Space.

    Variables allow to set environment variables to a Space without hardcoding them.
    For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets-and-environment-variables

    Args:
        repo_id (`str`):
            ID of the repo to update. Example: `"bigcode/in-the-stack"`.
        key (`str`):
            Variable key. Example: `"MODEL_REPO_ID"`
        value (`str`):
            Variable value. Example: `"the_model_repo_id"`.
        description (`str`, *optional*):
            Description of the variable. Example: `"Model Repo ID of the implemented model"`.
            Only sent to the server when it is not `None`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Dict[str, SpaceVariable]`: one [`SpaceVariable`] per entry of the JSON
        object returned by the server, keyed by the entry's name.
    """
    payload = {"key": key, "value": value}
    if description is not None:
        payload["description"] = description

    session = get_session()
    response = session.post(
        f"{self.endpoint}/api/spaces/{repo_id}/variables",
        headers=self._build_hf_headers(token=token),
        json=payload,
    )
    hf_raise_for_status(response)

    variables = {}
    for name, raw in response.json().items():
        variables[name] = SpaceVariable(name, raw)
    return variables
@validate_hf_hub_args
def request_space_hardware(
    self,
    repo_id: str,
    hardware: SpaceHardware,
    *,
    token: Union[bool, str, None] = None,
    sleep_time: Optional[int] = None,
) -> SpaceRuntime:
    """Requests new hardware for a Space.

    Args:
        repo_id (`str`):
            ID of the repo to update. Example: `"bigcode/in-the-stack"`.
        hardware (`str` or [`SpaceHardware`]):
            Hardware on which to run the Space. Example: `"t4-medium"`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        sleep_time (`int`, *optional*):
            Number of seconds of inactivity to wait before a Space is put to sleep. Set to `-1` if you don't want
            your Space to sleep (default behavior for upgraded hardware). For free hardware, you can't configure
            the sleep time (value is fixed to 48 hours of inactivity). Passing a value together with the
            `cpu-basic` hardware emits a `UserWarning`; the request is still sent.
            See https://huggingface.co/docs/hub/spaces-gpus#sleep-time for more details.
    Returns:
        [`SpaceRuntime`]: Runtime information about a Space including Space stage and hardware.

    > [!TIP]
    > It is also possible to request hardware directly when creating the Space repo! See [`create_repo`] for details.
    """
    has_sleep_time = sleep_time is not None
    if has_sleep_time and hardware == SpaceHardware.CPU_BASIC:
        warnings.warn(
            "If your Space runs on the default 'cpu-basic' hardware, it will go to sleep if inactive for more"
            " than 48 hours. This value is not configurable. If you don't want your Space to deactivate or if"
            " you want to set a custom sleep time, you need to upgrade to a paid Hardware.",
            UserWarning,
        )

    request_body: Dict[str, Any] = {"flavor": hardware}
    if has_sleep_time:
        request_body["sleepTimeSeconds"] = sleep_time

    session = get_session()
    response = session.post(
        f"{self.endpoint}/api/spaces/{repo_id}/hardware",
        headers=self._build_hf_headers(token=token),
        json=request_body,
    )
    hf_raise_for_status(response)
    return SpaceRuntime(response.json())
@validate_hf_hub_args
def pause_space(self, repo_id: str, *, token: Union[bool, str, None] = None) -> SpaceRuntime:
    """Pause a Space.

    A paused Space stops executing until its owner restarts it manually. This differs from the sleeping
    state that free Spaces enter after 48h of inactivity. Paused time is not billed to your account, no matter
    the hardware you've selected. To restart your Space, use [`restart_space`] and go to your Space settings page.

    For more details, please visit [the docs](https://huggingface.co/docs/hub/spaces-gpus#pause).

    Args:
        repo_id (`str`):
            ID of the Space to pause. Example: `"Salesforce/BLIP2"`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`SpaceRuntime`]: Runtime information about your Space including `stage=PAUSED` and requested hardware.

    Raises:
        [`~utils.RepositoryNotFoundError`]:
            If your Space is not found (error 404). Most probably wrong repo_id or your space is private but you
            are not authenticated.
        [`~utils.HfHubHTTPError`]:
            403 Forbidden: only the owner of a Space can pause it. If you want to manage a Space that you don't
            own, either ask the owner by opening a Discussion or duplicate the Space.
        [`~utils.BadRequestError`]:
            If your Space is a static Space. Static Spaces are always running and never billed. If you want to hide
            a static Space, you can set it to private.
    """
    session = get_session()
    pause_url = f"{self.endpoint}/api/spaces/{repo_id}/pause"
    response = session.post(pause_url, headers=self._build_hf_headers(token=token))
    hf_raise_for_status(response)
    return SpaceRuntime(response.json())
@validate_hf_hub_args
def duplicate_space(
    self,
    from_id: str,
    to_id: Optional[str] = None,
    *,
    private: Optional[bool] = None,
    token: Union[bool, str, None] = None,
    exist_ok: bool = False,
    hardware: Optional[SpaceHardware] = None,
    storage: Optional[SpaceStorage] = None,
    sleep_time: Optional[int] = None,
    secrets: Optional[List[Dict[str, str]]] = None,
    variables: Optional[List[Dict[str, str]]] = None,
) -> RepoUrl:
    """Duplicate a Space.

    Programmatically duplicate a Space. The new Space will be created in your account and will be in the same state
    as the original Space (running or paused). You can duplicate a Space no matter the current state of a Space.

    Args:
        from_id (`str`):
            ID of the Space to duplicate. Example: `"pharma/CLIP-Interrogator"`.
        to_id (`str`, *optional*):
            ID of the new Space. Example: `"dog/CLIP-Interrogator"`. If not provided, the new Space will have the
            same name as the original Space, but in your account.
        private (`bool`, *optional*):
            Whether the new Space should be private or not. Defaults to the same privacy as the original Space.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        exist_ok (`bool`, *optional*, defaults to `False`):
            If `True`, do not raise an error if repo already exists.
        hardware (`SpaceHardware` or `str`, *optional*):
            Choice of Hardware. Example: `"t4-medium"`. See [`SpaceHardware`] for a complete list.
        storage (`SpaceStorage` or `str`, *optional*):
            Choice of persistent storage tier. Example: `"small"`. See [`SpaceStorage`] for a complete list.
        sleep_time (`int`, *optional*):
            Number of seconds of inactivity to wait before a Space is put to sleep. Set to `-1` if you don't want
            your Space to sleep (default behavior for upgraded hardware). For free hardware, you can't configure
            the sleep time (value is fixed to 48 hours of inactivity).
            See https://huggingface.co/docs/hub/spaces-gpus#sleep-time for more details.
        secrets (`List[Dict[str, str]]`, *optional*):
            A list of secret keys to set in your Space. Each item is in the form
            `{"key": ..., "value": ..., "description": ...}` where description is optional.
            For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets.
        variables (`List[Dict[str, str]]`, *optional*):
            A list of public environment variables to set in your Space. Each item is in the form
            `{"key": ..., "value": ..., "description": ...}` where description is optional.
            For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets-and-environment-variables.

    Returns:
        [`RepoUrl`]: URL to the newly created repo. Value is a subclass of `str` containing
        attributes like `endpoint`, `repo_type` and `repo_id`.

    Raises:
        [`~utils.RepositoryNotFoundError`]:
            If one of `from_id` or `to_id` cannot be found. This may be because it doesn't exist,
            or because it is set to `private` and you do not have access.
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            If the HuggingFace API returned an error

    Example:
    ```python
    >>> from huggingface_hub import duplicate_space

    # Duplicate a Space to your account
    >>> duplicate_space("multimodalart/dreambooth-training")
    RepoUrl('https://huggingface.co/spaces/nateraw/dreambooth-training',...)

    # Can set custom destination id and visibility flag.
    >>> duplicate_space("multimodalart/dreambooth-training", to_id="my-dreambooth", private=True)
    RepoUrl('https://huggingface.co/spaces/nateraw/my-dreambooth',...)
    ```
    """
    # Work out the destination `namespace/repo_name`. The namespace falls back to the
    # authenticated user's name, the repo name falls back to the source Space's name.
    if to_id is None:
        to_namespace = self.whoami(token)["name"]
        to_repo_name = RepoUrl(from_id).repo_name
    else:
        target = RepoUrl(to_id)
        to_namespace = target.namespace if target.namespace is not None else self.whoami(token)["name"]
        to_repo_name = target.repo_name

    # repository must be a valid repo_id (namespace/repo_name).
    payload: Dict[str, Any] = {"repository": f"{to_namespace}/{to_repo_name}"}

    # Only forward the options the caller actually set.
    optional_fields = {
        "private": private,
        "hardware": hardware,
        "storageTier": storage,
        "sleepTimeSeconds": sleep_time,
        "secrets": secrets,
        "variables": variables,
    }
    for field_name, field_value in optional_fields.items():
        if field_value is not None:
            payload[field_name] = field_value

    if sleep_time is not None and hardware == SpaceHardware.CPU_BASIC:
        warnings.warn(
            "If your Space runs on the default 'cpu-basic' hardware, it will go to sleep if inactive for more"
            " than 48 hours. This value is not configurable. If you don't want your Space to deactivate or if"
            " you want to set a custom sleep time, you need to upgrade to a paid Hardware.",
            UserWarning,
        )

    response = get_session().post(
        f"{self.endpoint}/api/spaces/{from_id}/duplicate",
        headers=self._build_hf_headers(token=token),
        json=payload,
    )

    try:
        hf_raise_for_status(response)
    except HTTPError as err:
        # A 409 means the repo already exists: tolerated only when `exist_ok=True`.
        if not (exist_ok and err.response.status_code == 409):
            raise

    return RepoUrl(response.json()["url"], endpoint=self.endpoint)
def list_inference_endpoints(
    self, namespace: Optional[str] = None, *, token: Union[bool, str, None] = None
) -> List[InferenceEndpoint]:
    """Lists all inference endpoints for the given namespace.

    Args:
        namespace (`str`, *optional*):
            The namespace to list endpoints for. Defaults to the current user. Set to `"*"` to list all endpoints
            from all namespaces (i.e. personal namespace and all orgs the user belongs to).
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        List[`InferenceEndpoint`]: A list of all inference endpoints for the given namespace. With
        `namespace="*"`, organizations whose endpoints cannot be listed are skipped (and logged) rather
        than failing the whole call.

    Example:
    ```python
    >>> from huggingface_hub import HfApi
    >>> api = HfApi()
    >>> api.list_inference_endpoints()
    [InferenceEndpoint(name='my-endpoint', ...), ...]
    ```
    """
    # Special case: list all endpoints for all namespaces the user has access to
    if namespace == "*":
        user = self.whoami(token=token)

        # List personal endpoints first. Go through `self` and forward `token` so that this client's
        # configuration and the caller's credentials are used for every request, not only the org ones.
        endpoints: List[InferenceEndpoint] = self.list_inference_endpoints(
            namespace=self._get_namespace(token=token), token=token
        )

        # Then list endpoints for all orgs the user belongs to. Listing is best-effort per org:
        # a failure on one org must not hide the endpoints of the others.
        for org in user.get("orgs", []):
            try:
                endpoints += self.list_inference_endpoints(namespace=org["name"], token=token)
            except HfHubHTTPError as error:
                if error.response.status_code == 401:  # Either no billing or user doesn't have access
                    logger.debug("Cannot list Inference Endpoints for org '%s': %s", org["name"], error)
                else:
                    # Unexpected failure: keep going, but leave a visible trace instead of dropping it silently.
                    logger.warning(
                        "Skipping Inference Endpoints for org '%s' after unexpected error: %s", org["name"], error
                    )

        return endpoints

    # Normal case: list endpoints for a specific namespace
    namespace = namespace or self._get_namespace(token=token)

    response = get_session().get(
        f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}",
        headers=self._build_hf_headers(token=token),
    )
    hf_raise_for_status(response)

    return [
        InferenceEndpoint.from_raw(endpoint, namespace=namespace, token=token)
        for endpoint in response.json()["items"]
    ]
InferenceEndpoint: + """Create a new Inference Endpoint. + + Args: + name (`str`): + The unique name for the new Inference Endpoint. + repository (`str`): + The name of the model repository associated with the Inference Endpoint (e.g. `"gpt2"`). + framework (`str`): + The machine learning framework used for the model (e.g. `"custom"`). + accelerator (`str`): + The hardware accelerator to be used for inference (e.g. `"cpu"`). + instance_size (`str`): + The size or type of the instance to be used for hosting the model (e.g. `"x4"`). + instance_type (`str`): + The cloud instance type where the Inference Endpoint will be deployed (e.g. `"intel-icl"`). + region (`str`): + The cloud region in which the Inference Endpoint will be created (e.g. `"us-east-1"`). + vendor (`str`): + The cloud provider or vendor where the Inference Endpoint will be hosted (e.g. `"aws"`). + account_id (`str`, *optional*): + The account ID used to link a VPC to a private Inference Endpoint (if applicable). + min_replica (`int`, *optional*): + The minimum number of replicas (instances) to keep running for the Inference Endpoint. To enable + scaling to zero, set this value to 0 and adjust `scale_to_zero_timeout` accordingly. Defaults to 1. + max_replica (`int`, *optional*): + The maximum number of replicas (instances) to scale to for the Inference Endpoint. Defaults to 1. + scale_to_zero_timeout (`int`, *optional*): + The duration in minutes before an inactive endpoint is scaled to zero, or no scaling to zero if + set to None and `min_replica` is not 0. Defaults to None. + revision (`str`, *optional*): + The specific model revision to deploy on the Inference Endpoint (e.g. `"6c0e6080953db56375760c0471a8c5f2929baf11"`). + task (`str`, *optional*): + The task on which to deploy the model (e.g. `"text-classification"`). + custom_image (`Dict`, *optional*): + A custom Docker image to use for the Inference Endpoint. 
This is useful if you want to deploy an + Inference Endpoint running on the `text-generation-inference` (TGI) framework (see examples). + env (`Dict[str, str]`, *optional*): + Non-secret environment variables to inject in the container environment. + secrets (`Dict[str, str]`, *optional*): + Secret values to inject in the container environment. + type ([`InferenceEndpointType]`, *optional*): + The type of the Inference Endpoint, which can be `"protected"` (default), `"public"` or `"private"`. + domain (`str`, *optional*): + The custom domain for the Inference Endpoint deployment, if setup the inference endpoint will be available at this domain (e.g. `"my-new-domain.cool-website.woof"`). + path (`str`, *optional*): + The custom path to the deployed model, should start with a `/` (e.g. `"/models/google-bert/bert-base-uncased"`). + cache_http_responses (`bool`, *optional*): + Whether to cache HTTP responses from the Inference Endpoint. Defaults to `False`. + tags (`List[str]`, *optional*): + A list of tags to associate with the Inference Endpoint. + namespace (`str`, *optional*): + The namespace where the Inference Endpoint will be created. Defaults to the current user's namespace. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + [`InferenceEndpoint`]: information about the updated Inference Endpoint. + + Example: + ```python + >>> from huggingface_hub import HfApi + >>> api = HfApi() + >>> endpoint = api.create_inference_endpoint( + ... "my-endpoint-name", + ... repository="gpt2", + ... framework="pytorch", + ... task="text-generation", + ... accelerator="cpu", + ... vendor="aws", + ... region="us-east-1", + ... type="protected", + ... instance_size="x2", + ... instance_type="intel-icl", + ... 
) + >>> endpoint + InferenceEndpoint(name='my-endpoint-name', status="pending",...) + + # Run inference on the endpoint + >>> endpoint.client.text_generation(...) + "..." + ``` + + ```python + # Start an Inference Endpoint running Zephyr-7b-beta on TGI + >>> from huggingface_hub import HfApi + >>> api = HfApi() + >>> endpoint = api.create_inference_endpoint( + ... "aws-zephyr-7b-beta-0486", + ... repository="HuggingFaceH4/zephyr-7b-beta", + ... framework="pytorch", + ... task="text-generation", + ... accelerator="gpu", + ... vendor="aws", + ... region="us-east-1", + ... type="protected", + ... instance_size="x1", + ... instance_type="nvidia-a10g", + ... env={ + ... "MAX_BATCH_PREFILL_TOKENS": "2048", + ... "MAX_INPUT_LENGTH": "1024", + ... "MAX_TOTAL_TOKENS": "1512", + ... "MODEL_ID": "/repository" + ... }, + ... custom_image={ + ... "health_route": "/health", + ... "url": "ghcr.io/huggingface/text-generation-inference:1.1.0", + ... }, + ... secrets={"MY_SECRET_KEY": "secret_value"}, + ... tags=["dev", "text-generation"], + ... ) + ``` + + ```python + # Start an Inference Endpoint running ProsusAI/finbert while scaling to zero in 15 minutes + >>> from huggingface_hub import HfApi + >>> api = HfApi() + >>> endpoint = api.create_inference_endpoint( + ... "finbert-classifier", + ... repository="ProsusAI/finbert", + ... framework="pytorch", + ... task="text-classification", + ... min_replica=0, + ... scale_to_zero_timeout=15, + ... accelerator="cpu", + ... vendor="aws", + ... region="us-east-1", + ... type="protected", + ... instance_size="x2", + ... instance_type="intel-icl", + ... ) + >>> endpoint.wait(timeout=300) + # Run inference on the endpoint + >>> endpoint.client.text_generation(...) 
@experimental
@validate_hf_hub_args
def create_inference_endpoint_from_catalog(
    self,
    repo_id: str,
    *,
    name: Optional[str] = None,
    token: Union[bool, str, None] = None,
    namespace: Optional[str] = None,
) -> InferenceEndpoint:
    """Create a new Inference Endpoint from a model in the Hugging Face Inference Catalog.

    The goal of the Inference Catalog is to provide a curated list of models that are optimized for inference
    and for which default configurations have been tested. See https://endpoints.huggingface.co/catalog for a list
    of available models in the catalog.

    Args:
        repo_id (`str`):
            The ID of the model in the catalog to deploy as an Inference Endpoint.
        name (`str`, *optional*):
            The unique name for the new Inference Endpoint. If not provided, a random name will be generated.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
        namespace (`str`, *optional*):
            The namespace where the Inference Endpoint will be created. Defaults to the current user's namespace.

    Returns:
        [`InferenceEndpoint`]: information about the new Inference Endpoint.

    > [!WARNING]
    > `create_inference_endpoint_from_catalog` is experimental. Its API is subject to change in the future. Please provide feedback
    > if you have any suggestions or requests.
    """
    token = token or self.token or get_token()
    # Resolve the namespace once: it is sent in the request AND attached to the returned object.
    namespace = namespace or self._get_namespace(token=token)

    payload: Dict = {
        "namespace": namespace,
        "repoId": repo_id,
    }
    if name is not None:
        payload["endpointName"] = name

    response = get_session().post(
        f"{constants.INFERENCE_CATALOG_ENDPOINT}/deploy",
        headers=self._build_hf_headers(token=token),
        json=payload,
    )
    hf_raise_for_status(response)
    data = response.json()["endpoint"]
    # Attach the namespace the endpoint was deployed in (not the endpoint's own name), as every
    # other `InferenceEndpoint.from_raw` call in this class does.
    return InferenceEndpoint.from_raw(data, namespace=namespace, token=token)
+ + The goal of the Inference Catalog is to provide a curated list of models that are optimized for inference + and for which default configurations have been tested. See https://endpoints.huggingface.co/catalog for a list + of available models in the catalog. + + Use [`create_inference_endpoint_from_catalog`] to deploy a model from the catalog. + + Args: + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + + Returns: + List[`str`]: A list of model IDs available in the catalog. + > [!WARNING] + > `list_inference_catalog` is experimental. Its API is subject to change in the future. Please provide feedback + > if you have any suggestions or requests. + """ + response = get_session().get( + f"{constants.INFERENCE_CATALOG_ENDPOINT}/repo-list", + headers=self._build_hf_headers(token=token), + ) + hf_raise_for_status(response) + return response.json()["models"] + + def get_inference_endpoint( + self, name: str, *, namespace: Optional[str] = None, token: Union[bool, str, None] = None + ) -> InferenceEndpoint: + """Get information about an Inference Endpoint. + + Args: + name (`str`): + The name of the Inference Endpoint to retrieve information about. + namespace (`str`, *optional*): + The namespace in which the Inference Endpoint is located. Defaults to the current user. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + [`InferenceEndpoint`]: information about the requested Inference Endpoint. 
def update_inference_endpoint(
    self,
    name: str,
    *,
    # Compute update
    accelerator: Optional[str] = None,
    instance_size: Optional[str] = None,
    instance_type: Optional[str] = None,
    min_replica: Optional[int] = None,
    max_replica: Optional[int] = None,
    scale_to_zero_timeout: Optional[int] = None,
    # Model update
    repository: Optional[str] = None,
    framework: Optional[str] = None,
    revision: Optional[str] = None,
    task: Optional[str] = None,
    custom_image: Optional[Dict] = None,
    env: Optional[Dict[str, str]] = None,
    secrets: Optional[Dict[str, str]] = None,
    # Route update
    domain: Optional[str] = None,
    path: Optional[str] = None,
    # Other
    cache_http_responses: Optional[bool] = None,
    tags: Optional[List[str]] = None,
    namespace: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> InferenceEndpoint:
    """Update an Inference Endpoint.

    This method allows the update of either the compute configuration, the deployed model, the route, or any
    combination. All arguments are optional but at least one must be provided. Only the arguments that are not
    `None` are sent to the server.

    For convenience, you can also update an Inference Endpoint using [`InferenceEndpoint.update`].

    Args:
        name (`str`):
            The name of the Inference Endpoint to update.

        accelerator (`str`, *optional*):
            The hardware accelerator to be used for inference (e.g. `"cpu"`).
        instance_size (`str`, *optional*):
            The size or type of the instance to be used for hosting the model (e.g. `"x4"`).
        instance_type (`str`, *optional*):
            The cloud instance type where the Inference Endpoint will be deployed (e.g. `"intel-icl"`).
        min_replica (`int`, *optional*):
            The minimum number of replicas (instances) to keep running for the Inference Endpoint.
        max_replica (`int`, *optional*):
            The maximum number of replicas (instances) to scale to for the Inference Endpoint.
        scale_to_zero_timeout (`int`, *optional*):
            The duration in minutes before an inactive endpoint is scaled to zero.

        repository (`str`, *optional*):
            The name of the model repository associated with the Inference Endpoint (e.g. `"gpt2"`).
        framework (`str`, *optional*):
            The machine learning framework used for the model (e.g. `"custom"`).
        revision (`str`, *optional*):
            The specific model revision to deploy on the Inference Endpoint (e.g. `"6c0e6080953db56375760c0471a8c5f2929baf11"`).
        task (`str`, *optional*):
            The task on which to deploy the model (e.g. `"text-classification"`).
        custom_image (`Dict`, *optional*):
            A custom Docker image to use for the Inference Endpoint. This is useful if you want to deploy an
            Inference Endpoint running on the `text-generation-inference` (TGI) framework (see examples).
            A dict whose first key is one of `constants.INFERENCE_ENDPOINT_IMAGE_KEYS` is sent as-is; any other
            dict is wrapped as `{"custom": custom_image}`, as in [`create_inference_endpoint`].
        env (`Dict[str, str]`, *optional*):
            Non-secret environment variables to inject in the container environment.
        secrets (`Dict[str, str]`, *optional*):
            Secret values to inject in the container environment.

        domain (`str`, *optional*):
            The custom domain for the Inference Endpoint deployment, if setup the inference endpoint will be
            available at this domain (e.g. `"my-new-domain.cool-website.woof"`).
        path (`str`, *optional*):
            The custom path to the deployed model, should start with a `/` (e.g. `"/models/google-bert/bert-base-uncased"`).

        cache_http_responses (`bool`, *optional*):
            Whether to cache HTTP responses from the Inference Endpoint.
        tags (`List[str]`, *optional*):
            A list of tags to associate with the Inference Endpoint.

        namespace (`str`, *optional*):
            The namespace where the Inference Endpoint will be updated. Defaults to the current user's namespace.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`InferenceEndpoint`]: information about the updated Inference Endpoint.
    """
    namespace = namespace or self._get_namespace(token=token)

    # Populate only the fields that are not None. The nested defaultdict creates the
    # "compute" / "model" / "route" sections (and "compute.scaling") on first write only.
    payload: Dict = defaultdict(lambda: defaultdict(dict))
    if accelerator is not None:
        payload["compute"]["accelerator"] = accelerator
    if instance_size is not None:
        payload["compute"]["instanceSize"] = instance_size
    if instance_type is not None:
        payload["compute"]["instanceType"] = instance_type
    if max_replica is not None:
        payload["compute"]["scaling"]["maxReplica"] = max_replica
    if min_replica is not None:
        payload["compute"]["scaling"]["minReplica"] = min_replica
    if scale_to_zero_timeout is not None:
        payload["compute"]["scaling"]["scaleToZeroTimeout"] = scale_to_zero_timeout
    if repository is not None:
        payload["model"]["repository"] = repository
    if framework is not None:
        payload["model"]["framework"] = framework
    if revision is not None:
        payload["model"]["revision"] = revision
    if task is not None:
        payload["model"]["task"] = task
    if custom_image is not None:
        # Same rule as `create_inference_endpoint`: an image dict already keyed by a known image
        # type is passed through, anything else is treated as a "custom" image. The `None` default
        # keeps an empty dict on the "custom" path instead of raising `StopIteration`.
        first_key = next(iter(custom_image), None)
        payload["model"]["image"] = (
            custom_image if first_key in constants.INFERENCE_ENDPOINT_IMAGE_KEYS else {"custom": custom_image}
        )
    if env is not None:
        payload["model"]["env"] = env
    if secrets is not None:
        payload["model"]["secrets"] = secrets
    if domain is not None:
        payload["route"]["domain"] = domain
    if path is not None:
        payload["route"]["path"] = path
    if cache_http_responses is not None:
        payload["cacheHttpResponses"] = cache_http_responses
    if tags is not None:
        payload["tags"] = tags

    response = get_session().put(
        f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}/{name}",
        headers=self._build_hf_headers(token=token),
        json=payload,
    )
    hf_raise_for_status(response)

    return InferenceEndpoint.from_raw(response.json(), namespace=namespace, token=token)
+ """ + namespace = namespace or self._get_namespace(token=token) + response = get_session().delete( + f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}/{name}", + headers=self._build_hf_headers(token=token), + ) + hf_raise_for_status(response) + + def pause_inference_endpoint( + self, name: str, *, namespace: Optional[str] = None, token: Union[bool, str, None] = None + ) -> InferenceEndpoint: + """Pause an Inference Endpoint. + + A paused Inference Endpoint will not be charged. It can be resumed at any time using [`resume_inference_endpoint`]. + This is different than scaling the Inference Endpoint to zero with [`scale_to_zero_inference_endpoint`], which + would be automatically restarted when a request is made to it. + + For convenience, you can also pause an Inference Endpoint using [`pause_inference_endpoint`]. + + Args: + name (`str`): + The name of the Inference Endpoint to pause. + namespace (`str`, *optional*): + The namespace in which the Inference Endpoint is located. Defaults to the current user. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + [`InferenceEndpoint`]: information about the paused Inference Endpoint. + """ + namespace = namespace or self._get_namespace(token=token) + + response = get_session().post( + f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}/{name}/pause", + headers=self._build_hf_headers(token=token), + ) + hf_raise_for_status(response) + + return InferenceEndpoint.from_raw(response.json(), namespace=namespace, token=token) + + def resume_inference_endpoint( + self, + name: str, + *, + namespace: Optional[str] = None, + running_ok: bool = True, + token: Union[bool, str, None] = None, + ) -> InferenceEndpoint: + """Resume an Inference Endpoint. 
+ + For convenience, you can also resume an Inference Endpoint using [`InferenceEndpoint.resume`]. + + Args: + name (`str`): + The name of the Inference Endpoint to resume. + namespace (`str`, *optional*): + The namespace in which the Inference Endpoint is located. Defaults to the current user. + running_ok (`bool`, *optional*): + If `True`, the method will not raise an error if the Inference Endpoint is already running. Defaults to + `True`. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + [`InferenceEndpoint`]: information about the resumed Inference Endpoint. + """ + namespace = namespace or self._get_namespace(token=token) + + response = get_session().post( + f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}/{name}/resume", + headers=self._build_hf_headers(token=token), + ) + try: + hf_raise_for_status(response) + except HfHubHTTPError as error: + # If already running (and it's ok), then fetch current status and return + if running_ok and error.response.status_code == 400 and "already running" in error.response.text: + return self.get_inference_endpoint(name, namespace=namespace, token=token) + # Otherwise, raise the error + raise + + return InferenceEndpoint.from_raw(response.json(), namespace=namespace, token=token) + + def scale_to_zero_inference_endpoint( + self, name: str, *, namespace: Optional[str] = None, token: Union[bool, str, None] = None + ) -> InferenceEndpoint: + """Scale Inference Endpoint to zero. + + An Inference Endpoint scaled to zero will not be charged. It will be resume on the next request to it, with a + cold start delay. 
This is different than pausing the Inference Endpoint with [`pause_inference_endpoint`], which + would require a manual resume with [`resume_inference_endpoint`]. + + For convenience, you can also scale an Inference Endpoint to zero using [`InferenceEndpoint.scale_to_zero`]. + + Args: + name (`str`): + The name of the Inference Endpoint to scale to zero. + namespace (`str`, *optional*): + The namespace in which the Inference Endpoint is located. Defaults to the current user. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + [`InferenceEndpoint`]: information about the scaled-to-zero Inference Endpoint. + """ + namespace = namespace or self._get_namespace(token=token) + + response = get_session().post( + f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}/{name}/scale-to-zero", + headers=self._build_hf_headers(token=token), + ) + hf_raise_for_status(response) + + return InferenceEndpoint.from_raw(response.json(), namespace=namespace, token=token) + + def _get_namespace(self, token: Union[bool, str, None] = None) -> str: + """Get the default namespace for the current user.""" + me = self.whoami(token=token) + if me["type"] == "user": + return me["name"] + else: + raise ValueError( + "Cannot determine default namespace. You must provide a 'namespace' as input or be logged in as a" + " user." 
+ ) + + ######################## + # Collection Endpoints # + ######################## + @validate_hf_hub_args + def list_collections( + self, + *, + owner: Union[List[str], str, None] = None, + item: Union[List[str], str, None] = None, + sort: Optional[Literal["lastModified", "trending", "upvotes"]] = None, + limit: Optional[int] = None, + token: Union[bool, str, None] = None, + ) -> Iterable[Collection]: + """List collections on the Huggingface Hub, given some filters. + + > [!WARNING] + > When listing collections, the item list per collection is truncated to 4 items maximum. To retrieve all items + > from a collection, you must use [`get_collection`]. + + Args: + owner (`List[str]` or `str`, *optional*): + Filter by owner's username. + item (`List[str]` or `str`, *optional*): + Filter collections containing a particular items. Example: `"models/teknium/OpenHermes-2.5-Mistral-7B"`, `"datasets/squad"` or `"papers/2311.12983"`. + sort (`Literal["lastModified", "trending", "upvotes"]`, *optional*): + Sort collections by last modified, trending or upvotes. + limit (`int`, *optional*): + Maximum number of collections to be returned. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + `Iterable[Collection]`: an iterable of [`Collection`] objects. 
+ """ + # Construct the API endpoint + path = f"{self.endpoint}/api/collections" + headers = self._build_hf_headers(token=token) + params: Dict = {} + if owner is not None: + params.update({"owner": owner}) + if item is not None: + params.update({"item": item}) + if sort is not None: + params.update({"sort": sort}) + if limit is not None: + params.update({"limit": limit}) + + # Paginate over the results until limit is reached + items = paginate(path, headers=headers, params=params) + if limit is not None: + items = islice(items, limit) # Do not iterate over all pages + + # Parse as Collection and return + for position, collection_data in enumerate(items): + yield Collection(position=position, **collection_data) + + def get_collection(self, collection_slug: str, *, token: Union[bool, str, None] = None) -> Collection: + """Gets information about a Collection on the Hub. + + Args: + collection_slug (`str`): + Slug of the collection of the Hub. Example: `"TheBloke/recent-models-64f9a55bb3115b4f513ec026"`. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. 
+ + Returns: [`Collection`] + + Example: + + ```py + >>> from huggingface_hub import get_collection + >>> collection = get_collection("TheBloke/recent-models-64f9a55bb3115b4f513ec026") + >>> collection.title + 'Recent models' + >>> len(collection.items) + 37 + >>> collection.items[0] + CollectionItem( + item_object_id='651446103cd773a050bf64c2', + item_id='TheBloke/U-Amethyst-20B-AWQ', + item_type='model', + position=88, + note=None + ) + ``` + """ + r = get_session().get( + f"{self.endpoint}/api/collections/{collection_slug}", headers=self._build_hf_headers(token=token) + ) + hf_raise_for_status(r) + return Collection(**{**r.json(), "endpoint": self.endpoint}) + + def create_collection( + self, + title: str, + *, + namespace: Optional[str] = None, + description: Optional[str] = None, + private: bool = False, + exists_ok: bool = False, + token: Union[bool, str, None] = None, + ) -> Collection: + """Create a new Collection on the Hub. + + Args: + title (`str`): + Title of the collection to create. Example: `"Recent models"`. + namespace (`str`, *optional*): + Namespace of the collection to create (username or org). Will default to the owner name. + description (`str`, *optional*): + Description of the collection to create. + private (`bool`, *optional*): + Whether the collection should be private or not. Defaults to `False` (i.e. public collection). + exists_ok (`bool`, *optional*): + If `True`, do not raise an error if collection already exists. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: [`Collection`] + + Example: + + ```py + >>> from huggingface_hub import create_collection + >>> collection = create_collection( + ... title="ICCV 2023", + ... 
description="Portfolio of models, papers and demos I presented at ICCV 2023", + ... ) + >>> collection.slug + "username/iccv-2023-64f9a55bb3115b4f513ec026" + ``` + """ + if namespace is None: + namespace = self.whoami(token)["name"] + + payload = { + "title": title, + "namespace": namespace, + "private": private, + } + if description is not None: + payload["description"] = description + + r = get_session().post( + f"{self.endpoint}/api/collections", headers=self._build_hf_headers(token=token), json=payload + ) + try: + hf_raise_for_status(r) + except HTTPError as err: + if exists_ok and err.response.status_code == 409: + # Collection already exists and `exists_ok=True` + slug = r.json()["slug"] + return self.get_collection(slug, token=token) + else: + raise + return Collection(**{**r.json(), "endpoint": self.endpoint}) + + def update_collection_metadata( + self, + collection_slug: str, + *, + title: Optional[str] = None, + description: Optional[str] = None, + position: Optional[int] = None, + private: Optional[bool] = None, + theme: Optional[str] = None, + token: Union[bool, str, None] = None, + ) -> Collection: + """Update metadata of a collection on the Hub. + + All arguments are optional. Only provided metadata will be updated. + + Args: + collection_slug (`str`): + Slug of the collection to update. Example: `"TheBloke/recent-models-64f9a55bb3115b4f513ec026"`. + title (`str`): + Title of the collection to update. + description (`str`, *optional*): + Description of the collection to update. + position (`int`, *optional*): + New position of the collection in the list of collections of the user. + private (`bool`, *optional*): + Whether the collection should be private or not. + theme (`str`, *optional*): + Theme of the collection on the Hub. + token (`bool` or `str`, *optional*): + A valid user access token (string). 
Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: [`Collection`] + + Example: + + ```py + >>> from huggingface_hub import update_collection_metadata + >>> collection = update_collection_metadata( + ... collection_slug="username/iccv-2023-64f9a55bb3115b4f513ec026", + ... title="ICCV Oct. 2023" + ... description="Portfolio of models, datasets, papers and demos I presented at ICCV Oct. 2023", + ... private=False, + ... theme="pink", + ... ) + >>> collection.slug + "username/iccv-oct-2023-64f9a55bb3115b4f513ec026" + # ^collection slug got updated but not the trailing ID + ``` + """ + payload = { + "position": position, + "private": private, + "theme": theme, + "title": title, + "description": description, + } + r = get_session().patch( + f"{self.endpoint}/api/collections/{collection_slug}", + headers=self._build_hf_headers(token=token), + # Only send not-none values to the API + json={key: value for key, value in payload.items() if value is not None}, + ) + hf_raise_for_status(r) + return Collection(**{**r.json()["data"], "endpoint": self.endpoint}) + + def delete_collection( + self, collection_slug: str, *, missing_ok: bool = False, token: Union[bool, str, None] = None + ) -> None: + """Delete a collection on the Hub. + + Args: + collection_slug (`str`): + Slug of the collection to delete. Example: `"TheBloke/recent-models-64f9a55bb3115b4f513ec026"`. + missing_ok (`bool`, *optional*): + If `True`, do not raise an error if collection doesn't exists. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. 
+ + Example: + + ```py + >>> from huggingface_hub import delete_collection + >>> collection = delete_collection("username/useless-collection-64f9a55bb3115b4f513ec026", missing_ok=True) + ``` + + > [!WARNING] + > This is a non-revertible action. A deleted collection cannot be restored. + """ + r = get_session().delete( + f"{self.endpoint}/api/collections/{collection_slug}", headers=self._build_hf_headers(token=token) + ) + try: + hf_raise_for_status(r) + except HTTPError as err: + if missing_ok and err.response.status_code == 404: + # Collection doesn't exists and `missing_ok=True` + return + else: + raise + + def add_collection_item( + self, + collection_slug: str, + item_id: str, + item_type: CollectionItemType_T, + *, + note: Optional[str] = None, + exists_ok: bool = False, + token: Union[bool, str, None] = None, + ) -> Collection: + """Add an item to a collection on the Hub. + + Args: + collection_slug (`str`): + Slug of the collection to update. Example: `"TheBloke/recent-models-64f9a55bb3115b4f513ec026"`. + item_id (`str`): + ID of the item to add to the collection. It can be the ID of a repo on the Hub (e.g. `"facebook/bart-large-mnli"`) + or a paper id (e.g. `"2307.09288"`). + item_type (`str`): + Type of the item to add. Can be one of `"model"`, `"dataset"`, `"space"` or `"paper"`. + note (`str`, *optional*): + A note to attach to the item in the collection. The maximum size for a note is 500 characters. + exists_ok (`bool`, *optional*): + If `True`, do not raise an error if item already exists. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. 
+ + Returns: [`Collection`] + + Raises: + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write` + or `admin` role in the organization the repo belongs to or if you passed a `read` token. + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 404 if the item you try to add to the collection does not exist on the Hub. + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 409 if the item you try to add to the collection is already in the collection (and exists_ok=False) + + Example: + + ```py + >>> from huggingface_hub import add_collection_item + >>> collection = add_collection_item( + ... collection_slug="davanstrien/climate-64f99dc2a5067f6b65531bab", + ... item_id="pierre-loic/climate-news-articles", + ... item_type="dataset" + ... ) + >>> collection.items[-1].item_id + "pierre-loic/climate-news-articles" + # ^item got added to the collection on last position + + # Add item with a note + >>> add_collection_item( + ... collection_slug="davanstrien/climate-64f99dc2a5067f6b65531bab", + ... item_id="datasets/climate_fever", + ... item_type="dataset" + ... note="This dataset adopts the FEVER methodology that consists of 1,535 real-world claims regarding climate-change collected on the internet." + ... ) + (...) 
+ ``` + """ + payload: Dict[str, Any] = {"item": {"id": item_id, "type": item_type}} + if note is not None: + payload["note"] = note + r = get_session().post( + f"{self.endpoint}/api/collections/{collection_slug}/items", + headers=self._build_hf_headers(token=token), + json=payload, + ) + try: + hf_raise_for_status(r) + except HTTPError as err: + if exists_ok and err.response.status_code == 409: + # Item already exists and `exists_ok=True` + return self.get_collection(collection_slug, token=token) + else: + raise + return Collection(**{**r.json(), "endpoint": self.endpoint}) + + def update_collection_item( + self, + collection_slug: str, + item_object_id: str, + *, + note: Optional[str] = None, + position: Optional[int] = None, + token: Union[bool, str, None] = None, + ) -> None: + """Update an item in a collection. + + Args: + collection_slug (`str`): + Slug of the collection to update. Example: `"TheBloke/recent-models-64f9a55bb3115b4f513ec026"`. + item_object_id (`str`): + ID of the item in the collection. This is not the id of the item on the Hub (repo_id or paper id). + It must be retrieved from a [`CollectionItem`] object. Example: `collection.items[0].item_object_id`. + note (`str`, *optional*): + A note to attach to the item in the collection. The maximum size for a note is 500 characters. + position (`int`, *optional*): + New position of the item in the collection. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Example: + + ```py + >>> from huggingface_hub import get_collection, update_collection_item + + # Get collection first + >>> collection = get_collection("TheBloke/recent-models-64f9a55bb3115b4f513ec026") + + # Update item based on its ID (add note + update position) + >>> update_collection_item( + ... 
collection_slug="TheBloke/recent-models-64f9a55bb3115b4f513ec026", + ... item_object_id=collection.items[-1].item_object_id, + ... note="Newly updated model!" + ... position=0, + ... ) + ``` + """ + payload = {"position": position, "note": note} + r = get_session().patch( + f"{self.endpoint}/api/collections/{collection_slug}/items/{item_object_id}", + headers=self._build_hf_headers(token=token), + # Only send not-none values to the API + json={key: value for key, value in payload.items() if value is not None}, + ) + hf_raise_for_status(r) + + def delete_collection_item( + self, + collection_slug: str, + item_object_id: str, + *, + missing_ok: bool = False, + token: Union[bool, str, None] = None, + ) -> None: + """Delete an item from a collection. + + Args: + collection_slug (`str`): + Slug of the collection to update. Example: `"TheBloke/recent-models-64f9a55bb3115b4f513ec026"`. + item_object_id (`str`): + ID of the item in the collection. This is not the id of the item on the Hub (repo_id or paper id). + It must be retrieved from a [`CollectionItem`] object. Example: `collection.items[0].item_object_id`. + missing_ok (`bool`, *optional*): + If `True`, do not raise an error if item doesn't exists. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Example: + + ```py + >>> from huggingface_hub import get_collection, delete_collection_item + + # Get collection first + >>> collection = get_collection("TheBloke/recent-models-64f9a55bb3115b4f513ec026") + + # Delete item based on its ID + >>> delete_collection_item( + ... collection_slug="TheBloke/recent-models-64f9a55bb3115b4f513ec026", + ... item_object_id=collection.items[-1].item_object_id, + ... 
) + ``` + """ + r = get_session().delete( + f"{self.endpoint}/api/collections/{collection_slug}/items/{item_object_id}", + headers=self._build_hf_headers(token=token), + ) + try: + hf_raise_for_status(r) + except HTTPError as err: + if missing_ok and err.response.status_code == 404: + # Item already deleted and `missing_ok=True` + return + else: + raise + + ########################## + # Manage access requests # + ########################## + + @validate_hf_hub_args + def list_pending_access_requests( + self, repo_id: str, *, repo_type: Optional[str] = None, token: Union[bool, str, None] = None + ) -> List[AccessRequest]: + """ + Get pending access requests for a given gated repo. + + A pending request means the user has requested access to the repo but the request has not been processed yet. + If the approval mode is automatic, this list should be empty. Pending requests can be accepted or rejected + using [`accept_access_request`] and [`reject_access_request`]. + + For more info about gated repos, see https://huggingface.co/docs/hub/models-gated. + + Args: + repo_id (`str`): + The id of the repo to get access requests for. + repo_type (`str`, *optional*): + The type of the repo to get access requests for. Must be one of `model`, `dataset` or `space`. + Defaults to `model`. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + `List[AccessRequest]`: A list of [`AccessRequest`] objects. Each time contains a `username`, `email`, + `status` and `timestamp` attribute. If the gated repo has a custom form, the `fields` attribute will + be populated with user's answers. + + Raises: + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 400 if the repo is not gated. 
+ [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write` + or `admin` role in the organization the repo belongs to or if you passed a `read` token. + + Example: + ```py + >>> from huggingface_hub import list_pending_access_requests, accept_access_request + + # List pending requests + >>> requests = list_pending_access_requests("meta-llama/Llama-2-7b") + >>> len(requests) + 411 + >>> requests[0] + [ + AccessRequest( + username='clem', + fullname='Clem 🤗', + email='***', + timestamp=datetime.datetime(2023, 11, 23, 18, 4, 53, 828000, tzinfo=datetime.timezone.utc), + status='pending', + fields=None, + ), + ... + ] + + # Accept Clem's request + >>> accept_access_request("meta-llama/Llama-2-7b", "clem") + ``` + """ + return self._list_access_requests(repo_id, "pending", repo_type=repo_type, token=token) + + @validate_hf_hub_args + def list_accepted_access_requests( + self, repo_id: str, *, repo_type: Optional[str] = None, token: Union[bool, str, None] = None + ) -> List[AccessRequest]: + """ + Get accepted access requests for a given gated repo. + + An accepted request means the user has requested access to the repo and the request has been accepted. The user + can download any file of the repo. If the approval mode is automatic, this list should contains by default all + requests. Accepted requests can be cancelled or rejected at any time using [`cancel_access_request`] and + [`reject_access_request`]. A cancelled request will go back to the pending list while a rejected request will + go to the rejected list. In both cases, the user will lose access to the repo. + + For more info about gated repos, see https://huggingface.co/docs/hub/models-gated. + + Args: + repo_id (`str`): + The id of the repo to get access requests for. + repo_type (`str`, *optional*): + The type of the repo to get access requests for. 
Must be one of `model`, `dataset` or `space`. + Defaults to `model`. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + `List[AccessRequest]`: A list of [`AccessRequest`] objects. Each time contains a `username`, `email`, + `status` and `timestamp` attribute. If the gated repo has a custom form, the `fields` attribute will + be populated with user's answers. + + Raises: + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 400 if the repo is not gated. + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write` + or `admin` role in the organization the repo belongs to or if you passed a `read` token. + + Example: + ```py + >>> from huggingface_hub import list_accepted_access_requests + + >>> requests = list_accepted_access_requests("meta-llama/Llama-2-7b") + >>> len(requests) + 411 + >>> requests[0] + [ + AccessRequest( + username='clem', + fullname='Clem 🤗', + email='***', + timestamp=datetime.datetime(2023, 11, 23, 18, 4, 53, 828000, tzinfo=datetime.timezone.utc), + status='accepted', + fields=None, + ), + ... + ] + ``` + """ + return self._list_access_requests(repo_id, "accepted", repo_type=repo_type, token=token) + + @validate_hf_hub_args + def list_rejected_access_requests( + self, repo_id: str, *, repo_type: Optional[str] = None, token: Union[bool, str, None] = None + ) -> List[AccessRequest]: + """ + Get rejected access requests for a given gated repo. + + A rejected request means the user has requested access to the repo and the request has been explicitly rejected + by a repo owner (either you or another user from your organization). 
The user cannot download any file of the + repo. Rejected requests can be accepted or cancelled at any time using [`accept_access_request`] and + [`cancel_access_request`]. A cancelled request will go back to the pending list while an accepted request will + go to the accepted list. + + For more info about gated repos, see https://huggingface.co/docs/hub/models-gated. + + Args: + repo_id (`str`): + The id of the repo to get access requests for. + repo_type (`str`, *optional*): + The type of the repo to get access requests for. Must be one of `model`, `dataset` or `space`. + Defaults to `model`. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + `List[AccessRequest]`: A list of [`AccessRequest`] objects. Each time contains a `username`, `email`, + `status` and `timestamp` attribute. If the gated repo has a custom form, the `fields` attribute will + be populated with user's answers. + + Raises: + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 400 if the repo is not gated. + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write` + or `admin` role in the organization the repo belongs to or if you passed a `read` token. + + Example: + ```py + >>> from huggingface_hub import list_rejected_access_requests + + >>> requests = list_rejected_access_requests("meta-llama/Llama-2-7b") + >>> len(requests) + 411 + >>> requests[0] + [ + AccessRequest( + username='clem', + fullname='Clem 🤗', + email='***', + timestamp=datetime.datetime(2023, 11, 23, 18, 4, 53, 828000, tzinfo=datetime.timezone.utc), + status='rejected', + fields=None, + ), + ... 
def _list_access_requests(
    self,
    repo_id: str,
    status: Literal["accepted", "rejected", "pending"],
    repo_type: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> List[AccessRequest]:
    """Fetch the access requests of a gated repo that are in a given state.

    Args:
        repo_id (`str`):
            The id of the repo to get access requests for.
        status (`Literal["accepted", "rejected", "pending"]`):
            The request list to query; it is used verbatim as the last URL segment.
        repo_type (`str`, *optional*):
            The type of the repo. Must be a member of `constants.REPO_TYPES`.
            `None` is replaced by `constants.REPO_TYPE_MODEL`.
        token (`bool` or `str`, *optional*):
            Forwarded to `self._build_hf_headers` to build the request headers.

    Returns:
        `List[AccessRequest]`: One [`AccessRequest`] per entry of the JSON response.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError):
            If `repo_type` is not a member of `constants.REPO_TYPES`.
    """
    # NOTE(review): the membership test runs before the `None` default is applied,
    # so `None` only reaches the default if it is itself listed in
    # `constants.REPO_TYPES` - confirm against the constants module.
    if repo_type not in constants.REPO_TYPES:
        raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}")
    resolved_type = constants.REPO_TYPE_MODEL if repo_type is None else repo_type

    session = get_session()
    url = f"{constants.ENDPOINT}/api/{resolved_type}s/{repo_id}/user-access-request/{status}"
    response = session.get(url, headers=self._build_hf_headers(token=token))
    hf_raise_for_status(response)

    access_requests = []
    for entry in response.json():
        requester = entry["user"]
        access_requests.append(
            AccessRequest(
                username=requester["user"],
                fullname=requester["fullname"],
                email=requester.get("email"),  # may be absent from the payload
                status=entry["status"],
                timestamp=parse_datetime(entry["timestamp"]),
                fields=entry.get("fields"),  # only if custom fields in form
            )
        )
    return access_requests
Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Raises: + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 400 if the repo is not gated. + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write` + or `admin` role in the organization the repo belongs to or if you passed a `read` token. + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 404 if the user does not exist on the Hub. + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 404 if the user access request cannot be found. + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 404 if the user access request is already in the pending list. + """ + self._handle_access_request(repo_id, user, "pending", repo_type=repo_type, token=token) + + @validate_hf_hub_args + def accept_access_request( + self, repo_id: str, user: str, *, repo_type: Optional[str] = None, token: Union[bool, str, None] = None + ) -> None: + """ + Accept an access request from a user for a given gated repo. + + Once the request is accepted, the user will be able to download any file of the repo and access the community + tab. If the approval mode is automatic, you don't have to accept requests manually. An accepted request can be + cancelled or rejected at any time using [`cancel_access_request`] and [`reject_access_request`]. + + For more info about gated repos, see https://huggingface.co/docs/hub/models-gated. + + Args: + repo_id (`str`): + The id of the repo to accept access request for. + user (`str`): + The username of the user which access request should be accepted. 
@validate_hf_hub_args
def reject_access_request(
    self,
    repo_id: str,
    user: str,
    *,
    repo_type: Optional[str] = None,
    rejection_reason: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> None:
    """
    Reject an access request from a user for a given gated repo.

    A rejected request will go to the rejected list. The user cannot download any file of the repo. Rejected
    requests can be accepted or cancelled at any time using [`accept_access_request`] and [`cancel_access_request`].
    A cancelled request will go back to the pending list while an accepted request will go to the accepted list.

    For more info about gated repos, see https://huggingface.co/docs/hub/models-gated.

    Args:
        repo_id (`str`):
            The id of the repo to reject access request for.
        user (`str`):
            The username of the user which access request should be rejected.
        repo_type (`str`, *optional*):
            The type of the repo to reject access request for. Must be one of `model`, `dataset` or `space`.
            Defaults to `model`.
        rejection_reason (`str`, *optional*):
            Optional rejection reason that will be visible to the user (max 200 characters).
            Defaults to `None`, in which case no reason is sent.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Raises:
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 400 if the repo is not gated.
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write`
            or `admin` role in the organization the repo belongs to or if you passed a `read` token.
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 404 if the user does not exist on the Hub.
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 404 if the user access request cannot be found.
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 404 if the user access request is already in the rejected list.
    """
    # `rejection_reason` is documented as optional, so it now has a `None` default
    # instead of being a required keyword-only argument.
    self._handle_access_request(
        repo_id, user, "rejected", repo_type=repo_type, rejection_reason=rejection_reason, token=token
    )
@validate_hf_hub_args
def get_webhook(self, webhook_id: str, *, token: Union[bool, str, None] = None) -> WebhookInfo:
    """Retrieve a single webhook from its identifier.

    Args:
        webhook_id (`str`):
            The unique identifier of the webhook to get.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved token, which is the recommended
            method for authentication (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`WebhookInfo`]:
            Info about the webhook, built from the `"webhook"` entry of the JSON response.

    Example:
        ```python
        >>> from huggingface_hub import get_webhook
        >>> webhook = get_webhook("654bbbc16f2ec14d77f109cc")
        >>> print(webhook)
        WebhookInfo(
            id="654bbbc16f2ec14d77f109cc",
            job=None,
            watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")],
            url="https://webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548",
            secret="my-secret",
            domains=["repo", "discussion"],
            disabled=False,
        )
        ```
    """
    session = get_session()
    endpoint_url = f"{constants.ENDPOINT}/api/settings/webhooks/{webhook_id}"
    response = session.get(endpoint_url, headers=self._build_hf_headers(token=token))
    hf_raise_for_status(response)
    payload = response.json()["webhook"]

    # Watched items are parsed first, before any other field of the payload is read.
    watched = []
    for entry in payload["watched"]:
        watched.append(WebhookWatchedItem(type=entry["type"], name=entry["name"]))

    return WebhookInfo(
        id=payload["id"],
        url=payload.get("url"),
        # A missing or falsy "job" entry means the webhook targets a URL, not a Job.
        job=JobSpec(**payload["job"]) if payload.get("job") else None,
        watched=watched,
        domains=payload["domains"],
        secret=payload.get("secret"),
        disabled=payload["disabled"],
    )
+ + Example: + ```python + >>> from huggingface_hub import list_webhooks + >>> webhooks = list_webhooks() + >>> len(webhooks) + 2 + >>> webhooks[0] + WebhookInfo( + id="654bbbc16f2ec14d77f109cc", + watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")], + url="https://webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548", + secret="my-secret", + domains=["repo", "discussion"], + disabled=False, + ) + ``` + """ + response = get_session().get( + f"{constants.ENDPOINT}/api/settings/webhooks", + headers=self._build_hf_headers(token=token), + ) + hf_raise_for_status(response) + webhooks_data = response.json() + + return [ + WebhookInfo( + id=webhook["id"], + url=webhook.get("url"), + job=JobSpec(**webhook["job"]) if webhook.get("job") else None, + watched=[WebhookWatchedItem(type=item["type"], name=item["name"]) for item in webhook["watched"]], + domains=webhook["domains"], + secret=webhook.get("secret"), + disabled=webhook["disabled"], + ) + for webhook in webhooks_data + ] + + @validate_hf_hub_args + def create_webhook( + self, + *, + url: Optional[str] = None, + job_id: Optional[str] = None, + watched: List[Union[Dict, WebhookWatchedItem]], + domains: Optional[List[constants.WEBHOOK_DOMAIN_T]] = None, + secret: Optional[str] = None, + token: Union[bool, str, None] = None, + ) -> WebhookInfo: + """Create a new webhook. + + The webhook can either send a payload to a URL, or trigger a Job to run on Hugging Face infrastructure. + This function should be called with one of `url` or `job_id`, but not both. + + Args: + url (`str`): + URL to send the payload to. + job_id (`str`): + ID of the source Job to trigger with the webhook payload in the environment variable WEBHOOK_PAYLOAD. + Additional environment variables are available for convenience: WEBHOOK_REPO_ID, WEBHOOK_REPO_TYPE and WEBHOOK_SECRET. + watched (`List[WebhookWatchedItem]`): + List of [`WebhookWatchedItem`] to be watched by the webhook. 
It can be users, orgs, models, datasets or spaces. + Watched items can also be provided as plain dictionaries. + domains (`List[Literal["repo", "discussion"]]`, optional): + List of domains to watch. It can be "repo", "discussion" or both. + secret (`str`, optional): + A secret to sign the payload with. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved token, which is the recommended + method for authentication (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + [`WebhookInfo`]: + Info about the newly created webhook. + + Example: + + Create a webhook that sends a payload to a URL + ```python + >>> from huggingface_hub import create_webhook + >>> payload = create_webhook( + ... watched=[{"type": "user", "name": "julien-c"}, {"type": "org", "name": "HuggingFaceH4"}], + ... url="https://webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548", + ... domains=["repo", "discussion"], + ... secret="my-secret", + ... ) + >>> print(payload) + WebhookInfo( + id="654bbbc16f2ec14d77f109cc", + url="https://webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548", + job=None, + watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")], + domains=["repo", "discussion"], + secret="my-secret", + disabled=False, + ) + ``` + + Run a Job and then create a webhook that triggers this Job + ```python + >>> from huggingface_hub import create_webhook, run_job + >>> job = run_job( + ... image="ubuntu", + ... command=["bash", "-c", r"echo An event occured in $WEBHOOK_REPO_ID: $WEBHOOK_PAYLOAD"], + ... ) + >>> payload = create_webhook( + ... watched=[{"type": "user", "name": "julien-c"}, {"type": "org", "name": "HuggingFaceH4"}], + ... job_id=job.id, + ... domains=["repo", "discussion"], + ... secret="my-secret", + ... 
@validate_hf_hub_args
def update_webhook(
    self,
    webhook_id: str,
    *,
    url: Optional[str] = None,
    watched: Optional[List[Union[Dict, WebhookWatchedItem]]] = None,
    domains: Optional[List[constants.WEBHOOK_DOMAIN_T]] = None,
    secret: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> WebhookInfo:
    """Update an existing webhook.

    All of `watched`, `url`, `domains` and `secret` are always included in the request body,
    even when left to `None`. A `watched` value of `None` is sent as an empty list.

    Args:
        webhook_id (`str`):
            The unique identifier of the webhook to be updated.
        url (`str`, optional):
            The URL to which the payload will be sent.
        watched (`List[WebhookWatchedItem]`, optional):
            List of items to watch. It can be users, orgs, models, datasets, or spaces.
            Refer to [`WebhookWatchedItem`] for more details. Watched items can also be provided as plain dictionaries.
        domains (`List[Literal["repo", "discussion"]]`, optional):
            The domains to watch. This can include "repo", "discussion", or both.
        secret (`str`, optional):
            A secret to sign the payload with, providing an additional layer of security.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved token, which is the recommended
            method for authentication (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`WebhookInfo`]:
            Info about the updated webhook.

    Example:
        ```python
        >>> from huggingface_hub import update_webhook
        >>> updated_payload = update_webhook(
        ...     webhook_id="654bbbc16f2ec14d77f109cc",
        ...     url="https://new.webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548",
        ...     watched=[{"type": "user", "name": "julien-c"}, {"type": "org", "name": "HuggingFaceH4"}],
        ...     domains=["repo"],
        ...     secret="my-secret",
        ... )
        >>> print(updated_payload)
        WebhookInfo(
            id="654bbbc16f2ec14d77f109cc",
            job=None,
            url="https://new.webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548",
            watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")],
            domains=["repo"],
            secret="my-secret",
            disabled=False,
        )
        ```
    """
    # Normalize watched items to plain dicts; dataclass instances go through `asdict`.
    watched_dicts = []
    for item in watched if watched is not None else []:
        watched_dicts.append(asdict(item) if isinstance(item, WebhookWatchedItem) else item)

    session = get_session()
    endpoint_url = f"{constants.ENDPOINT}/api/settings/webhooks/{webhook_id}"
    body = {"watched": watched_dicts, "url": url, "domains": domains, "secret": secret}
    response = session.post(endpoint_url, json=body, headers=self._build_hf_headers(token=token))
    hf_raise_for_status(response)
    payload = response.json()["webhook"]

    parsed_watched = []
    for entry in payload["watched"]:
        parsed_watched.append(WebhookWatchedItem(type=entry["type"], name=entry["name"]))

    return WebhookInfo(
        id=payload["id"],
        url=payload.get("url"),
        job=JobSpec(**payload["job"]) if payload.get("job") else None,
        watched=parsed_watched,
        domains=payload["domains"],
        secret=payload.get("secret"),
        disabled=payload["disabled"],
    )
@validate_hf_hub_args
def disable_webhook(self, webhook_id: str, *, token: Union[bool, str, None] = None) -> WebhookInfo:
    """Disable a webhook (makes it "disabled").

    Args:
        webhook_id (`str`):
            The unique identifier of the webhook to disable.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved token, which is the recommended
            method for authentication (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`WebhookInfo`]:
            Info about the disabled webhook.

    Example:
        ```python
        >>> from huggingface_hub import disable_webhook
        >>> disabled_webhook = disable_webhook("654bbbc16f2ec14d77f109cc")
        >>> disabled_webhook
        WebhookInfo(
            id="654bbbc16f2ec14d77f109cc",
            url="https://webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548",
            job=None,
            watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")],
            domains=["repo", "discussion"],
            secret="my-secret",
            disabled=True,
        )
        ```
    """
    session = get_session()
    endpoint_url = f"{constants.ENDPOINT}/api/settings/webhooks/{webhook_id}/disable"
    response = session.post(endpoint_url, headers=self._build_hf_headers(token=token))
    hf_raise_for_status(response)
    payload = response.json()["webhook"]

    # The response carries the full webhook description; rebuild it as a `WebhookInfo`.
    watched = []
    for entry in payload["watched"]:
        watched.append(WebhookWatchedItem(type=entry["type"], name=entry["name"]))

    return WebhookInfo(
        id=payload["id"],
        url=payload.get("url"),
        job=JobSpec(**payload["job"]) if payload.get("job") else None,
        watched=watched,
        domains=payload["domains"],
        secret=payload.get("secret"),
        disabled=payload["disabled"],
    )
def _build_hf_headers(
    self,
    token: Union[bool, str, None] = None,
    library_name: Optional[str] = None,
    library_version: Optional[str] = None,
    user_agent: Union[Dict, str, None] = None,
) -> Dict[str, str]:
    """
    Wrapper around [`build_hf_headers`] that falls back on the values stored on the client.

    Args:
        token (`bool` or `str`, *optional*):
            Token to use. Only `None` triggers the fallback to `self.token`; `False` is passed through unchanged.
        library_name (`str`, *optional*):
            Falls back to `self.library_name` when falsy.
        library_version (`str`, *optional*):
            Falls back to `self.library_version` when falsy.
        user_agent (`dict` or `str`, *optional*):
            Falls back to `self.user_agent` when falsy.

    Returns:
        `Dict[str, str]`: The value returned by [`build_hf_headers`], which also receives `self.headers`.
    """
    # An explicit identity check is required here: `token or self.token` would
    # discard a deliberate `False`.
    effective_token = self.token if token is None else token
    effective_name = library_name or self.library_name
    effective_version = library_version or self.library_version
    effective_agent = user_agent or self.user_agent
    return build_hf_headers(
        token=effective_token,
        library_name=effective_name,
        library_version=effective_version,
        user_agent=effective_agent,
        headers=self.headers,
    )
+ """ + if delete_patterns is None: + # If no delete patterns, no need to list and filter remote files + return [] + + # List remote files + filenames = self.list_repo_files(repo_id=repo_id, revision=revision, repo_type=repo_type, token=token) + + # Compute relative path in repo + if path_in_repo and path_in_repo not in (".", "./"): + path_in_repo = path_in_repo.strip("/") + "/" # harmonize + relpath_to_abspath = { + file[len(path_in_repo) :]: file for file in filenames if file.startswith(path_in_repo) + } + else: + relpath_to_abspath = {file: file for file in filenames} + + # Apply filter on relative paths and return + return [ + CommitOperationDelete(path_in_repo=relpath_to_abspath[relpath], is_folder=False) + for relpath in filter_repo_objects(relpath_to_abspath.keys(), allow_patterns=delete_patterns) + if relpath_to_abspath[relpath] != ".gitattributes" + ] + + def _prepare_upload_folder_additions( + self, + folder_path: Union[str, Path], + path_in_repo: str, + allow_patterns: Optional[Union[List[str], str]] = None, + ignore_patterns: Optional[Union[List[str], str]] = None, + repo_type: Optional[str] = None, + token: Union[bool, str, None] = None, + ) -> List[CommitOperationAdd]: + """Generate the list of Add operations for a commit to upload a folder. + + Files not matching the `allow_patterns` (allowlist) and `ignore_patterns` (denylist) + constraints are discarded. + """ + + folder_path = Path(folder_path).expanduser().resolve() + if not folder_path.is_dir(): + raise ValueError(f"Provided path: '{folder_path}' is not a directory") + + # List files from folder + relpath_to_abspath = { + path.relative_to(folder_path).as_posix(): path + for path in sorted(folder_path.glob("**/*")) # sorted to be deterministic + if path.is_file() + } + + # Filter files + # Patterns are applied on the path relative to `folder_path`. `path_in_repo` is prefixed after the filtering. 
def _validate_yaml(self, content: str, *, repo_type: Optional[str] = None, token: Union[bool, str, None] = None):
    """
    Check the YAML metadata of a `README.md` by posting it to the `/api/validate-yaml` route.

    Args:
        content (`str`):
            Content of `README.md` to validate.
        repo_type (`str`, *optional*):
            The type of the repo the README belongs to. `None` is replaced by `constants.REPO_TYPE_MODEL`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Raises:
        - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
          if `hf_raise_for_status` raises a `BadRequestError`; the message lists the
          `"errors"` entries of the response.
    """
    if repo_type is None:
        repo_type = constants.REPO_TYPE_MODEL
    request_headers = self._build_hf_headers(token=token)

    response = get_session().post(
        f"{self.endpoint}/api/validate-yaml",
        json={"content": content, "repoType": repo_type},
        headers=request_headers,
    )

    # The body is parsed before the status check so that warnings are surfaced
    # whether or not the request is later rejected.
    body = response.json()
    warning_lines = [f"- {warning.get('message')}" for warning in body.get("warnings", [])]
    warning_text = "\n".join(warning_lines)
    if warning_text:
        warnings.warn(f"Warnings while validating metadata in README.md:\n{warning_text}")

    # Only a bad request is translated into a `ValueError`; other errors propagate as raised.
    try:
        hf_raise_for_status(response)
    except BadRequestError as e:
        error_lines = [f"- {error.get('message')}" for error in body.get("errors", [])]
        error_text = "\n".join(error_lines)
        raise ValueError(f"Invalid metadata in README.md.\n{error_text}") from e
+ """ + r = get_session().get( + f"{constants.ENDPOINT}/api/users/{username}/overview", headers=self._build_hf_headers(token=token) + ) + hf_raise_for_status(r) + return User(**r.json()) + + @validate_hf_hub_args + def get_organization_overview(self, organization: str, token: Union[bool, str, None] = None) -> Organization: + """ + Get an overview of an organization on the Hub. + + Args: + organization (`str`): + Name of the organization to get an overview of. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved token, which is the recommended method + for authentication (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + `Organization`: An [`Organization`] object with the organization's overview. + + Raises: + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 404 If the organization does not exist on the Hub. + """ + r = get_session().get( + f"{constants.ENDPOINT}/api/organizations/{organization}/overview", + headers=self._build_hf_headers(token=token), + ) + hf_raise_for_status(r) + return Organization(**r.json()) + + def list_organization_members(self, organization: str, token: Union[bool, str, None] = None) -> Iterable[User]: + """ + List of members of an organization on the Hub. + + Args: + organization (`str`): + Name of the organization to get the members of. + token (`bool` or `str`, *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + `Iterable[User]`: A list of [`User`] objects with the members of the organization. 
def list_user_followers(self, username: str, token: Union[bool, str, None] = None) -> Iterable[User]:
    """
    Iterate over the followers of a user on the Hub.

    This is a generator: nothing is requested until the result is iterated.

    Args:
        username (`str`):
            Username of the user whose followers are listed.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Iterable[User]`: [`User`] objects, one per follower entry produced by `paginate`.

    Raises:
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 404 If the user does not exist on the Hub.
    """
    followers_url = f"{constants.ENDPOINT}/api/users/{username}/followers"
    auth_headers = self._build_hf_headers(token=token)
    raw_followers = paginate(path=followers_url, params={}, headers=auth_headers)
    # Each raw entry is expanded as keyword arguments into a `User`.
    yield from (User(**entry) for entry in raw_followers)
def list_papers(
    self,
    *,
    query: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> Iterable[PaperInfo]:
    """
    List daily papers on the Hugging Face Hub given a search query.

    This is a generator: the search request is only sent once the result is iterated.

    Args:
        query (`str`, *optional*):
            A search query string to find papers. When provided (and non-empty), it is sent
            as the `q` request parameter; otherwise no search parameter is sent.
        token (Union[bool, str, None], *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Iterable[PaperInfo]`: an iterable of [`huggingface_hub.hf_api.PaperInfo`] objects.

    Example:

    ```python
    >>> from huggingface_hub import HfApi

    >>> api = HfApi()

    # List all papers with "attention" in their title
    >>> api.list_papers(query="attention")
    ```
    """
    search_url = f"{self.endpoint}/api/papers/search"
    # An empty or missing query means no `q` parameter at all.
    search_params = {"q": query} if query else {}
    response = get_session().get(
        search_url,
        params=search_params,
        headers=self._build_hf_headers(token=token),
    )
    hf_raise_for_status(response)
    yield from (PaperInfo(**entry) for entry in response.json())
+ """ + path = f"{self.endpoint}/api/papers/{id}" + r = get_session().get(path) + hf_raise_for_status(r) + return PaperInfo(**r.json()) + + def auth_check( + self, repo_id: str, *, repo_type: Optional[str] = None, token: Union[bool, str, None] = None + ) -> None: + """ + Check if the provided user token has access to a specific repository on the Hugging Face Hub. + + This method verifies whether the user, authenticated via the provided token, has access to the specified + repository. If the repository is not found or if the user lacks the required permissions to access it, + the method raises an appropriate exception. + + Args: + repo_id (`str`): + The repository to check for access. Format should be `"user/repo_name"`. + Example: `"user/my-cool-model"`. + + repo_type (`str`, *optional*): + The type of the repository. Should be one of `"model"`, `"dataset"`, or `"space"`. + If not specified, the default is `"model"`. + + token `(Union[bool, str, None]`, *optional*): + A valid user access token. If not provided, the locally saved token will be used, which is the + recommended authentication method. Set to `False` to disable authentication. + Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication. + + Raises: + [`~utils.RepositoryNotFoundError`]: + Raised if the repository does not exist, is private, or the user does not have access. This can + occur if the `repo_id` or `repo_type` is incorrect or if the repository is private but the user + is not authenticated. + + [`~utils.GatedRepoError`]: + Raised if the repository exists but is gated and the user is not authorized to access it. 
def run_job(
    self,
    *,
    image: str,
    command: List[str],
    env: Optional[Dict[str, Any]] = None,
    secrets: Optional[Dict[str, Any]] = None,
    flavor: Optional[SpaceHardware] = None,
    timeout: Optional[Union[int, float, str]] = None,
    namespace: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> JobInfo:
    """
    Run compute Jobs on Hugging Face infrastructure.

    Args:
        image (`str`):
            The Docker image to use.
            Examples: `"ubuntu"`, `"python:3.12"`, `"pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel"`.
            Example with an image from a Space: `"hf.co/spaces/lhoestq/duckdb"`.

        command (`List[str]`):
            The command to run. Example: `["echo", "hello"]`.

        env (`Dict[str, Any]`, *optional*):
            Defines the environment variables for the Job.

        secrets (`Dict[str, Any]`, *optional*):
            Defines the secret environment variables for the Job.

        flavor (`str`, *optional*):
            Flavor for the hardware, as in Hugging Face Spaces. See [`SpaceHardware`] for possible values.
            Defaults to `"cpu-basic"`.

        timeout (`Union[int, float, str]`, *optional*):
            Max duration for the Job: int/float with s (seconds, default), m (minutes), h (hours) or d (days).
            Example: `300` or `"5m"` for 5 minutes.

        namespace (`str`, *optional*):
            The namespace where the Job will be created. Defaults to the current user's namespace.

        token `(Union[bool, str, None]`, *optional*):
            A valid user access token. If not provided, the locally saved token will be used, which is the
            recommended authentication method. Set to `False` to disable authentication.
            Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication.

    Returns:
        `JobInfo`: Description of the created Job, built from the JSON response of the Jobs API.

    Example:
        Run your first Job:

        ```python
        >>> from huggingface_hub import run_job
        >>> run_job(image="python:3.12", command=["python", "-c", "print('Hello from HF compute!')"])
        ```

        Run a GPU Job:

        ```python
        >>> from huggingface_hub import run_job
        >>> image = "pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel"
        >>> command = ["python", "-c", "import torch; print(f'This code ran with the following GPU: {torch.cuda.get_device_name()}')"]
        >>> run_job(image=image, command=command, flavor="a10g-small")
        ```

    """
    if namespace is None:
        namespace = self.whoami(token=token)["name"]
    job_spec = _create_job_spec(
        image=image,
        command=command,
        env=env,
        secrets=secrets,
        flavor=flavor,
        timeout=timeout,
    )
    # Target the endpoint this client is configured with (the same one handed to `JobInfo`
    # below) rather than a hard-coded huggingface.co URL.
    response = get_session().post(
        f"{self.endpoint}/api/jobs/{namespace}",
        json=job_spec,
        headers=self._build_hf_headers(token=token),
    )
    hf_raise_for_status(response)
    job_info = response.json()
    return JobInfo(**job_info, endpoint=self.endpoint)
Face infrastructure. + + Args: + job_id (`str`): + ID of the Job. + + namespace (`str`, *optional*): + The namespace where the Job is running. Defaults to the current user's namespace. + + token `(Union[bool, str, None]`, *optional*): + A valid user access token. If not provided, the locally saved token will be used, which is the + recommended authentication method. Set to `False` to disable authentication. + Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication. + + Example: + + ```python + >>> from huggingface_hub import fetch_job_logs, run_job + >>> job = run_job(image="python:3.12", command=["python", "-c" ,"print('Hello from HF compute!')"]) + >>> for log in fetch_job_logs(job.id): + ... print(log) + Hello from HF compute! + ``` + """ + if namespace is None: + namespace = self.whoami(token=token)["name"] + logging_finished = logging_started = False + job_finished = False + # - We need to retry because sometimes the /logs doesn't return logs when the job just started. 
def list_jobs(
    self,
    *,
    timeout: Optional[int] = None,
    namespace: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> List[JobInfo]:
    """
    List compute Jobs on Hugging Face infrastructure.

    Args:
        timeout (`float`, *optional*):
            Whether to set a timeout for the request to the Hub.

        namespace (`str`, *optional*):
            The namespace from where it lists the jobs. Defaults to the current user's namespace.

        token `(Union[bool, str, None]`, *optional*):
            A valid user access token. If not provided, the locally saved token will be used, which is the
            recommended authentication method. Set to `False` to disable authentication.
            Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication.

    Returns:
        `List[JobInfo]`: One [`JobInfo`] per entry of the JSON list returned by the Jobs API.
    """
    if namespace is None:
        # Resolve the namespace through this client (not the module-level `whoami`) so the
        # lookup goes through the same `HfApi` instance as the request below.
        namespace = self.whoami(token=token)["name"]
    response = get_session().get(
        f"{self.endpoint}/api/jobs/{namespace}",
        headers=self._build_hf_headers(token=token),
        timeout=timeout,
    )
    # Same error helper as the sibling scheduled-job listing method.
    hf_raise_for_status(response)
    return [JobInfo(**job_info, endpoint=self.endpoint) for job_info in response.json()]
def cancel_job(
    self,
    *,
    job_id: str,
    namespace: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> None:
    """
    Cancel a compute Job on Hugging Face infrastructure.

    Args:
        job_id (`str`):
            ID of the Job.

        namespace (`str`, *optional*):
            The namespace where the Job is running. Defaults to the current user's namespace.

        token `(Union[bool, str, None]`, *optional*):
            A valid user access token. If not provided, the locally saved token will be used, which is the
            recommended authentication method. Set to `False` to disable authentication.
            Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication.
    """
    if namespace is None:
        namespace = self.whoami(token=token)["name"]
    response = get_session().post(
        f"{self.endpoint}/api/jobs/{namespace}/{job_id}/cancel",
        headers=self._build_hf_headers(token=token),
    )
    # Use the same error helper as the other Hub calls in this class instead of the raw
    # `Response.raise_for_status()`.
    hf_raise_for_status(response)
def create_scheduled_job(
    self,
    *,
    image: str,
    command: List[str],
    schedule: str,
    suspend: Optional[bool] = None,
    concurrency: Optional[bool] = None,
    env: Optional[Dict[str, Any]] = None,
    secrets: Optional[Dict[str, Any]] = None,
    flavor: Optional[SpaceHardware] = None,
    timeout: Optional[Union[int, float, str]] = None,
    namespace: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> ScheduledJobInfo:
    """
    Create scheduled compute Jobs on Hugging Face infrastructure.

    Args:
        image (`str`):
            The Docker image to use.
            Examples: `"ubuntu"`, `"python:3.12"`, `"pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel"`.
            Example with an image from a Space: `"hf.co/spaces/lhoestq/duckdb"`.

        command (`List[str]`):
            The command to run. Example: `["echo", "hello"]`.

        schedule (`str`):
            One of "@annually", "@yearly", "@monthly", "@weekly", "@daily", "@hourly", or a
            CRON schedule expression (e.g., '0 9 * * 1' for 9 AM every Monday).

        suspend (`bool`, *optional*):
            If True, the scheduled Job is suspended (paused). Defaults to False.
            Only sent to the API when not `None`.

        concurrency (`bool`, *optional*):
            If True, multiple instances of this Job can run concurrently. Defaults to False.
            Only sent to the API when not `None`.

        env (`Dict[str, Any]`, *optional*):
            Defines the environment variables for the Job.

        secrets (`Dict[str, Any]`, *optional*):
            Defines the secret environment variables for the Job.

        flavor (`str`, *optional*):
            Flavor for the hardware, as in Hugging Face Spaces. See [`SpaceHardware`] for possible values.
            Defaults to `"cpu-basic"`.

        timeout (`Union[int, float, str]`, *optional*):
            Max duration for the Job: int/float with s (seconds, default), m (minutes), h (hours) or d (days).
            Example: `300` or `"5m"` for 5 minutes.

        namespace (`str`, *optional*):
            The namespace where the Job will be created. Defaults to the current user's namespace.

        token `(Union[bool, str, None]`, *optional*):
            A valid user access token. If not provided, the locally saved token will be used, which is the
            recommended authentication method. Set to `False` to disable authentication.
            Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication.

    Returns:
        `ScheduledJobInfo`: Description of the scheduled Job, built from the JSON response of the API.

    Example:
        Create your first scheduled Job:

        ```python
        >>> from huggingface_hub import create_scheduled_job
        >>> create_scheduled_job(image="python:3.12", command=["python", "-c", "print('Hello from HF compute!')"], schedule="@hourly")
        ```

        Use a CRON schedule expression:

        ```python
        >>> from huggingface_hub import create_scheduled_job
        >>> create_scheduled_job(image="python:3.12", command=["python", "-c", "print('this runs every 5min')"], schedule="*/5 * * * *")
        ```

        Create a scheduled GPU Job:

        ```python
        >>> from huggingface_hub import create_scheduled_job
        >>> image = "pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel"
        >>> command = ["python", "-c", "import torch; print(f'This code ran with the following GPU: {torch.cuda.get_device_name()}')"]
        >>> create_scheduled_job(image=image, command=command, flavor="a10g-small", schedule="@hourly")
        ```

    """
    if namespace is None:
        namespace = self.whoami(token=token)["name"]

    # prepare payload to send to HF Jobs API
    job_spec = _create_job_spec(
        image=image,
        command=command,
        env=env,
        secrets=secrets,
        flavor=flavor,
        timeout=timeout,
    )
    input_json: Dict[str, Any] = {
        "jobSpec": job_spec,
        "schedule": schedule,
    }
    # Optional flags are omitted entirely when left unset.
    if concurrency is not None:
        input_json["concurrency"] = concurrency
    if suspend is not None:
        input_json["suspend"] = suspend
    # Target the endpoint this client is configured with, like the other scheduled-job
    # methods, rather than a hard-coded huggingface.co URL.
    response = get_session().post(
        f"{self.endpoint}/api/scheduled-jobs/{namespace}",
        json=input_json,
        headers=self._build_hf_headers(token=token),
    )
    hf_raise_for_status(response)
    scheduled_job_info = response.json()
    return ScheduledJobInfo(**scheduled_job_info)
def inspect_scheduled_job(
    self,
    *,
    scheduled_job_id: str,
    namespace: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> ScheduledJobInfo:
    """
    Inspect a scheduled compute Job on Hugging Face infrastructure.

    Args:
        scheduled_job_id (`str`):
            ID of the scheduled Job.

        namespace (`str`, *optional*):
            The namespace where the scheduled Job is. Defaults to the current user's namespace.

        token `(Union[bool, str, None]`, *optional*):
            A valid user access token. If not provided, the locally saved token will be used, which is the
            recommended authentication method. Set to `False` to disable authentication.
            Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication.

    Returns:
        `ScheduledJobInfo`: Description of the scheduled Job, built from the JSON response of the API.

    Example:

    ```python
    >>> from huggingface_hub import inspect_scheduled_job, create_scheduled_job
    >>> scheduled_job = create_scheduled_job(image="python:3.12", command=["python", "-c", "print('Hello from HF compute!')"], schedule="@hourly")
    >>> inspect_scheduled_job(scheduled_job_id=scheduled_job.id)
    ```
    """
    # Fall back to the namespace of the authenticated user when none is given.
    owner = namespace if namespace is not None else self.whoami(token=token)["name"]
    session = get_session()
    job_url = f"{self.endpoint}/api/scheduled-jobs/{owner}/{scheduled_job_id}"
    response = session.get(job_url, headers=self._build_hf_headers(token=token))
    hf_raise_for_status(response)
    job_payload = response.json()
    return ScheduledJobInfo(**job_payload)
def resume_scheduled_job(
    self,
    *,
    scheduled_job_id: str,
    namespace: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> None:
    """
    Resume (unpause) a scheduled compute Job on Hugging Face infrastructure.

    Args:
        scheduled_job_id (`str`):
            ID of the scheduled Job.

        namespace (`str`, *optional*):
            The namespace where the scheduled Job is. Defaults to the current user's namespace.

        token `(Union[bool, str, None]`, *optional*):
            A valid user access token. If not provided, the locally saved token will be used, which is the
            recommended authentication method. Set to `False` to disable authentication.
            Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication.
    """
    if namespace is None:
        namespace = self.whoami(token=token)["name"]
    response = get_session().post(
        f"{self.endpoint}/api/scheduled-jobs/{namespace}/{scheduled_job_id}/resume",
        headers=self._build_hf_headers(token=token),
    )
    # Use the same error helper as `delete_scheduled_job` instead of the raw
    # `Response.raise_for_status()`.
    hf_raise_for_status(response)
+ + flavor (`str`, *optional*): + Flavor for the hardware, as in Hugging Face Spaces. See [`SpaceHardware`] for possible values. + Defaults to `"cpu-basic"`. + + timeout (`Union[int, float, str]`, *optional*): + Max duration for the Job: int/float with s (seconds, default), m (minutes), h (hours) or d (days). + Example: `300` or `"5m"` for 5 minutes. + + namespace (`str`, *optional*): + The namespace where the Job will be created. Defaults to the current user's namespace. + + token `(Union[bool, str, None]`, *optional*): + A valid user access token. If not provided, the locally saved token will be used, which is the + recommended authentication method. Set to `False` to disable authentication. + Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication. + + Example: + + Schedule a script from a URL: + + ```python + >>> from huggingface_hub import create_scheduled_uv_job + >>> script = "https://raw.githubusercontent.com/huggingface/trl/refs/heads/main/trl/scripts/sft.py" + >>> script_args = ["--model_name_or_path", "Qwen/Qwen2-0.5B", "--dataset_name", "trl-lib/Capybara", "--push_to_hub"] + >>> create_scheduled_uv_job(script, script_args=script_args, dependencies=["trl"], flavor="a10g-small", schedule="@weekly") + ``` + + Schedule a local script: + + ```python + >>> from huggingface_hub import create_scheduled_uv_job + >>> script = "my_sft.py" + >>> script_args = ["--model_name_or_path", "Qwen/Qwen2-0.5B", "--dataset_name", "trl-lib/Capybara", "--push_to_hub"] + >>> create_scheduled_uv_job(script, script_args=script_args, dependencies=["trl"], flavor="a10g-small", schedule="@weekly") + ``` + + Schedule a command: + + ```python + >>> from huggingface_hub import create_scheduled_uv_job + >>> script = "lighteval" + >>> script_args= ["endpoint", "inference-providers", "model_name=openai/gpt-oss-20b,provider=auto", "lighteval|gsm8k|0|0"] + >>> create_scheduled_uv_job(script, script_args=script_args, dependencies=["lighteval"], flavor="a10g-small", 
schedule="@weekly") + ``` + """ + image = image or "ghcr.io/astral-sh/uv:python3.12-bookworm" + # Build command + command, env, secrets = self._create_uv_command_env_and_secrets( + script=script, + script_args=script_args, + dependencies=dependencies, + python=python, + env=env, + secrets=secrets, + namespace=namespace, + token=token, + _repo=_repo, + ) + # Create RunCommand args + return self.create_scheduled_job( + image=image, + command=command, + schedule=schedule, + suspend=suspend, + concurrency=concurrency, + env=env, + secrets=secrets, + flavor=flavor, + timeout=timeout, + namespace=namespace, + token=token, + ) + + def _create_uv_command_env_and_secrets( + self, + *, + script: str, + script_args: Optional[List[str]], + dependencies: Optional[List[str]], + python: Optional[str], + env: Optional[Dict[str, Any]], + secrets: Optional[Dict[str, Any]], + namespace: Optional[str], + token: Union[bool, str, None], + _repo: Optional[str], + ) -> Tuple[List[str], Dict[str, Any], Dict[str, Any]]: + env = env or {} + secrets = secrets or {} + + # Build command + uv_args = [] + if dependencies: + for dependency in dependencies: + uv_args += ["--with", dependency] + if python: + uv_args += ["--python", python] + script_args = script_args or [] + + if namespace is None: + namespace = self.whoami(token=token)["name"] + + is_url = script.startswith("http://") or script.startswith("https://") + if is_url or not Path(script).is_file(): + # Direct URL execution or command - no upload needed + command = ["uv", "run"] + uv_args + [script] + script_args + else: + # Local file - upload to HF + script_path = Path(script) + filename = script_path.name + # Parse repo + if _repo: + repo_id = _repo + if "/" not in repo_id: + repo_id = f"{namespace}/{repo_id}" + else: + repo_id = f"{namespace}/hf-cli-jobs-uv-run-scripts" + + # Create repo if needed + try: + self.repo_info(repo_id, repo_type="dataset") + logger.debug(f"Using existing repository: {repo_id}") + except 
RepositoryNotFoundError: + logger.info(f"Creating repository: {repo_id}") + create_repo(repo_id, repo_type="dataset", private=True, exist_ok=True) + + # Upload script + logger.info(f"Uploading {script_path.name} to {repo_id}...") + with open(script_path, "r") as f: + script_content = f.read() + + commit_hash = self.upload_file( + path_or_fileobj=script_content.encode(), + path_in_repo=filename, + repo_id=repo_id, + repo_type="dataset", + ).oid + + script_url = f"{self.endpoint}/datasets/{repo_id}/resolve/{commit_hash}/{filename}" + repo_url = f"{self.endpoint}/datasets/{repo_id}" + + logger.debug(f"✓ Script uploaded to: {repo_url}/blob/main/{filename}") + + # Create and upload minimal README + timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S UTC") + readme_content = dedent( + f""" + --- + tags: + - hf-cli-jobs-uv-script + - ephemeral + viewer: false + --- + + # UV Script: {filename} + + Executed via `hf jobs uv run` on {timestamp} + + ## Run this script + + ```bash + hf jobs uv run {filename} + ``` + + --- + *Created with [hf jobs](https://huggingface.co/docs/huggingface_hub/main/en/guides/jobs)* + """ + ) + self.upload_file( + path_or_fileobj=readme_content.encode(), + path_in_repo="README.md", + repo_id=repo_id, + repo_type="dataset", + ) + + secrets["UV_SCRIPT_HF_TOKEN"] = token or self.token or get_token() + env["UV_SCRIPT_URL"] = script_url + + pre_command = ( + dedent( + """ + import urllib.request + import os + from pathlib import Path + o = urllib.request.build_opener() + o.addheaders = [("Authorization", "Bearer " + os.environ["UV_SCRIPT_HF_TOKEN"])] + Path("/tmp/script.py").write_bytes(o.open(os.environ["UV_SCRIPT_URL"]).read()) + """ + ) + .strip() + .replace('"', r"\"") + .split("\n") + ) + pre_command = ["python", "-c", '"' + "; ".join(pre_command) + '"'] + command = ["uv", "run"] + uv_args + ["/tmp/script.py"] + script_args + command = ["bash", "-c", " ".join(pre_command) + " && " + " ".join(command)] + return command, env, secrets + + +def 
_parse_revision_from_pr_url(pr_url: str) -> str: + """Safely parse revision number from a PR url. + + Example: + ```py + >>> _parse_revision_from_pr_url("https://huggingface.co/bigscience/bloom/discussions/2") + "refs/pr/2" + ``` + """ + re_match = re.match(_REGEX_DISCUSSION_URL, pr_url) + if re_match is None: + raise RuntimeError(f"Unexpected response from the hub, expected a Pull Request URL but got: '{pr_url}'") + return f"refs/pr/{re_match[1]}" + + +api = HfApi() + +whoami = api.whoami +auth_check = api.auth_check +get_token_permission = api.get_token_permission + +list_models = api.list_models +model_info = api.model_info + +list_datasets = api.list_datasets +dataset_info = api.dataset_info + +list_spaces = api.list_spaces +space_info = api.space_info + +list_papers = api.list_papers +paper_info = api.paper_info + +repo_exists = api.repo_exists +revision_exists = api.revision_exists +file_exists = api.file_exists +repo_info = api.repo_info +list_repo_files = api.list_repo_files +list_repo_refs = api.list_repo_refs +list_repo_commits = api.list_repo_commits +list_repo_tree = api.list_repo_tree +get_paths_info = api.get_paths_info + +get_model_tags = api.get_model_tags +get_dataset_tags = api.get_dataset_tags + +create_commit = api.create_commit +create_repo = api.create_repo +delete_repo = api.delete_repo +update_repo_visibility = api.update_repo_visibility +update_repo_settings = api.update_repo_settings +move_repo = api.move_repo +upload_file = api.upload_file +upload_folder = api.upload_folder +delete_file = api.delete_file +delete_folder = api.delete_folder +delete_files = api.delete_files +upload_large_folder = api.upload_large_folder +preupload_lfs_files = api.preupload_lfs_files +create_branch = api.create_branch +delete_branch = api.delete_branch +create_tag = api.create_tag +delete_tag = api.delete_tag +get_full_repo_name = api.get_full_repo_name + +# Danger-zone API +super_squash_history = api.super_squash_history +list_lfs_files = api.list_lfs_files 
permanently_delete_lfs_files = api.permanently_delete_lfs_files

# Safetensors helpers
get_safetensors_metadata = api.get_safetensors_metadata
parse_safetensors_file_metadata = api.parse_safetensors_file_metadata

# Background jobs
run_as_future = api.run_as_future

# Activity API
list_liked_repos = api.list_liked_repos
list_repo_likers = api.list_repo_likers
unlike = api.unlike

# Community API
get_discussion_details = api.get_discussion_details
get_repo_discussions = api.get_repo_discussions
create_discussion = api.create_discussion
create_pull_request = api.create_pull_request
change_discussion_status = api.change_discussion_status
comment_discussion = api.comment_discussion
edit_discussion_comment = api.edit_discussion_comment
rename_discussion = api.rename_discussion
merge_pull_request = api.merge_pull_request

# Space API
add_space_secret = api.add_space_secret
delete_space_secret = api.delete_space_secret
get_space_variables = api.get_space_variables
add_space_variable = api.add_space_variable
delete_space_variable = api.delete_space_variable
get_space_runtime = api.get_space_runtime
request_space_hardware = api.request_space_hardware
set_space_sleep_time = api.set_space_sleep_time
pause_space = api.pause_space
restart_space = api.restart_space
duplicate_space = api.duplicate_space
request_space_storage = api.request_space_storage
delete_space_storage = api.delete_space_storage

# Inference Endpoint API
list_inference_endpoints = api.list_inference_endpoints
create_inference_endpoint = api.create_inference_endpoint
get_inference_endpoint = api.get_inference_endpoint
update_inference_endpoint = api.update_inference_endpoint
delete_inference_endpoint = api.delete_inference_endpoint
pause_inference_endpoint = api.pause_inference_endpoint
resume_inference_endpoint = api.resume_inference_endpoint
scale_to_zero_inference_endpoint = api.scale_to_zero_inference_endpoint
create_inference_endpoint_from_catalog = api.create_inference_endpoint_from_catalog
list_inference_catalog = api.list_inference_catalog

# Collections API
get_collection = api.get_collection
list_collections = api.list_collections
create_collection = api.create_collection
update_collection_metadata = api.update_collection_metadata
delete_collection = api.delete_collection
add_collection_item = api.add_collection_item
update_collection_item = api.update_collection_item
delete_collection_item = api.delete_collection_item

# Access requests API
list_pending_access_requests = api.list_pending_access_requests
list_accepted_access_requests = api.list_accepted_access_requests
list_rejected_access_requests = api.list_rejected_access_requests
cancel_access_request = api.cancel_access_request
accept_access_request = api.accept_access_request
reject_access_request = api.reject_access_request
grant_access = api.grant_access

# Webhooks API
create_webhook = api.create_webhook
disable_webhook = api.disable_webhook
delete_webhook = api.delete_webhook
enable_webhook = api.enable_webhook
get_webhook = api.get_webhook
list_webhooks = api.list_webhooks
update_webhook = api.update_webhook


# User API
get_user_overview = api.get_user_overview
get_organization_overview = api.get_organization_overview
list_organization_members = api.list_organization_members
list_user_followers = api.list_user_followers
list_user_following = api.list_user_following

# Jobs API
run_job = api.run_job
fetch_job_logs = api.fetch_job_logs
list_jobs = api.list_jobs
inspect_job = api.inspect_job
cancel_job = api.cancel_job
run_uv_job = api.run_uv_job
create_scheduled_job = api.create_scheduled_job
list_scheduled_jobs = api.list_scheduled_jobs
inspect_scheduled_job = api.inspect_scheduled_job
delete_scheduled_job = api.delete_scheduled_job
suspend_scheduled_job = api.suspend_scheduled_job
resume_scheduled_job = api.resume_scheduled_job
@dataclass
class HfFileSystemResolvedPath:
    """Components of a Hugging Face file system path once it has been resolved."""

    repo_type: str
    repo_id: str
    revision: str
    path_in_repo: str
    # Text that followed '@' in the path as originally given (quoted or unquoted refs revision).
    # Kept only so that `unresolve()` can hand back the same spelling; hidden from `repr`.
    _raw_revision: Optional[str] = field(default=None, repr=False)

    def unresolve(self) -> str:
        """Rebuild the path string (`<prefix><repo_id>[@<revision>]/<path_in_repo>`) from the components."""
        type_prefix = constants.REPO_TYPES_URL_PREFIXES.get(self.repo_type, "")
        base = type_prefix + self.repo_id
        if self._raw_revision:
            # Prefer the revision exactly as it was written in the original path.
            revision_part = f"@{self._raw_revision}"
        elif self.revision != constants.DEFAULT_REVISION:
            revision_part = f"@{safe_revision(self.revision)}"
        else:
            # The default revision is left implicit.
            revision_part = ""
        # An empty `path_in_repo` leaves a trailing "/", which is stripped.
        return f"{base}{revision_part}/{self.path_in_repo}".rstrip("/")
f.write(data) + ``` + """ + + root_marker = "" + protocol = "hf" + + def __init__( + self, + *args, + endpoint: Optional[str] = None, + token: Union[bool, str, None] = None, + block_size: Optional[int] = None, + **storage_options, + ): + super().__init__(*args, **storage_options) + self.endpoint = endpoint or constants.ENDPOINT + self.token = token + self._api = HfApi(endpoint=endpoint, token=token) + self.block_size = block_size + # Maps (repo_type, repo_id, revision) to a 2-tuple with: + # * the 1st element indicating whether the repositoy and the revision exist + # * the 2nd element being the exception raised if the repository or revision doesn't exist + self._repo_and_revision_exists_cache: Dict[ + Tuple[str, str, Optional[str]], Tuple[bool, Optional[Exception]] + ] = {} + # Maps parent directory path to path infos + self.dircache: Dict[str, List[Dict[str, Any]]] = {} + + def _repo_and_revision_exist( + self, repo_type: str, repo_id: str, revision: Optional[str] + ) -> Tuple[bool, Optional[Exception]]: + if (repo_type, repo_id, revision) not in self._repo_and_revision_exists_cache: + try: + self._api.repo_info( + repo_id, revision=revision, repo_type=repo_type, timeout=constants.HF_HUB_ETAG_TIMEOUT + ) + except (RepositoryNotFoundError, HFValidationError) as e: + self._repo_and_revision_exists_cache[(repo_type, repo_id, revision)] = False, e + self._repo_and_revision_exists_cache[(repo_type, repo_id, None)] = False, e + except RevisionNotFoundError as e: + self._repo_and_revision_exists_cache[(repo_type, repo_id, revision)] = False, e + self._repo_and_revision_exists_cache[(repo_type, repo_id, None)] = True, None + else: + self._repo_and_revision_exists_cache[(repo_type, repo_id, revision)] = True, None + self._repo_and_revision_exists_cache[(repo_type, repo_id, None)] = True, None + return self._repo_and_revision_exists_cache[(repo_type, repo_id, revision)] + + def resolve_path(self, path: str, revision: Optional[str] = None) -> HfFileSystemResolvedPath: + """ 
+ Resolve a Hugging Face file system path into its components. + + Args: + path (`str`): + Path to resolve. + revision (`str`, *optional*): + The revision of the repo to resolve. Defaults to the revision specified in the path. + + Returns: + [`HfFileSystemResolvedPath`]: Resolved path information containing `repo_type`, `repo_id`, `revision` and `path_in_repo`. + + Raises: + `ValueError`: + If path contains conflicting revision information. + `NotImplementedError`: + If trying to list repositories. + """ + + def _align_revision_in_path_with_revision( + revision_in_path: Optional[str], revision: Optional[str] + ) -> Optional[str]: + if revision is not None: + if revision_in_path is not None and revision_in_path != revision: + raise ValueError( + f'Revision specified in path ("{revision_in_path}") and in `revision` argument ("{revision}")' + " are not the same." + ) + else: + revision = revision_in_path + return revision + + path = self._strip_protocol(path) + if not path: + # can't list repositories at root + raise NotImplementedError("Access to repositories lists is not implemented.") + elif path.split("/")[0] + "/" in constants.REPO_TYPES_URL_PREFIXES.values(): + if "/" not in path: + # can't list repositories at the repository type level + raise NotImplementedError("Access to repositories lists is not implemented.") + repo_type, path = path.split("/", 1) + repo_type = constants.REPO_TYPES_MAPPING[repo_type] + else: + repo_type = constants.REPO_TYPE_MODEL + if path.count("/") > 0: + if "@" in path: + repo_id, revision_in_path = path.split("@", 1) + if "/" in revision_in_path: + match = SPECIAL_REFS_REVISION_REGEX.search(revision_in_path) + if match is not None and revision in (None, match.group()): + # Handle `refs/convert/parquet` and PR revisions separately + path_in_repo = SPECIAL_REFS_REVISION_REGEX.sub("", revision_in_path).lstrip("/") + revision_in_path = match.group() + else: + revision_in_path, path_in_repo = revision_in_path.split("/", 1) + else: + 
path_in_repo = "" + revision = _align_revision_in_path_with_revision(unquote(revision_in_path), revision) + repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision) + if not repo_and_revision_exist: + _raise_file_not_found(path, err) + else: + revision_in_path = None + repo_id_with_namespace = "/".join(path.split("/")[:2]) + path_in_repo_with_namespace = "/".join(path.split("/")[2:]) + repo_id_without_namespace = path.split("/")[0] + path_in_repo_without_namespace = "/".join(path.split("/")[1:]) + repo_id = repo_id_with_namespace + path_in_repo = path_in_repo_with_namespace + repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision) + if not repo_and_revision_exist: + if isinstance(err, (RepositoryNotFoundError, HFValidationError)): + repo_id = repo_id_without_namespace + path_in_repo = path_in_repo_without_namespace + repo_and_revision_exist, _ = self._repo_and_revision_exist(repo_type, repo_id, revision) + if not repo_and_revision_exist: + _raise_file_not_found(path, err) + else: + _raise_file_not_found(path, err) + else: + repo_id = path + path_in_repo = "" + if "@" in path: + repo_id, revision_in_path = path.split("@", 1) + revision = _align_revision_in_path_with_revision(unquote(revision_in_path), revision) + else: + revision_in_path = None + repo_and_revision_exist, _ = self._repo_and_revision_exist(repo_type, repo_id, revision) + if not repo_and_revision_exist: + raise NotImplementedError("Access to repositories lists is not implemented.") + + revision = revision if revision is not None else constants.DEFAULT_REVISION + return HfFileSystemResolvedPath(repo_type, repo_id, revision, path_in_repo, _raw_revision=revision_in_path) + + def invalidate_cache(self, path: Optional[str] = None) -> None: + """ + Clear the cache for a given path. + + For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.invalidate_cache). 
+ + Args: + path (`str`, *optional*): + Path to clear from cache. If not provided, clear the entire cache. + + """ + if not path: + self.dircache.clear() + self._repo_and_revision_exists_cache.clear() + else: + resolved_path = self.resolve_path(path) + path = resolved_path.unresolve() + while path: + self.dircache.pop(path, None) + path = self._parent(path) + + # Only clear repo cache if path is to repo root + if not resolved_path.path_in_repo: + self._repo_and_revision_exists_cache.pop((resolved_path.repo_type, resolved_path.repo_id, None), None) + self._repo_and_revision_exists_cache.pop( + (resolved_path.repo_type, resolved_path.repo_id, resolved_path.revision), None + ) + + def _open( + self, + path: str, + mode: str = "rb", + revision: Optional[str] = None, + block_size: Optional[int] = None, + **kwargs, + ) -> "HfFileSystemFile": + block_size = block_size if block_size is not None else self.block_size + if block_size is not None: + kwargs["block_size"] = block_size + if "a" in mode: + raise NotImplementedError("Appending to remote files is not yet supported.") + if block_size == 0: + return HfFileSystemStreamFile(self, path, mode=mode, revision=revision, **kwargs) + else: + return HfFileSystemFile(self, path, mode=mode, revision=revision, **kwargs) + + def _rm(self, path: str, revision: Optional[str] = None, **kwargs) -> None: + resolved_path = self.resolve_path(path, revision=revision) + self._api.delete_file( + path_in_repo=resolved_path.path_in_repo, + repo_id=resolved_path.repo_id, + token=self.token, + repo_type=resolved_path.repo_type, + revision=resolved_path.revision, + commit_message=kwargs.get("commit_message"), + commit_description=kwargs.get("commit_description"), + ) + self.invalidate_cache(path=resolved_path.unresolve()) + + def rm( + self, + path: str, + recursive: bool = False, + maxdepth: Optional[int] = None, + revision: Optional[str] = None, + **kwargs, + ) -> None: + """ + Delete files from a repository. 
def ls(
    self, path: str, detail: bool = True, refresh: bool = False, revision: Optional[str] = None, **kwargs
) -> List[Union[str, Dict[str, Any]]]:
    """
    List the contents of a directory.

    For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.ls).

    > [!WARNING]
    > Note: When possible, use `HfApi.list_repo_tree()` for better performance.

    Args:
        path (`str`):
            Path to the directory.
        detail (`bool`, *optional*):
            If True, returns a list of dictionaries containing file information. If False,
            returns a list of file paths. Defaults to True.
        refresh (`bool`, *optional*):
            If True, bypass the cache and fetch the latest data. Defaults to False.
        revision (`str`, *optional*):
            The git revision to list from.

    Returns:
        `List[Union[str, Dict[str, Any]]]`: List of file paths (if detail=False) or list of file information
        dictionaries (if detail=True).
    """
    resolved = self.resolve_path(path, revision=revision)
    target = resolved.unresolve()
    try:
        entries = self._ls_tree(target, refresh=refresh, revision=revision, **kwargs)
    except EntryNotFoundError:
        # `target` is not a directory. It can still be a file, in which case it shows up in the
        # listing of its parent; the repository root has no parent to look into.
        if not resolved.path_in_repo:
            _raise_file_not_found(target, None)
        siblings = self._ls_tree(self._parent(target), refresh=refresh, revision=revision, **kwargs)
        entries = [entry for entry in siblings if entry["name"] == target]
        if not entries:
            _raise_file_not_found(target, None)
    if detail:
        return entries
    return [entry["name"] for entry in entries]
= deque( + [(depth, path_info) for path_info in cached_path_infos if path_info["type"] == "directory"] + ) + while dirs_to_visit: + depth, dir_info = dirs_to_visit.popleft() + if maxdepth is None or depth <= maxdepth: + if dir_info["name"] not in self.dircache: + dirs_not_in_dircache.append(dir_info["name"]) + else: + cached_path_infos = self.dircache[dir_info["name"]] + out.extend(cached_path_infos) + dirs_to_visit.extend( + [ + (depth + 1, path_info) + for path_info in cached_path_infos + if path_info["type"] == "directory" + ] + ) + + dirs_not_expanded = [] + if expand_info: + # Check if there are directories with non-expanded entries + dirs_not_expanded = [self._parent(o["name"]) for o in out if o["last_commit"] is None] + + if (recursive and dirs_not_in_dircache) or (expand_info and dirs_not_expanded): + # If the dircache is incomplete, find the common path of the missing and non-expanded entries + # and extend the output with the result of `_ls_tree(common_path, recursive=True)` + common_prefix = os.path.commonprefix(dirs_not_in_dircache + dirs_not_expanded) + # Get the parent directory if the common prefix itself is not a directory + common_path = ( + common_prefix.rstrip("/") + if common_prefix.endswith("/") + or common_prefix == root_path + or common_prefix in chain(dirs_not_in_dircache, dirs_not_expanded) + else self._parent(common_prefix) + ) + if maxdepth is not None: + common_path_depth = common_path[len(path) :].count("/") + maxdepth -= common_path_depth + out = [o for o in out if not o["name"].startswith(common_path + "/")] + for cached_path in list(self.dircache): + if cached_path.startswith(common_path + "/"): + self.dircache.pop(cached_path, None) + self.dircache.pop(common_path, None) + out.extend( + self._ls_tree( + common_path, + recursive=recursive, + refresh=True, + revision=revision, + expand_info=expand_info, + maxdepth=maxdepth, + ) + ) + else: + tree = self._api.list_repo_tree( + resolved_path.repo_id, + resolved_path.path_in_repo, + 
recursive=recursive, + expand=expand_info, + revision=resolved_path.revision, + repo_type=resolved_path.repo_type, + ) + for path_info in tree: + cache_path = root_path + "/" + path_info.path + if isinstance(path_info, RepoFile): + cache_path_info = { + "name": cache_path, + "size": path_info.size, + "type": "file", + "blob_id": path_info.blob_id, + "lfs": path_info.lfs, + "last_commit": path_info.last_commit, + "security": path_info.security, + } + else: + cache_path_info = { + "name": cache_path, + "size": 0, + "type": "directory", + "tree_id": path_info.tree_id, + "last_commit": path_info.last_commit, + } + parent_path = self._parent(cache_path_info["name"]) + self.dircache.setdefault(parent_path, []).append(cache_path_info) + depth = cache_path[len(path) :].count("/") + if maxdepth is None or depth <= maxdepth: + out.append(cache_path_info) + return out + + def walk(self, path: str, *args, **kwargs) -> Iterator[Tuple[str, List[str], List[str]]]: + """ + Return all files below the given path. + + For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.walk). + + Args: + path (`str`): + Root path to list files from. + + Returns: + `Iterator[Tuple[str, List[str], List[str]]]`: An iterator of (path, list of directory names, list of file names) tuples. + """ + path = self.resolve_path(path, revision=kwargs.get("revision")).unresolve() + yield from super().walk(path, *args, **kwargs) + + def glob(self, path: str, **kwargs) -> List[str]: + """ + Find files by glob-matching. + + For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.glob). + + Args: + path (`str`): + Path pattern to match. + + Returns: + `List[str]`: List of paths matching the pattern. 
def find(
    self,
    path: str,
    maxdepth: Optional[int] = None,
    withdirs: bool = False,
    detail: bool = False,
    refresh: bool = False,
    revision: Optional[str] = None,
    **kwargs,
) -> Union[List[str], Dict[str, Dict[str, Any]]]:
    """
    Recursively list everything located below `path`.

    For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.find).

    Args:
        path (`str`):
            Root path to list files from.
        maxdepth (`int`, *optional*):
            Maximum depth to descend into subdirectories. Must be at least 1 when provided.
        withdirs (`bool`, *optional*):
            Include directory paths in the output. Defaults to False.
        detail (`bool`, *optional*):
            If True, returns a dict mapping paths to file information. Defaults to False.
        refresh (`bool`, *optional*):
            If True, bypass the cache and fetch the latest data. Defaults to False.
        revision (`str`, *optional*):
            The git revision to list from.

    Returns:
        `Union[List[str], Dict[str, Dict[str, Any]]]`: Sorted list of paths, or (with `detail=True`) a dict mapping
        each path to its info entry, in sorted key order.

    Raises:
        `ValueError`: If `maxdepth` is provided and lower than 1.
    """
    if maxdepth is not None and maxdepth < 1:
        raise ValueError("maxdepth must be at least 1")

    resolved = self.resolve_path(path, revision=revision)
    path = resolved.unresolve()

    try:
        listing = self._ls_tree(
            path, recursive=True, refresh=refresh, revision=resolved.revision, maxdepth=maxdepth, **kwargs
        )
    except EntryNotFoundError:
        # The tree listing failed: `path` may still designate a single file.
        by_name = {}
        try:
            if self.info(path, revision=revision, **kwargs)["type"] == "file":
                by_name = {path: {}}
        except FileNotFoundError:
            pass
    else:
        if withdirs:
            # With `withdirs=True` the root directory itself is part of the result, as required by the spec.
            root_info = self.info(path, revision=resolved.revision, **kwargs)
            if root_info["type"] == "directory":
                listing = [root_info] + listing
        else:
            listing = [entry for entry in listing if entry["type"] != "directory"]
        by_name = {entry["name"]: entry for entry in listing}

    ordered_names = sorted(by_name)
    if detail:
        return {name: by_name[name] for name in ordered_names}
    return ordered_names
def modified(self, path: str, **kwargs) -> datetime:
    """
    Return the date of the last commit that touched `path`.

    The lookup goes through `info` with `expand_info=True` forced on (overriding any value passed by the caller),
    since the `last_commit` entry is read from the returned info.

    For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.modified).

    Args:
        path (`str`):
            Path to the file.

    Returns:
        `datetime`: Last commit date of the file.
    """
    info_kwargs = dict(kwargs)
    info_kwargs["expand_info"] = True
    last_commit = self.info(path, **info_kwargs)["last_commit"]
    return last_commit["date"]
def exists(self, path, **kwargs):
    """
    Check if a file exists.

    For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.exists).

    > [!WARNING]
    > Note: When possible, use `HfApi.file_exists()` for better performance.

    Args:
        path (`str`):
            Path to check.
        **kwargs:
            Forwarded to `info`. If `refresh=True` is passed, the cache entry for `path` is invalidated
            before the lookup.

    Returns:
        `bool`: True if `info` succeeds for the path, False if it raises any regular exception.
    """
    try:
        if kwargs.get("refresh", False):
            self.invalidate_cache(path)

        self.info(path, **kwargs)
        return True
    except Exception:
        # Best-effort check: any lookup failure means "does not exist". `Exception` (rather than a bare
        # `except`) lets KeyboardInterrupt / SystemExit propagate instead of being reported as False.
        return False
Defaults to no callback. + outfile (`IO`, *optional*): + Optional file-like object to write to. If provided, `lpath` is ignored. + + """ + revision = kwargs.get("revision") + unhandled_kwargs = set(kwargs.keys()) - {"revision"} + if not isinstance(callback, (NoOpCallback, TqdmCallback)) or len(unhandled_kwargs) > 0: + # for now, let's not handle custom callbacks + # and let's not handle custom kwargs + return super().get_file(rpath, lpath, callback=callback, outfile=outfile, **kwargs) + + # Taken from https://github.com/fsspec/filesystem_spec/blob/47b445ae4c284a82dd15e0287b1ffc410e8fc470/fsspec/spec.py#L883 + if isfilelike(lpath): + outfile = lpath + elif self.isdir(rpath): + os.makedirs(lpath, exist_ok=True) + return None + + if isinstance(lpath, (str, Path)): # otherwise, let's assume it's a file-like object + os.makedirs(os.path.dirname(lpath), exist_ok=True) + + # Open file if not already open + close_file = False + if outfile is None: + outfile = open(lpath, "wb") + close_file = True + initial_pos = outfile.tell() + + # Custom implementation of `get_file` to use `http_get`. 
def __reduce__(self):
    """
    Describe how to rebuild this filesystem when unpickling.

    Returns the `make_instance` factory together with its arguments: the concrete class, the storage
    args/options used to create the instance, and the per-instance caches to re-attach.
    """
    # re-populate the instance cache at HfFileSystem._cache and re-populate the cache attributes of every instance
    fs_class = type(self)
    constructor_args = self.storage_args
    constructor_options = self.storage_options
    cached_attributes = {
        "dircache": self.dircache,
        "_repo_and_revision_exists_cache": self._repo_and_revision_exists_cache,
    }
    return make_instance, (fs_class, constructor_args, constructor_options, cached_attributes)
def _upload_chunk(self, final: bool = False) -> None:
    """
    Append the current write buffer to the local temporary file and, on the final chunk, upload it.

    Args:
        final (`bool`, *optional*):
            If True, the temporary file is closed and uploaded with `upload_file`, then deleted, and the
            filesystem cache entry for this path is invalidated. Defaults to False.

    Raises:
        Whatever `upload_file` raises. The temporary file is removed in that case too.
    """
    self.buffer.seek(0)
    block = self.buffer.read()
    self.temp_file.write(block)
    if not final:
        return

    self.temp_file.close()
    try:
        self.fs._api.upload_file(
            path_or_fileobj=self.temp_file.name,
            path_in_repo=self.resolved_path.path_in_repo,
            repo_id=self.resolved_path.repo_id,
            token=self.fs.token,
            repo_type=self.resolved_path.repo_type,
            revision=self.resolved_path.revision,
            commit_message=self.kwargs.get("commit_message"),
            commit_description=self.kwargs.get("commit_description"),
        )
    finally:
        # The temp file is created with `delete=False`, so nothing else cleans it up:
        # remove it whether or not the upload succeeded to avoid leaking it on failure.
        os.remove(self.temp_file.name)
    self.fs.invalidate_cache(
        path=self.resolved_path.unresolve(),
    )
def __init__(
    self,
    fs: "HfFileSystem",
    path: str,
    mode: str = "rb",
    revision: Optional[str] = None,
    block_size: int = 0,
    cache_type: str = "none",
    **kwargs,
):
    """
    Open a read-only, non-seekable streaming handle on a remote file.

    Args:
        fs (`HfFileSystem`):
            Filesystem used to resolve the path and build requests.
        path (`str`):
            Path of the remote file.
        mode (`str`, *optional*):
            Opening mode. Any mode containing "w" is rejected. Defaults to "rb".
        revision (`str`, *optional*):
            The git revision to read from.
        block_size (`int`, *optional*):
            Must be 0.
        cache_type (`str`, *optional*):
            Must be "none".

    Raises:
        `ValueError`: If `block_size` is not 0, `cache_type` is not "none" or `mode` contains "w".
        `FileNotFoundError`: If `fs.resolve_path` cannot resolve `path`.
    """
    if block_size != 0:
        raise ValueError(f"HfFileSystemStreamFile only supports block_size=0 but got {block_size}")
    if cache_type != "none":
        raise ValueError(f"HfFileSystemStreamFile only supports cache_type='none' but got {cache_type}")
    if "w" in mode:
        raise ValueError(f"HfFileSystemStreamFile only supports reading but got mode='{mode}'")
    # Write modes are rejected above and `mode` is a named parameter (never part of `kwargs`), so the previous
    # write-specific handler could never fire and silently swallowed the error, which then resurfaced as an
    # AttributeError on `self.resolved_path`. Let FileNotFoundError propagate to the caller instead.
    self.resolved_path = fs.resolve_path(path, revision=revision)
    # avoid an unnecessary .info() call to instantiate .details
    self.details = {"name": self.resolved_path.unresolve(), "size": None}
    super().__init__(
        fs, self.resolved_path.unresolve(), mode=mode, block_size=block_size, cache_type=cache_type, **kwargs
    )
    # HTTP response is created lazily on first read
    self.response: Optional[Response] = None
    self.fs: HfFileSystem
def make_instance(cls, args, kwargs, instance_cache_attributes_dict):
    """
    Build an instance of `cls` and re-attach previously saved attributes to it.

    Args:
        cls: Callable used to create the instance, invoked as `cls(*args, **kwargs)`.
        args: Positional arguments for `cls`.
        kwargs: Keyword arguments for `cls`.
        instance_cache_attributes_dict: Mapping of attribute name to value; each pair is set on the new instance.

    Returns:
        The newly created instance, with every attribute from the mapping assigned.
    """
    instance = cls(*args, **kwargs)
    for attribute_name in instance_cache_attributes_dict:
        setattr(instance, attribute_name, instance_cache_attributes_dict[attribute_name])
    return instance
@dataclass
class MixinInfo:
    """Library-level metadata attached to a `ModelHubMixin` subclass and used to generate its model card."""

    # Template of the model card.
    model_card_template: str
    # Model card metadata (language, license, tags, ...).
    model_card_data: ModelCardData
    # URL of the library documentation.
    docs_url: Optional[str] = None
    # URL of the library paper.
    paper_url: Optional[str] = None
    # URL of the library repository.
    repo_url: Optional[str] = None
+ license_link (`str`, *optional*): + URL to the license of the library integrating ModelHubMixin. Used to generate model card. + Only used if `license` is set to `other` and `license_name` is set. + E.g: "https://coqui.ai/cpml". + pipeline_tag (`str`, *optional*): + Tag of the pipeline. Used to generate model card. E.g. "text-classification". + tags (`List[str]`, *optional*): + Tags to be added to the model card. Used to generate model card. E.g. ["computer-vision"] + coders (`Dict[Type, Tuple[Callable, Callable]]`, *optional*): + Dictionary of custom types and their encoders/decoders. Used to encode/decode arguments that are not + jsonable by default. E.g dataclasses, argparse.Namespace, OmegaConf, etc. + + Example: + + ```python + >>> from huggingface_hub import ModelHubMixin + + # Inherit from ModelHubMixin + >>> class MyCustomModel( + ... ModelHubMixin, + ... library_name="my-library", + ... tags=["computer-vision"], + ... repo_url="https://github.com/huggingface/my-cool-library", + ... paper_url="https://arxiv.org/abs/2304.12244", + ... docs_url="https://huggingface.co/docs/my-cool-library", + ... # ^ optional metadata to generate model card + ... ): + ... def __init__(self, size: int = 512, device: str = "cpu"): + ... # define how to initialize your model + ... super().__init__() + ... ... + ... + ... def _save_pretrained(self, save_directory: Path) -> None: + ... # define how to serialize your model + ... ... + ... + ... @classmethod + ... def from_pretrained( + ... cls: Type[T], + ... pretrained_model_name_or_path: Union[str, Path], + ... *, + ... force_download: bool = False, + ... resume_download: Optional[bool] = None, + ... proxies: Optional[Dict] = None, + ... token: Optional[Union[str, bool]] = None, + ... cache_dir: Optional[Union[str, Path]] = None, + ... local_files_only: bool = False, + ... revision: Optional[str] = None, + ... **model_kwargs, + ... ) -> T: + ... # define how to deserialize your model + ... ... 
+ + >>> model = MyCustomModel(size=256, device="gpu") + + # Save model weights to local directory + >>> model.save_pretrained("my-awesome-model") + + # Push model weights to the Hub + >>> model.push_to_hub("my-awesome-model") + + # Download and initialize weights from the Hub + >>> reloaded_model = MyCustomModel.from_pretrained("username/my-awesome-model") + >>> reloaded_model.size + 256 + + # Model card has been correctly populated + >>> from huggingface_hub import ModelCard + >>> card = ModelCard.load("username/my-awesome-model") + >>> card.data.tags + ["x-custom-tag", "pytorch_model_hub_mixin", "model_hub_mixin"] + >>> card.data.library_name + "my-library" + ``` + """ + + _hub_mixin_config: Optional[Union[dict, DataclassInstance]] = None + # ^ optional config attribute automatically set in `from_pretrained` + _hub_mixin_info: MixinInfo + # ^ information about the library integrating ModelHubMixin (used to generate model card) + _hub_mixin_inject_config: bool # whether `_from_pretrained` expects `config` or not + _hub_mixin_init_parameters: Dict[str, inspect.Parameter] # __init__ parameters + _hub_mixin_jsonable_default_values: Dict[str, Any] # default values for __init__ parameters + _hub_mixin_jsonable_custom_types: Tuple[Type, ...] 
def __init_subclass__(
    cls,
    *,
    # Generic info for model card
    repo_url: Optional[str] = None,
    paper_url: Optional[str] = None,
    docs_url: Optional[str] = None,
    # Model card template
    model_card_template: str = DEFAULT_MODEL_CARD,
    # Model card metadata
    language: Optional[List[str]] = None,
    library_name: Optional[str] = None,
    license: Optional[str] = None,
    license_name: Optional[str] = None,
    license_link: Optional[str] = None,
    pipeline_tag: Optional[str] = None,
    tags: Optional[List[str]] = None,
    # How to encode/decode arguments with custom type into a JSON config?
    coders: Optional[
        Dict[Type, CODER_T]
        # Key is a type.
        # Value is a tuple (encoder, decoder).
        # Example: {MyCustomType: (lambda x: x.value, lambda data: MyCustomType(data))}
    ] = None,
) -> None:
    """
    Record model card metadata and inspect the `__init__` signature once, at subclass creation time.

    The keyword arguments are the ones given in the class definition (`class Foo(ModelHubMixin, tags=[...])`).
    Metadata already registered on a parent class is inherited, then overridden by any value provided here.
    Tags are merged with the inherited ones, always include `"model_hub_mixin"`, and are stored sorted and
    de-duplicated. The `tags` list passed by the caller is never modified.
    """
    super().__init_subclass__()

    # Will be reused when creating modelcard.
    # Build a new list rather than appending in place: `tags` belongs to the caller and may be a
    # constant shared between several class definitions.
    mixin_tags = [*(tags or []), "model_hub_mixin"]

    # Initialize MixinInfo if not existent
    info = MixinInfo(model_card_template=model_card_template, model_card_data=ModelCardData())

    # If parent class has a MixinInfo, inherit from it as a copy
    if hasattr(cls, "_hub_mixin_info"):
        # Inherit model card template from parent class if not explicitly set
        if model_card_template == DEFAULT_MODEL_CARD:
            info.model_card_template = cls._hub_mixin_info.model_card_template

        # Inherit from parent model card data
        info.model_card_data = ModelCardData(**cls._hub_mixin_info.model_card_data.to_dict())

        # Inherit other info
        info.docs_url = cls._hub_mixin_info.docs_url
        info.paper_url = cls._hub_mixin_info.paper_url
        info.repo_url = cls._hub_mixin_info.repo_url
    cls._hub_mixin_info = info

    # Update MixinInfo with metadata
    if model_card_template is not None and model_card_template != DEFAULT_MODEL_CARD:
        info.model_card_template = model_card_template
    if repo_url is not None:
        info.repo_url = repo_url
    if paper_url is not None:
        info.paper_url = paper_url
    if docs_url is not None:
        info.docs_url = docs_url
    if language is not None:
        info.model_card_data.language = language
    if library_name is not None:
        info.model_card_data.library_name = library_name
    if license is not None:
        info.model_card_data.license = license
    if license_name is not None:
        info.model_card_data.license_name = license_name
    if license_link is not None:
        info.model_card_data.license_link = license_link
    if pipeline_tag is not None:
        info.model_card_data.pipeline_tag = pipeline_tag

    # Merge the tags of this class with the inherited ones (if any)
    if info.model_card_data.tags is not None:
        info.model_card_data.tags.extend(mixin_tags)
    else:
        info.model_card_data.tags = mixin_tags

    if info.model_card_data.tags is not None:
        info.model_card_data.tags = sorted(set(info.model_card_data.tags))

    # Handle encoders/decoders for args
    cls._hub_mixin_coders = coders or {}
    cls._hub_mixin_jsonable_custom_types = tuple(cls._hub_mixin_coders.keys())

    # Inspect __init__ signature to handle config
    cls._hub_mixin_init_parameters = dict(inspect.signature(cls.__init__).parameters)
    cls._hub_mixin_jsonable_default_values = {
        param.name: cls._encode_arg(param.default)
        for param in cls._hub_mixin_init_parameters.values()
        if param.default is not inspect.Parameter.empty and cls._is_jsonable(param.default)
    }
    # `_from_pretrained` only receives `config` if it declares a parameter with that name
    cls._hub_mixin_inject_config = "config" in inspect.signature(cls._from_pretrained).parameters
@classmethod
def _encode_arg(cls, arg: Any) -> Any:
    """
    Convert an argument to its JSON-serializable representation.

    Dataclasses are converted with `asdict`. Otherwise, the first registered coder whose type matches `arg`
    (via `isinstance`) provides the encoder to apply. Values with no matching coder are returned unchanged.
    """
    if is_dataclass(arg):
        return asdict(arg)  # type: ignore[arg-type]
    for handled_type, (encode, _unused_decoder) in cls._hub_mixin_coders.items():
        if not isinstance(arg, handled_type):
            continue
        # `None` is never handed to a custom encoder
        return None if arg is None else encode(arg)
    return arg
def save_pretrained(
    self,
    save_directory: Union[str, Path],
    *,
    config: Optional[Union[dict, DataclassInstance]] = None,
    repo_id: Optional[str] = None,
    push_to_hub: bool = False,
    model_card_kwargs: Optional[Dict[str, Any]] = None,
    **push_to_hub_kwargs,
) -> Optional[str]:
    """
    Save the model files, its config and its model card in a local directory, optionally pushing them to the Hub.

    Args:
        save_directory (`str` or `Path`):
            Path to directory in which the model weights and configuration will be saved. Created if missing.
        config (`dict` or `DataclassInstance`, *optional*):
            Model configuration specified as a key/value dictionary or a dataclass instance. Falls back to the
            config stored on the instance when not provided.
        push_to_hub (`bool`, *optional*, defaults to `False`):
            Whether or not to push your model to the Huggingface Hub after saving it.
        repo_id (`str`, *optional*):
            ID of your repository on the Hub. Used only if `push_to_hub=True`. Will default to the folder name if
            not provided.
        model_card_kwargs (`Dict[str, Any]`, *optional*):
            Additional arguments passed to the model card template to customize the model card.
        push_to_hub_kwargs:
            Additional key word arguments passed along to the [`~ModelHubMixin.push_to_hub`] method.
    Returns:
        `str` or `None`: url of the commit on the Hub if `push_to_hub=True`, `None` otherwise.
    """
    target_dir = Path(save_directory)
    target_dir.mkdir(parents=True, exist_ok=True)

    # Start from a clean slate regarding config.json: a stale file must be replaced, but a file written by the
    # custom `_save_pretrained` below must be preserved. Deleting it first lets the existence check after
    # `_save_pretrained` tell the two situations apart.
    config_file = target_dir / constants.CONFIG_NAME
    config_file.unlink(missing_ok=True)

    # framework-specific serialization of weights/files
    self._save_pretrained(target_dir)

    # write the config, unless `_save_pretrained` already serialized one
    if config is None:
        config = self._hub_mixin_config
    if config is not None:
        if is_dataclass(config):
            config = asdict(config)  # type: ignore[arg-type]
        if not config_file.exists():
            serialized_config = json.dumps(config, sort_keys=True, indent=2)
            config_file.write_text(serialized_config)

    # write the model card, never overwriting an existing README.md
    if model_card_kwargs is None:
        model_card_kwargs = {}
    readme_file = target_dir / "README.md"
    if not readme_file.exists():
        card = self.generate_model_card(**model_card_kwargs)
        card.save(readme_file)

    if not push_to_hub:
        return None

    # push to the Hub: work on a shallow copy so the caller's kwargs are left untouched
    hub_kwargs = dict(push_to_hub_kwargs)
    if config is not None:  # kwarg for `push_to_hub`
        hub_kwargs["config"] = config
    if repo_id is None:
        repo_id = target_dir.name  # defaults to the name of the save directory
    return self.push_to_hub(repo_id=repo_id, model_card_kwargs=model_card_kwargs, **hub_kwargs)
@classmethod
@validate_hf_hub_args
def from_pretrained(
    cls: Type[T],
    pretrained_model_name_or_path: Union[str, Path],
    *,
    force_download: bool = False,
    resume_download: Optional[bool] = None,
    proxies: Optional[Dict] = None,
    token: Optional[Union[str, bool]] = None,
    cache_dir: Optional[Union[str, Path]] = None,
    local_files_only: bool = False,
    revision: Optional[str] = None,
    **model_kwargs,
) -> T:
    """
    Download a model from the Huggingface Hub and instantiate it.

    The config file (`constants.CONFIG_NAME`) is looked up first, either in the local directory or on the Hub.
    When found, its values are decoded and merged into `model_kwargs` (explicitly passed kwargs always win),
    then instantiation is delegated to `cls._from_pretrained`.

    Args:
        pretrained_model_name_or_path (`str`, `Path`):
            - Either the `model_id` (string) of a model hosted on the Hub, e.g. `bigscience/bloom`.
            - Or a path to a `directory` containing model weights saved using
                [`~transformers.PreTrainedModel.save_pretrained`], e.g., `../path/to/my_model_directory/`.
        revision (`str`, *optional*):
            Revision of the model on the Hub. Can be a branch name, a git tag or any commit id.
            Defaults to the latest commit on `main` branch.
        force_download (`bool`, *optional*, defaults to `False`):
            Whether to force (re-)downloading the model weights and configuration files from the Hub, overriding
            the existing cache.
        proxies (`Dict[str, str]`, *optional*):
            A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
            'http://hostname': 'foo.bar:4012'}`. The proxies are used on every request.
        token (`str` or `bool`, *optional*):
            The token to use as HTTP bearer authorization for remote files. By default, it will use the token
            cached when running `hf auth login`.
        cache_dir (`str`, `Path`, *optional*):
            Path to the folder where cached files are stored.
        local_files_only (`bool`, *optional*, defaults to `False`):
            If `True`, avoid downloading the file and return the path to the local cached file if it exists.
        model_kwargs (`Dict`, *optional*):
            Additional kwargs to pass to the model during initialization.

    Returns:
        The instance returned by `cls._from_pretrained`.
    """
    model_id = str(pretrained_model_name_or_path)
    config_file: Optional[str] = None
    if os.path.isdir(model_id):
        # Local directory: a missing config file only triggers a warning.
        if constants.CONFIG_NAME in os.listdir(model_id):
            config_file = os.path.join(model_id, constants.CONFIG_NAME)
        else:
            logger.warning(f"{constants.CONFIG_NAME} not found in {Path(model_id).resolve()}")
    else:
        # Not a local directory: treat `model_id` as a Hub repo id and try to fetch the config file.
        try:
            config_file = hf_hub_download(
                repo_id=model_id,
                filename=constants.CONFIG_NAME,
                revision=revision,
                cache_dir=cache_dir,
                force_download=force_download,
                proxies=proxies,
                resume_download=resume_download,
                token=token,
                local_files_only=local_files_only,
            )
        except HfHubHTTPError as e:
            # The error is logged and loading continues without a config (`config_file` stays `None`).
            logger.info(f"{constants.CONFIG_NAME} not found on the HuggingFace Hub: {str(e)}")

    # Read config
    config = None
    if config_file is not None:
        with open(config_file, "r", encoding="utf-8") as f:
            config = json.load(f)

        # Decode custom types in config
        # Only keys that are init parameters with a type annotation are decoded; values are replaced in place.
        for key, value in config.items():
            if key in cls._hub_mixin_init_parameters:
                expected_type = cls._hub_mixin_init_parameters[key].annotation
                if expected_type is not inspect.Parameter.empty:
                    config[key] = cls._decode_arg(expected_type, value)

        # Populate model_kwargs from config
        # Values passed explicitly by the caller take precedence over the stored config.
        for param in cls._hub_mixin_init_parameters.values():
            if param.name not in model_kwargs and param.name in config:
                model_kwargs[param.name] = config[param.name]

        # Check if `config` argument was passed at init
        if "config" in cls._hub_mixin_init_parameters and "config" not in model_kwargs:
            # Decode `config` argument if it was passed
            # NOTE: this rebinds `config`; the steps below operate on the decoded object.
            config_annotation = cls._hub_mixin_init_parameters["config"].annotation
            config = cls._decode_arg(config_annotation, config)

            # Forward config to model initialization
            model_kwargs["config"] = config

        # Inject config if `**kwargs` are expected
        if is_dataclass(cls):
            # Dataclass models: forward only the keys that are dataclass fields.
            for key in cls.__dataclass_fields__:
                if key not in model_kwargs and key in config:
                    model_kwargs[key] = config[key]
        elif any(param.kind == inspect.Parameter.VAR_KEYWORD for param in cls._hub_mixin_init_parameters.values()):
            # `__init__` accepts `**kwargs`: forward every config entry not already provided.
            for key, value in config.items():
                if key not in model_kwargs:
                    model_kwargs[key] = value

        # Finally, also inject if `_from_pretrained` expects it
        if cls._hub_mixin_inject_config and "config" not in model_kwargs:
            model_kwargs["config"] = config

    instance = cls._from_pretrained(
        model_id=str(model_id),
        revision=revision,
        cache_dir=cache_dir,
        force_download=force_download,
        proxies=proxies,
        resume_download=resume_download,
        local_files_only=local_files_only,
        token=token,
        **model_kwargs,
    )

    # Implicitly set the config as instance attribute if not already set by the class
    # This way `config` will be available when calling `save_pretrained` or `push_to_hub`.
    if config is not None and (getattr(instance, "_hub_mixin_config", None) in (None, {})):
        instance._hub_mixin_config = config

    return instance
@validate_hf_hub_args
def push_to_hub(
    self,
    repo_id: str,
    *,
    config: Optional[Union[dict, DataclassInstance]] = None,
    commit_message: str = "Push model using huggingface_hub.",
    private: Optional[bool] = None,
    token: Optional[str] = None,
    branch: Optional[str] = None,
    create_pr: Optional[bool] = None,
    allow_patterns: Optional[Union[List[str], str]] = None,
    ignore_patterns: Optional[Union[List[str], str]] = None,
    delete_patterns: Optional[Union[List[str], str]] = None,
    model_card_kwargs: Optional[Dict[str, Any]] = None,
) -> str:
    """
    Upload model checkpoint to the Hub.

    The model is first saved with `save_pretrained` into a temporary folder, which is then uploaded in a
    single commit.

    Use `allow_patterns` and `ignore_patterns` to precisely filter which files should be pushed to the hub. Use
    `delete_patterns` to delete existing remote files in the same commit. See [`upload_folder`] reference for more
    details.

    Args:
        repo_id (`str`):
            ID of the repository to push to (example: `"username/my-model"`).
        config (`dict` or `DataclassInstance`, *optional*):
            Model configuration specified as a key/value dictionary or a dataclass instance.
        commit_message (`str`, *optional*):
            Message to commit while pushing.
        private (`bool`, *optional*):
            Whether the repository created should be private.
            If `None` (default), the repo will be public unless the organization's default is private.
        token (`str`, *optional*):
            The token to use as HTTP bearer authorization for remote files. By default, it will use the token
            cached when running `hf auth login`.
        branch (`str`, *optional*):
            The git branch on which to push the model. This defaults to `"main"`.
        create_pr (`boolean`, *optional*):
            Whether or not to create a Pull Request from `branch` with that commit. Defaults to `False`.
        allow_patterns (`List[str]` or `str`, *optional*):
            If provided, only files matching at least one pattern are pushed.
        ignore_patterns (`List[str]` or `str`, *optional*):
            If provided, files matching any of the patterns are not pushed.
        delete_patterns (`List[str]` or `str`, *optional*):
            If provided, remote files matching any of the patterns will be deleted from the repo.
        model_card_kwargs (`Dict[str, Any]`, *optional*):
            Additional arguments passed to the model card template to customize the model card.

    Returns:
        The url of the commit of your model in the given repository.
    """
    hub_client = HfApi(token=token)
    # `create_repo` is called with `exist_ok=True`; the repo id it reports back is used from here on.
    resolved_repo_id = hub_client.create_repo(repo_id=repo_id, private=private, exist_ok=True).repo_id

    upload_options = dict(
        repo_id=resolved_repo_id,
        repo_type="model",
        commit_message=commit_message,
        revision=branch,
        create_pr=create_pr,
        allow_patterns=allow_patterns,
        ignore_patterns=ignore_patterns,
        delete_patterns=delete_patterns,
    )

    # Export into a scratch folder and push its content in a single commit.
    with SoftTemporaryDirectory() as scratch_dir:
        export_dir = Path(scratch_dir) / resolved_repo_id
        self.save_pretrained(export_dir, config=config, model_card_kwargs=model_card_kwargs)
        return hub_client.upload_folder(folder_path=export_dir, **upload_options)
def __init_subclass__(cls, *args, tags: Optional[List[str]] = None, **kwargs) -> None:
    """Register the subclass with the `"pytorch_model_hub_mixin"` tag added to its tags.

    Args:
        tags: Optional tags given as a class keyword argument. The sequence is not modified.
        *args, **kwargs: Forwarded unchanged to the parent `__init_subclass__`.
    """
    # Build a new list instead of appending in place: `tags` belongs to the caller, and a list
    # reused across several class definitions would otherwise accumulate the extra tag.
    kwargs["tags"] = [*(tags or []), "pytorch_model_hub_mixin"]
    return super().__init_subclass__(*args, **kwargs)
def _load_dataclass(datacls: Type[DataclassInstance], data: dict) -> DataclassInstance:
    """Instantiate `datacls` from the entries of `data` that match its fields.

    Keys of `data` that are not declared fields of the dataclass are silently dropped.

    Args:
        datacls: The dataclass type to instantiate.
        data: Mapping of candidate field names to values.

    Returns:
        A new `datacls` instance built from the matching entries.
    """
    accepted = {}
    for name, value in data.items():
        if name in datacls.__dataclass_fields__:
            accepted[name] = value
    return datacls(**accepted)
+ + Example: + + ```python + >>> from huggingface_hub.inference_api import InferenceApi + + >>> # Mask-fill example + >>> inference = InferenceApi("bert-base-uncased") + >>> inference(inputs="The goal of life is [MASK].") + [{'sequence': 'the goal of life is life.', 'score': 0.10933292657136917, 'token': 2166, 'token_str': 'life'}] + + >>> # Question Answering example + >>> inference = InferenceApi("deepset/roberta-base-squad2") + >>> inputs = { + ... "question": "What's my name?", + ... "context": "My name is Clara and I live in Berkeley.", + ... } + >>> inference(inputs) + {'score': 0.9326569437980652, 'start': 11, 'end': 16, 'answer': 'Clara'} + + >>> # Zero-shot example + >>> inference = InferenceApi("typeform/distilbert-base-uncased-mnli") + >>> inputs = "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!" + >>> params = {"candidate_labels": ["refund", "legal", "faq"]} + >>> inference(inputs, params) + {'sequence': 'Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!', 'labels': ['refund', 'faq', 'legal'], 'scores': [0.9378499388694763, 0.04914155602455139, 0.013008488342165947]} + + >>> # Overriding configured task + >>> inference = InferenceApi("bert-base-uncased", task="feature-extraction") + + >>> # Text-to-image + >>> inference = InferenceApi("stabilityai/stable-diffusion-2-1") + >>> inference("cat") + + + >>> # Return as raw response to parse the output yourself + >>> inference = InferenceApi("mio/amadeus") + >>> response = inference("hello world", raw_response=True) + >>> response.headers + {"Content-Type": "audio/flac", ...} + >>> response.content # raw bytes from server + b'(...)' + ``` + """ + + @validate_hf_hub_args + @_deprecate_method( + version="1.0", + message=( + "`InferenceApi` client is deprecated in favor of the more feature-complete `InferenceClient`. 
Check out" + " this guide to learn how to convert your script to use it:" + " https://huggingface.co/docs/huggingface_hub/guides/inference#legacy-inferenceapi-client." + ), + ) + def __init__( + self, + repo_id: str, + task: Optional[str] = None, + token: Optional[str] = None, + gpu: bool = False, + ): + """Inits headers and API call information. + + Args: + repo_id (``str``): + Id of repository (e.g. `user/bert-base-uncased`). + task (``str``, `optional`, defaults ``None``): + Whether to force a task instead of using task specified in the + repository. + token (`str`, `optional`): + The API token to use as HTTP bearer authorization. This is not + the authentication token. You can find the token in + https://huggingface.co/settings/token. Alternatively, you can + find both your organizations and personal API tokens using + `HfApi().whoami(token)`. + gpu (`bool`, `optional`, defaults `False`): + Whether to use GPU instead of CPU for inference(requires Startup + plan at least). + """ + self.options = {"wait_for_model": True, "use_gpu": gpu} + self.headers = build_hf_headers(token=token) + + # Configure task + model_info = HfApi(token=token).model_info(repo_id=repo_id) + if not model_info.pipeline_tag and not task: + raise ValueError( + "Task not specified in the repository. Please add it to the model card" + " using pipeline_tag" + " (https://huggingface.co/docs#how-is-a-models-type-of-inference-api-and-widget-determined)" + ) + + if task and task != model_info.pipeline_tag: + if task not in ALL_TASKS: + raise ValueError(f"Invalid task {task}. Make sure it's valid.") + + logger.warning( + "You're using a different task than the one specified in the" + " repository. 
def __call__(
    self,
    inputs: Optional[Union[str, Dict, List[str], List[List[str]]]] = None,
    params: Optional[Dict] = None,
    data: Optional[bytes] = None,
    raw_response: bool = False,
) -> Any:
    """Make a call to the Inference API.

    Args:
        inputs (`str` or `Dict` or `List[str]` or `List[List[str]]`, *optional*):
            Inputs for the prediction.
        params (`Dict`, *optional*):
            Additional parameters for the models. Will be sent as `parameters` in the
            payload.
        data (`bytes`, *optional*):
            Bytes content of the request. In this case, leave `inputs` and `params` empty.
        raw_response (`bool`, defaults to `False`):
            If `True`, the raw `Response` object is returned. You can parse its content
            as preferred. By default, the content is parsed into a more practical format
            (json dictionary or PIL Image for example).

    Returns:
        The raw response if `raw_response=True`, a PIL image for `image/*` responses, or the
        decoded JSON body for `application/json` responses.

    Raises:
        ImportError: If the response is an image and Pillow is not installed.
        NotImplementedError: If the response content type is neither an image nor JSON.
    """
    # Build payload
    payload: Dict[str, Any] = {
        "options": self.options,
    }
    if inputs:
        payload["inputs"] = inputs
    if params:
        payload["parameters"] = params

    # Make API call
    response = get_session().post(self.api_url, headers=self.headers, json=payload, data=data)

    # Let the user handle the response
    if raw_response:
        return response

    # By default, parse the response for the user.
    content_type = response.headers.get("Content-Type") or ""
    if content_type.startswith("image"):
        if not is_pillow_available():
            raise ImportError(
                f"Task '{self.task}' returned as image but Pillow is not installed."
                " Please install it (`pip install Pillow`) or pass"
                " `raw_response=True` to get the raw `Response` object and parse"
                " the image by yourself."
            )

        from PIL import Image

        return Image.open(io.BytesIO(response.content))

    # Compare the bare media type only: a Content-Type header may carry parameters
    # (e.g. "application/json; charset=utf-8") and media types are case-insensitive.
    media_type = content_type.split(";", 1)[0].strip().lower()
    if media_type == "application/json":
        return response.json()

    raise NotImplementedError(
        f"{content_type} output type is not implemented yet. You can pass"
        " `raw_response=True` to get the raw `Response` object and parse the"
        " output by yourself."
    )
def _requires_keras_2_model(fn: CallableT) -> CallableT:
    """Decorate `fn` so that it refuses models that do not look like Keras 2.x models.

    The wrapped function must take the model as its first positional argument.

    Raises (from the wrapper):
        NotImplementedError: If the model has no `history` attribute.
    """

    @wraps(fn)
    def _guarded(model, *args, **kwargs):
        # Heuristic: the presence of a `history` attribute is used to recognise a Keras 2.x model.
        if hasattr(model, "history"):
            return fn(model, *args, **kwargs)
        raise NotImplementedError(
            f"Cannot use '{fn.__name__}': Keras 3.x is not supported."
            " Please save models manually and upload them using `upload_folder` or `hf upload`."
        )

    return _guarded  # type: ignore [return-value]
def _create_hyperparameter_table(model):
    """Render the optimizer configuration of `model` as a markdown table.

    Returns:
        The table as a string, or `None` when the model has no optimizer.
    """
    if model.optimizer is None:
        return None

    # Nested optimizer settings are flattened so that each one gets its own row.
    flat_params = _flatten_dict(model.optimizer.get_config())
    flat_params["training_precision"] = keras.mixed_precision.global_policy().name

    header = "| Hyperparameters | Value |\n| :-- | :-- |\n"
    rows = [f"| {name} | {setting} |\n" for name, setting in flat_params.items()]
    return header + "".join(rows)
+ """ + readme_path = repo_dir / "README.md" + if readme_path.exists(): + return + + hyperparameters = _create_hyperparameter_table(model) + if plot_model and is_graphviz_available() and is_pydot_available(): + _plot_network(model, repo_dir) + if metadata is None: + metadata = {} + metadata["library_name"] = "keras" + model_card: str = "---\n" + model_card += yaml_dump(metadata, default_flow_style=False) + model_card += "---\n" + model_card += "\n## Model description\n\nMore information needed\n" + model_card += "\n## Intended uses & limitations\n\nMore information needed\n" + model_card += "\n## Training and evaluation data\n\nMore information needed\n" + if hyperparameters is not None: + model_card += "\n## Training procedure\n" + model_card += "\n### Training hyperparameters\n" + model_card += "\nThe following hyperparameters were used during training:\n\n" + model_card += hyperparameters + model_card += "\n" + if plot_model and os.path.exists(f"{repo_dir}/model.png"): + model_card += "\n ## Model Plot\n" + model_card += "\n
" + model_card += "\nView Model Plot\n" + path_to_plot = "./model.png" + model_card += f"\n![Model Image]({path_to_plot})\n" + model_card += "\n
@_requires_keras_2_model
def save_pretrained_keras(
    model,
    save_directory: Union[str, Path],
    config: Optional[Dict[str, Any]] = None,
    include_optimizer: bool = False,
    plot_model: bool = True,
    tags: Optional[Union[list, str]] = None,
    **model_save_kwargs,
):
    """
    Saves a Keras model to save_directory in SavedModel format. Use this if
    you're using the Functional or Sequential APIs.

    Args:
        model (`Keras.Model`):
            The [Keras
            model](https://www.tensorflow.org/api_docs/python/tf/keras/Model)
            you'd like to save. The model must be compiled and built.
        save_directory (`str` or `Path`):
            Specify directory in which you want to save the Keras model.
        config (`dict`, *optional*):
            Configuration object to be saved alongside the model weights.
        include_optimizer(`bool`, *optional*, defaults to `False`):
            Whether or not to include optimizer in serialization.
        plot_model (`bool`, *optional*, defaults to `True`):
            Setting this to `True` will plot the model and put it in the model
            card. Requires graphviz and pydot to be installed.
        tags (Union[`str`,`list`], *optional*):
            List of tags that are related to model or string of a single tag. See example tags
            [here](https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1).
            The list is not modified.
        model_save_kwargs(`dict`, *optional*):
            model_save_kwargs will be passed to
            [`tf.keras.models.save_model()`](https://www.tensorflow.org/api_docs/python/tf/keras/models/save_model).

    Raises:
        ImportError: If Keras could not be imported.
        ValueError: If the model is not built.
        RuntimeError: If `config` is provided but is not a dict.
    """
    if keras is None:
        raise ImportError("Called a Tensorflow-specific function but could not import it.")

    if not model.built:
        raise ValueError("Model should be built before trying to save")

    save_directory = Path(save_directory)
    save_directory.mkdir(parents=True, exist_ok=True)

    # saving config
    if config:
        if not isinstance(config, dict):
            raise RuntimeError(f"Provided config to save_pretrained_keras should be a dict. Got: '{type(config)}'")

        with (save_directory / constants.CONFIG_NAME).open("w") as f:
            json.dump(config, f)

    metadata = {}
    if isinstance(tags, list):
        # Copy the list: the deprecated `task_name` may be appended below and the
        # caller's list must not be modified as a side effect.
        metadata["tags"] = list(tags)
    elif isinstance(tags, str):
        metadata["tags"] = [tags]

    # `task_name` is a deprecated alias that is folded into the tags.
    task_name = model_save_kwargs.pop("task_name", None)
    if task_name is not None:
        warnings.warn(
            "`task_name` input argument is deprecated. Pass `tags` instead.",
            FutureWarning,
        )
        if "tags" in metadata:
            metadata["tags"].append(task_name)
        else:
            metadata["tags"] = [task_name]

    # Persist the training history when there is one.
    if model.history is not None:
        if model.history.history != {}:
            path = save_directory / "history.json"
            if path.exists():
                warnings.warn(
                    "`history.json` file already exists, it will be overwritten by the history of this version.",
                    UserWarning,
                )
            with path.open("w", encoding="utf-8") as f:
                json.dump(model.history.history, f, indent=2, sort_keys=True)

    _create_model_card(model, save_directory, plot_model, metadata)
    keras.models.save_model(model, save_directory, include_optimizer=include_optimizer, **model_save_kwargs)
It can be a branch name, + a tag name, or a commit id, since we use a git-based system + for storing models and other artifacts on huggingface.co, so + `revision` can be any identifier allowed by git. + - A path to a `directory` containing model weights saved using + [`~transformers.PreTrainedModel.save_pretrained`], e.g., + `./my_model_directory/`. + - `None` if you are both providing the configuration and state + dictionary (resp. with keyword arguments `config` and + `state_dict`). + force_download (`bool`, *optional*, defaults to `False`): + Whether to force the (re-)download of the model weights and + configuration files, overriding the cached versions if they exist. + proxies (`Dict[str, str]`, *optional*): + A dictionary of proxy servers to use by protocol or endpoint, e.g., + `{'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}`. The + proxies are used on each request. + token (`str` or `bool`, *optional*): + The token to use as HTTP bearer authorization for remote files. If + `True`, will use the token generated when running `transformers-cli + login` (stored in `~/.huggingface`). + cache_dir (`Union[str, os.PathLike]`, *optional*): + Path to a directory in which a downloaded pretrained model + configuration should be cached if the standard cache should not be + used. + local_files_only(`bool`, *optional*, defaults to `False`): + Whether to only look at local files (i.e., do not try to download + the model). + model_kwargs (`Dict`, *optional*): + model_kwargs will be passed to the model during initialization + + > [!TIP] + > Passing `token=True` is required when you want to use a private + > model. 
+ """ + return KerasModelHubMixin.from_pretrained(*args, **kwargs) + + +@validate_hf_hub_args +@_requires_keras_2_model +def push_to_hub_keras( + model, + repo_id: str, + *, + config: Optional[dict] = None, + commit_message: str = "Push Keras model using huggingface_hub.", + private: Optional[bool] = None, + api_endpoint: Optional[str] = None, + token: Optional[str] = None, + branch: Optional[str] = None, + create_pr: Optional[bool] = None, + allow_patterns: Optional[Union[List[str], str]] = None, + ignore_patterns: Optional[Union[List[str], str]] = None, + delete_patterns: Optional[Union[List[str], str]] = None, + log_dir: Optional[str] = None, + include_optimizer: bool = False, + tags: Optional[Union[list, str]] = None, + plot_model: bool = True, + **model_save_kwargs, +): + """ + Upload model checkpoint to the Hub. + + Use `allow_patterns` and `ignore_patterns` to precisely filter which files should be pushed to the hub. Use + `delete_patterns` to delete existing remote files in the same commit. See [`upload_folder`] reference for more + details. + + Args: + model (`Keras.Model`): + The [Keras model](`https://www.tensorflow.org/api_docs/python/tf/keras/Model`) you'd like to push to the + Hub. The model must be compiled and built. + repo_id (`str`): + ID of the repository to push to (example: `"username/my-model"`). + commit_message (`str`, *optional*, defaults to "Add Keras model"): + Message to commit while pushing. + private (`bool`, *optional*): + Whether the repository created should be private. + If `None` (default), the repo will be public unless the organization's default is private. + api_endpoint (`str`, *optional*): + The API endpoint to use when pushing the model to the hub. + token (`str`, *optional*): + The token to use as HTTP bearer authorization for remote files. If + not set, will use the token set when logging in with + `hf auth login` (stored in `~/.huggingface`). + branch (`str`, *optional*): + The git branch on which to push the model. 
This defaults to + the default branch as specified in your repository, which + defaults to `"main"`. + create_pr (`boolean`, *optional*): + Whether or not to create a Pull Request from `branch` with that commit. + Defaults to `False`. + config (`dict`, *optional*): + Configuration object to be saved alongside the model weights. + allow_patterns (`List[str]` or `str`, *optional*): + If provided, only files matching at least one pattern are pushed. + ignore_patterns (`List[str]` or `str`, *optional*): + If provided, files matching any of the patterns are not pushed. + delete_patterns (`List[str]` or `str`, *optional*): + If provided, remote files matching any of the patterns will be deleted from the repo. + log_dir (`str`, *optional*): + TensorBoard logging directory to be pushed. The Hub automatically + hosts and displays a TensorBoard instance if log files are included + in the repository. + include_optimizer (`bool`, *optional*, defaults to `False`): + Whether or not to include optimizer during serialization. + tags (Union[`list`, `str`], *optional*): + List of tags that are related to model or string of a single tag. See example tags + [here](https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1). + plot_model (`bool`, *optional*, defaults to `True`): + Setting this to `True` will plot the model and put it in the model + card. Requires graphviz and pydot to be installed. + model_save_kwargs(`dict`, *optional*): + model_save_kwargs will be passed to + [`tf.keras.models.save_model()`](https://www.tensorflow.org/api_docs/python/tf/keras/models/save_model). + + Returns: + The url of the commit of your model in the given repository. 
+ """ + api = HfApi(endpoint=api_endpoint) + repo_id = api.create_repo(repo_id=repo_id, token=token, private=private, exist_ok=True).repo_id + + # Push the files to the repo in a single commit + with SoftTemporaryDirectory() as tmp: + saved_path = Path(tmp) / repo_id + save_pretrained_keras( + model, + saved_path, + config=config, + include_optimizer=include_optimizer, + tags=tags, + plot_model=plot_model, + **model_save_kwargs, + ) + + # If `log_dir` provided, delete remote logs and upload new ones + if log_dir is not None: + delete_patterns = ( + [] + if delete_patterns is None + else ( + [delete_patterns] # convert `delete_patterns` to a list + if isinstance(delete_patterns, str) + else delete_patterns + ) + ) + delete_patterns.append("logs/*") + copytree(log_dir, saved_path / "logs") + + return api.upload_folder( + repo_type="model", + repo_id=repo_id, + folder_path=saved_path, + commit_message=commit_message, + token=token, + revision=branch, + create_pr=create_pr, + allow_patterns=allow_patterns, + ignore_patterns=ignore_patterns, + delete_patterns=delete_patterns, + ) + + +class KerasModelHubMixin(ModelHubMixin): + """ + Implementation of [`ModelHubMixin`] to provide model Hub upload/download + capabilities to Keras models. + + + ```python + >>> import tensorflow as tf + >>> from huggingface_hub import KerasModelHubMixin + + + >>> class MyModel(tf.keras.Model, KerasModelHubMixin): + ... def __init__(self, **kwargs): + ... super().__init__() + ... self.config = kwargs.pop("config", None) + ... self.dummy_inputs = ... + ... self.layer = ... + + ... def call(self, *args): + ... return ... + + + >>> # Initialize and compile the model as you normally would + >>> model = MyModel() + >>> model.compile(...) 
+ >>> # Build the graph by training it or passing dummy inputs + >>> _ = model(model.dummy_inputs) + >>> # Save model weights to local directory + >>> model.save_pretrained("my-awesome-model") + >>> # Push model weights to the Hub + >>> model.push_to_hub("my-awesome-model") + >>> # Download and initialize weights from the Hub + >>> model = MyModel.from_pretrained("username/super-cool-model") + ``` + """ + + def _save_pretrained(self, save_directory): + save_pretrained_keras(self, save_directory) + + @classmethod + def _from_pretrained( + cls, + model_id, + revision, + cache_dir, + force_download, + proxies, + resume_download, + local_files_only, + token, + config: Optional[Dict[str, Any]] = None, + **model_kwargs, + ): + """Here we just call [`from_pretrained_keras`] function so both the mixin and + functional APIs stay in sync. + + TODO - Some args above aren't used since we are calling + snapshot_download instead of hf_hub_download. + """ + if keras is None: + raise ImportError("Called a TensorFlow-specific function but could not import it.") + + # Root is either a local filepath matching model_id or a cached snapshot + if not os.path.isdir(model_id): + storage_folder = snapshot_download( + repo_id=model_id, + revision=revision, + cache_dir=cache_dir, + library_name="keras", + library_version=get_tf_version(), + ) + else: + storage_folder = model_id + + # TODO: change this in a future PR. We are not returning a KerasModelHubMixin instance here... + model = keras.models.load_model(storage_folder) + + # For now, we add a new attribute, config, to store the config loaded from the hub/a local dir. 
+ model.config = config + + return model diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/lfs.py b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/lfs.py new file mode 100644 index 0000000000000000000000000000000000000000..40b6ad087ca6bd33874433439a2c4f5b23d100c5 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/lfs.py @@ -0,0 +1,466 @@ +# coding=utf-8 +# Copyright 2019-present, the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Git LFS related type definitions and utilities""" + +import inspect +import io +import re +import warnings +from dataclasses import dataclass +from math import ceil +from os.path import getsize +from pathlib import Path +from typing import TYPE_CHECKING, BinaryIO, Dict, Iterable, List, Optional, Tuple, TypedDict +from urllib.parse import unquote + +from huggingface_hub import constants + +from .utils import ( + build_hf_headers, + fix_hf_endpoint_in_url, + get_session, + hf_raise_for_status, + http_backoff, + logging, + tqdm, + validate_hf_hub_args, +) +from .utils._lfs import SliceFileObj +from .utils.sha import sha256, sha_fileobj +from .utils.tqdm import is_tqdm_disabled + + +if TYPE_CHECKING: + from ._commit_api import CommitOperationAdd + +logger = logging.get_logger(__name__) + +OID_REGEX = re.compile(r"^[0-9a-f]{40}$") + +LFS_MULTIPART_UPLOAD_COMMAND = "lfs-multipart-upload" + +LFS_HEADERS = { + "Accept": "application/vnd.git-lfs+json", + "Content-Type": "application/vnd.git-lfs+json", +} + + +@dataclass +class UploadInfo: + """ + Dataclass holding required information to determine whether a blob + should be uploaded to the hub using the LFS protocol or the regular protocol + + Args: + sha256 (`bytes`): + SHA256 hash of the blob + size (`int`): + Size in bytes of the blob + sample (`bytes`): + First 512 bytes of the blob + """ + + sha256: bytes + size: int + sample: bytes + + @classmethod + def from_path(cls, path: str): + size = getsize(path) + with io.open(path, "rb") as file: + sample = file.peek(512)[:512] + sha = sha_fileobj(file) + return cls(size=size, sha256=sha, sample=sample) + + @classmethod + def from_bytes(cls, data: bytes): + sha = sha256(data).digest() + return cls(size=len(data), sample=data[:512], sha256=sha) + + @classmethod + def from_fileobj(cls, fileobj: BinaryIO): + sample = fileobj.read(512) + fileobj.seek(0, io.SEEK_SET) + sha = sha_fileobj(fileobj) + size = fileobj.tell() + fileobj.seek(0, io.SEEK_SET) + return cls(size=size, 
sha256=sha, sample=sample) + + +@validate_hf_hub_args +def post_lfs_batch_info( + upload_infos: Iterable[UploadInfo], + token: Optional[str], + repo_type: str, + repo_id: str, + revision: Optional[str] = None, + endpoint: Optional[str] = None, + headers: Optional[Dict[str, str]] = None, + transfers: Optional[List[str]] = None, +) -> Tuple[List[dict], List[dict], Optional[str]]: + """ + Requests the LFS batch endpoint to retrieve upload instructions + + Learn more: https://github.com/git-lfs/git-lfs/blob/main/docs/api/batch.md + + Args: + upload_infos (`Iterable` of `UploadInfo`): + `UploadInfo` for the files that are being uploaded, typically obtained + from `CommitOperationAdd.upload_info` + repo_type (`str`): + Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`. + repo_id (`str`): + A namespace (user or an organization) and a repo name separated + by a `/`. + revision (`str`, *optional*): + The git revision to upload to. + headers (`dict`, *optional*): + Additional headers to include in the request + transfers (`list`, *optional*): + List of transfer methods to use. Defaults to ["basic", "multipart"]. + + Returns: + `LfsBatchInfo`: 3-tuple: + - First element is the list of upload instructions from the server + - Second element is a list of errors, if any + - Third element is the chosen transfer adapter if provided by the server (e.g. "basic", "multipart", "xet") + + Raises: + [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + If an argument is invalid or the server response is malformed. + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError) + If the server returned an error. 
+ """ + endpoint = endpoint if endpoint is not None else constants.ENDPOINT + url_prefix = "" + if repo_type in constants.REPO_TYPES_URL_PREFIXES: + url_prefix = constants.REPO_TYPES_URL_PREFIXES[repo_type] + batch_url = f"{endpoint}/{url_prefix}{repo_id}.git/info/lfs/objects/batch" + payload: Dict = { + "operation": "upload", + "transfers": transfers if transfers is not None else ["basic", "multipart"], + "objects": [ + { + "oid": upload.sha256.hex(), + "size": upload.size, + } + for upload in upload_infos + ], + "hash_algo": "sha256", + } + if revision is not None: + payload["ref"] = {"name": unquote(revision)} # revision has been previously 'quoted' + + headers = { + **LFS_HEADERS, + **build_hf_headers(token=token), + **(headers or {}), + } + resp = get_session().post(batch_url, headers=headers, json=payload) + hf_raise_for_status(resp) + batch_info = resp.json() + + objects = batch_info.get("objects", None) + if not isinstance(objects, list): + raise ValueError("Malformed response from server") + + chosen_transfer = batch_info.get("transfer") + chosen_transfer = chosen_transfer if isinstance(chosen_transfer, str) else None + + return ( + [_validate_batch_actions(obj) for obj in objects if "error" not in obj], + [_validate_batch_error(obj) for obj in objects if "error" in obj], + chosen_transfer, + ) + + +class PayloadPartT(TypedDict): + partNumber: int + etag: str + + +class CompletionPayloadT(TypedDict): + """Payload that will be sent to the Hub when uploading multi-part.""" + + oid: str + parts: List[PayloadPartT] + + +def lfs_upload( + operation: "CommitOperationAdd", + lfs_batch_action: Dict, + token: Optional[str] = None, + headers: Optional[Dict[str, str]] = None, + endpoint: Optional[str] = None, +) -> None: + """ + Handles uploading a given object to the Hub with the LFS protocol. + + Can be a No-op if the content of the file is already present on the hub large file storage. 
+ + Args: + operation (`CommitOperationAdd`): + The add operation triggering this upload. + lfs_batch_action (`dict`): + Upload instructions from the LFS batch endpoint for this object. See [`~utils.lfs.post_lfs_batch_info`] for + more details. + headers (`dict`, *optional*): + Headers to include in the request, including authentication and user agent headers. + + Raises: + [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + If `lfs_batch_action` is improperly formatted + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError) + If the upload resulted in an error + """ + # 0. If LFS file is already present, skip upload + _validate_batch_actions(lfs_batch_action) + actions = lfs_batch_action.get("actions") + if actions is None: + # The file was already uploaded + logger.debug(f"Content of file {operation.path_in_repo} is already present upstream - skipping upload") + return + + # 1. Validate server response (check required keys in dict) + upload_action = lfs_batch_action["actions"]["upload"] + _validate_lfs_action(upload_action) + verify_action = lfs_batch_action["actions"].get("verify") + if verify_action is not None: + _validate_lfs_action(verify_action) + + # 2. Upload file (either single part or multi-part) + header = upload_action.get("header", {}) + chunk_size = header.get("chunk_size") + upload_url = fix_hf_endpoint_in_url(upload_action["href"], endpoint=endpoint) + if chunk_size is not None: + try: + chunk_size = int(chunk_size) + except (ValueError, TypeError): + raise ValueError( + f"Malformed response from LFS batch endpoint: `chunk_size` should be an integer. Got '{chunk_size}'." + ) + _upload_multi_part(operation=operation, header=header, chunk_size=chunk_size, upload_url=upload_url) + else: + _upload_single_part(operation=operation, upload_url=upload_url) + + # 3. 
Verify upload went well + if verify_action is not None: + _validate_lfs_action(verify_action) + verify_url = fix_hf_endpoint_in_url(verify_action["href"], endpoint) + verify_resp = get_session().post( + verify_url, + headers=build_hf_headers(token=token, headers=headers), + json={"oid": operation.upload_info.sha256.hex(), "size": operation.upload_info.size}, + ) + hf_raise_for_status(verify_resp) + logger.debug(f"{operation.path_in_repo}: Upload successful") + + +def _validate_lfs_action(lfs_action: dict): + """validates response from the LFS batch endpoint""" + if not ( + isinstance(lfs_action.get("href"), str) + and (lfs_action.get("header") is None or isinstance(lfs_action.get("header"), dict)) + ): + raise ValueError("lfs_action is improperly formatted") + return lfs_action + + +def _validate_batch_actions(lfs_batch_actions: dict): + """validates response from the LFS batch endpoint""" + if not (isinstance(lfs_batch_actions.get("oid"), str) and isinstance(lfs_batch_actions.get("size"), int)): + raise ValueError("lfs_batch_actions is improperly formatted") + + upload_action = lfs_batch_actions.get("actions", {}).get("upload") + verify_action = lfs_batch_actions.get("actions", {}).get("verify") + if upload_action is not None: + _validate_lfs_action(upload_action) + if verify_action is not None: + _validate_lfs_action(verify_action) + return lfs_batch_actions + + +def _validate_batch_error(lfs_batch_error: dict): + """validates response from the LFS batch endpoint""" + if not (isinstance(lfs_batch_error.get("oid"), str) and isinstance(lfs_batch_error.get("size"), int)): + raise ValueError("lfs_batch_error is improperly formatted") + error_info = lfs_batch_error.get("error") + if not ( + isinstance(error_info, dict) + and isinstance(error_info.get("message"), str) + and isinstance(error_info.get("code"), int) + ): + raise ValueError("lfs_batch_error is improperly formatted") + return lfs_batch_error + + +def _upload_single_part(operation: "CommitOperationAdd", 
upload_url: str) -> None: + """ + Uploads `fileobj` as a single PUT HTTP request (basic LFS transfer protocol) + + Args: + upload_url (`str`): + The URL to PUT the file to. + fileobj: + The file-like object holding the data to upload. + + Returns: `requests.Response` + + Raises: + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError) + If the upload resulted in an error. + """ + with operation.as_file(with_tqdm=True) as fileobj: + # S3 might raise a transient 500 error -> let's retry if that happens + response = http_backoff("PUT", upload_url, data=fileobj) + hf_raise_for_status(response) + + +def _upload_multi_part(operation: "CommitOperationAdd", header: Dict, chunk_size: int, upload_url: str) -> None: + """ + Uploads file using HF multipart LFS transfer protocol. + """ + # 1. Get upload URLs for each part + sorted_parts_urls = _get_sorted_parts_urls(header=header, upload_info=operation.upload_info, chunk_size=chunk_size) + + # 2. Upload parts (either with hf_transfer or in pure Python) + use_hf_transfer = constants.HF_HUB_ENABLE_HF_TRANSFER + if ( + constants.HF_HUB_ENABLE_HF_TRANSFER + and not isinstance(operation.path_or_fileobj, str) + and not isinstance(operation.path_or_fileobj, Path) + ): + warnings.warn( + "hf_transfer is enabled but does not support uploading from bytes or BinaryIO, falling back to regular" + " upload" + ) + use_hf_transfer = False + + response_headers = ( + _upload_parts_hf_transfer(operation=operation, sorted_parts_urls=sorted_parts_urls, chunk_size=chunk_size) + if use_hf_transfer + else _upload_parts_iteratively(operation=operation, sorted_parts_urls=sorted_parts_urls, chunk_size=chunk_size) + ) + + # 3. 
Send completion request + completion_res = get_session().post( + upload_url, + json=_get_completion_payload(response_headers, operation.upload_info.sha256.hex()), + headers=LFS_HEADERS, + ) + hf_raise_for_status(completion_res) + + +def _get_sorted_parts_urls(header: Dict, upload_info: UploadInfo, chunk_size: int) -> List[str]: + sorted_part_upload_urls = [ + upload_url + for _, upload_url in sorted( + [ + (int(part_num, 10), upload_url) + for part_num, upload_url in header.items() + if part_num.isdigit() and len(part_num) > 0 + ], + key=lambda t: t[0], + ) + ] + num_parts = len(sorted_part_upload_urls) + if num_parts != ceil(upload_info.size / chunk_size): + raise ValueError("Invalid server response to upload large LFS file") + return sorted_part_upload_urls + + +def _get_completion_payload(response_headers: List[Dict], oid: str) -> CompletionPayloadT: + parts: List[PayloadPartT] = [] + for part_number, header in enumerate(response_headers): + etag = header.get("etag") + if etag is None or etag == "": + raise ValueError(f"Invalid etag (`{etag}`) returned for part {part_number + 1}") + parts.append( + { + "partNumber": part_number + 1, + "etag": etag, + } + ) + return {"oid": oid, "parts": parts} + + +def _upload_parts_iteratively( + operation: "CommitOperationAdd", sorted_parts_urls: List[str], chunk_size: int +) -> List[Dict]: + headers = [] + with operation.as_file(with_tqdm=True) as fileobj: + for part_idx, part_upload_url in enumerate(sorted_parts_urls): + with SliceFileObj( + fileobj, + seek_from=chunk_size * part_idx, + read_limit=chunk_size, + ) as fileobj_slice: + # S3 might raise a transient 500 error -> let's retry if that happens + part_upload_res = http_backoff("PUT", part_upload_url, data=fileobj_slice) + hf_raise_for_status(part_upload_res) + headers.append(part_upload_res.headers) + return headers # type: ignore + + +def _upload_parts_hf_transfer( + operation: "CommitOperationAdd", sorted_parts_urls: List[str], chunk_size: int +) -> List[Dict]: + # 
Upload file using an external Rust-based package. Upload is faster but support less features (no progress bars). + try: + from hf_transfer import multipart_upload + except ImportError: + raise ValueError( + "Fast uploading using 'hf_transfer' is enabled (HF_HUB_ENABLE_HF_TRANSFER=1) but 'hf_transfer' package is" + " not available in your environment. Try `pip install hf_transfer`." + ) + + supports_callback = "callback" in inspect.signature(multipart_upload).parameters + if not supports_callback: + warnings.warn( + "You are using an outdated version of `hf_transfer`. Consider upgrading to latest version to enable progress bars using `pip install -U hf_transfer`." + ) + + total = operation.upload_info.size + desc = operation.path_in_repo + if len(desc) > 40: + desc = f"(…){desc[-40:]}" + + with tqdm( + unit="B", + unit_scale=True, + total=total, + initial=0, + desc=desc, + disable=is_tqdm_disabled(logger.getEffectiveLevel()), + name="huggingface_hub.lfs_upload", + ) as progress: + try: + output = multipart_upload( + file_path=operation.path_or_fileobj, + parts_urls=sorted_parts_urls, + chunk_size=chunk_size, + max_files=128, + parallel_failures=127, # could be removed + max_retries=5, + **({"callback": progress.update} if supports_callback else {}), + ) + except Exception as e: + raise RuntimeError( + "An error occurred while uploading using `hf_transfer`. Consider disabling HF_HUB_ENABLE_HF_TRANSFER for" + " better error handling." 
+ ) from e + if not supports_callback: + progress.update(total) + return output diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/py.typed b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/py.typed new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/repocard.py b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/repocard.py new file mode 100644 index 0000000000000000000000000000000000000000..357935c3f1831df2afc86a30f82f10fa8039a225 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/huggingface_hub/repocard.py @@ -0,0 +1,827 @@ +import os +import re +from pathlib import Path +from typing import Any, Dict, Literal, Optional, Type, Union + +import requests +import yaml + +from huggingface_hub.file_download import hf_hub_download +from huggingface_hub.hf_api import upload_file +from huggingface_hub.repocard_data import ( + CardData, + DatasetCardData, + EvalResult, + ModelCardData, + SpaceCardData, + eval_results_to_model_index, + model_index_to_eval_results, +) +from huggingface_hub.utils import get_session, is_jinja_available, yaml_dump + +from . import constants +from .errors import EntryNotFoundError +from .utils import SoftTemporaryDirectory, logging, validate_hf_hub_args + + +logger = logging.get_logger(__name__) + + +TEMPLATE_MODELCARD_PATH = Path(__file__).parent / "templates" / "modelcard_template.md" +TEMPLATE_DATASETCARD_PATH = Path(__file__).parent / "templates" / "datasetcard_template.md" + +# exact same regex as in the Hub server. Please keep in sync. 
+# See https://github.com/huggingface/moon-landing/blob/main/server/lib/ViewMarkdown.ts#L18 +REGEX_YAML_BLOCK = re.compile(r"^(\s*---[\r\n]+)([\S\s]*?)([\r\n]+---(\r\n|\n|$))") + + +class RepoCard: + card_data_class = CardData + default_template_path = TEMPLATE_MODELCARD_PATH + repo_type = "model" + + def __init__(self, content: str, ignore_metadata_errors: bool = False): + """Initialize a RepoCard from string content. The content should be a + Markdown file with a YAML block at the beginning and a Markdown body. + + Args: + content (`str`): The content of the Markdown file. + + Example: + ```python + >>> from huggingface_hub.repocard import RepoCard + >>> text = ''' + ... --- + ... language: en + ... license: mit + ... --- + ... + ... # My repo + ... ''' + >>> card = RepoCard(text) + >>> card.data.to_dict() + {'language': 'en', 'license': 'mit'} + >>> card.text + '\\n# My repo\\n' + + ``` + > [!TIP] + > Raises the following error: + > + > - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + > when the content of the repo card metadata is not a dictionary. + """ + + # Set the content of the RepoCard, as well as underlying .data and .text attributes. + # See the `content` property setter for more details. 
+ self.ignore_metadata_errors = ignore_metadata_errors + self.content = content + + @property + def content(self): + """The content of the RepoCard, including the YAML block and the Markdown body.""" + line_break = _detect_line_ending(self._content) or "\n" + return f"---{line_break}{self.data.to_yaml(line_break=line_break, original_order=self._original_order)}{line_break}---{line_break}{self.text}" + + @content.setter + def content(self, content: str): + """Set the content of the RepoCard.""" + self._content = content + + match = REGEX_YAML_BLOCK.search(content) + if match: + # Metadata found in the YAML block + yaml_block = match.group(2) + self.text = content[match.end() :] + data_dict = yaml.safe_load(yaml_block) + + if data_dict is None: + data_dict = {} + + # The YAML block's data should be a dictionary + if not isinstance(data_dict, dict): + raise ValueError("repo card metadata block should be a dict") + else: + # Model card without metadata... create empty metadata + logger.warning("Repo card metadata block was not found. Setting CardData to empty.") + data_dict = {} + self.text = content + + self.data = self.card_data_class(**data_dict, ignore_metadata_errors=self.ignore_metadata_errors) + self._original_order = list(data_dict.keys()) + + def __str__(self): + return self.content + + def save(self, filepath: Union[Path, str]): + r"""Save a RepoCard to a file. + + Args: + filepath (`Union[Path, str]`): Filepath to the markdown file to save. + + Example: + ```python + >>> from huggingface_hub.repocard import RepoCard + >>> card = RepoCard("---\nlanguage: en\n---\n# This is a test repo card") + >>> card.save("/tmp/test.md") + + ``` + """ + filepath = Path(filepath) + filepath.parent.mkdir(parents=True, exist_ok=True) + # Preserve newlines as in the existing file. 
+ with open(filepath, mode="w", newline="", encoding="utf-8") as f: + f.write(str(self)) + + @classmethod + def load( + cls, + repo_id_or_path: Union[str, Path], + repo_type: Optional[str] = None, + token: Optional[str] = None, + ignore_metadata_errors: bool = False, + ): + """Initialize a RepoCard from a Hugging Face Hub repo's README.md or a local filepath. + + Args: + repo_id_or_path (`Union[str, Path]`): + The repo ID associated with a Hugging Face Hub repo or a local filepath. + repo_type (`str`, *optional*): + The type of Hugging Face repo to push to. Defaults to None, which will use use "model". Other options + are "dataset" and "space". Not used when loading from a local filepath. If this is called from a child + class, the default value will be the child class's `repo_type`. + token (`str`, *optional*): + Authentication token, obtained with `huggingface_hub.HfApi.login` method. Will default to the stored token. + ignore_metadata_errors (`str`): + If True, errors while parsing the metadata section will be ignored. Some information might be lost during + the process. Use it at your own risk. + + Returns: + [`huggingface_hub.repocard.RepoCard`]: The RepoCard (or subclass) initialized from the repo's + README.md file or filepath. + + Example: + ```python + >>> from huggingface_hub.repocard import RepoCard + >>> card = RepoCard.load("nateraw/food") + >>> assert card.data.tags == ["generated_from_trainer", "image-classification", "pytorch"] + + ``` + """ + + if Path(repo_id_or_path).is_file(): + card_path = Path(repo_id_or_path) + elif isinstance(repo_id_or_path, str): + card_path = Path( + hf_hub_download( + repo_id_or_path, + constants.REPOCARD_NAME, + repo_type=repo_type or cls.repo_type, + token=token, + ) + ) + else: + raise ValueError(f"Cannot load RepoCard: path not found on disk ({repo_id_or_path}).") + + # Preserve newlines in the existing file. 
+ with card_path.open(mode="r", newline="", encoding="utf-8") as f: + return cls(f.read(), ignore_metadata_errors=ignore_metadata_errors) + + def validate(self, repo_type: Optional[str] = None): + """Validates card against Hugging Face Hub's card validation logic. + Using this function requires access to the internet, so it is only called + internally by [`huggingface_hub.repocard.RepoCard.push_to_hub`]. + + Args: + repo_type (`str`, *optional*, defaults to "model"): + The type of Hugging Face repo to push to. Options are "model", "dataset", and "space". + If this function is called from a child class, the default will be the child class's `repo_type`. + + > [!TIP] + > Raises the following errors: + > + > - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + > if the card fails validation checks. + > - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError) + > if the request to the Hub API fails for any other reason. + """ + + # If repo type is provided, otherwise, use the repo type of the card. + repo_type = repo_type or self.repo_type + + body = { + "repoType": repo_type, + "content": str(self), + } + headers = {"Accept": "text/plain"} + + try: + r = get_session().post("https://huggingface.co/api/validate-yaml", body, headers=headers) + r.raise_for_status() + except requests.exceptions.HTTPError as exc: + if r.status_code == 400: + raise ValueError(r.text) + else: + raise exc + + def push_to_hub( + self, + repo_id: str, + token: Optional[str] = None, + repo_type: Optional[str] = None, + commit_message: Optional[str] = None, + commit_description: Optional[str] = None, + revision: Optional[str] = None, + create_pr: Optional[bool] = None, + parent_commit: Optional[str] = None, + ): + """Push a RepoCard to a Hugging Face Hub repo. + + Args: + repo_id (`str`): + The repo ID of the Hugging Face Hub repo to push to. Example: "nateraw/food". 
+ token (`str`, *optional*): + Authentication token, obtained with `huggingface_hub.HfApi.login` method. Will default to + the stored token. + repo_type (`str`, *optional*, defaults to "model"): + The type of Hugging Face repo to push to. Options are "model", "dataset", and "space". If this + function is called by a child class, it will default to the child class's `repo_type`. + commit_message (`str`, *optional*): + The summary / title / first line of the generated commit. + commit_description (`str`, *optional*) + The description of the generated commit. + revision (`str`, *optional*): + The git revision to commit from. Defaults to the head of the `"main"` branch. + create_pr (`bool`, *optional*): + Whether or not to create a Pull Request with this commit. Defaults to `False`. + parent_commit (`str`, *optional*): + The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported. + If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`. + If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`. + Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be + especially useful if the repo is updated / committed to concurrently. + Returns: + `str`: URL of the commit which updated the card metadata. + """ + + # If repo type is provided, otherwise, use the repo type of the card. 
@classmethod
def from_template(
    cls,
    card_data: CardData,
    template_path: Optional[str] = None,
    template_str: Optional[str] = None,
    **template_kwargs,
):
    """Initialize a RepoCard from a template. By default, it uses the default template.

    Templates are Jinja2 templates that can be customized by passing keyword arguments.

    Args:
        card_data (`huggingface_hub.CardData`):
            A huggingface_hub.CardData instance containing the metadata you want to include in the YAML
            header of the repo card on the Hugging Face Hub. Its YAML dump is exposed to the template
            as the `card_data` variable, and each of its dict entries as a variable of the same name.
        template_path (`str`, *optional*):
            A path to a markdown file with optional Jinja template variables that can be filled
            in with `template_kwargs`. Takes precedence over `template_str` when both are given.
        template_str (`str`, *optional*):
            The template content itself. Used only when `template_path` is None. When neither is
            given, the file at `cls.default_template_path` is used.
        template_kwargs:
            Extra variables passed to the template. They override entries of the same name
            coming from `card_data`.

    Returns:
        [`huggingface_hub.repocard.RepoCard`]: A RepoCard instance with the specified card data and content from the
        template.

    Raises:
        `ImportError`: If Jinja2 is not installed.
    """
    if is_jinja_available():
        import jinja2
    else:
        raise ImportError(
            "Using RepoCard.from_template requires Jinja2 to be installed. Please"
            " install it with `pip install Jinja2`."
        )

    kwargs = card_data.to_dict().copy()
    kwargs.update(template_kwargs)  # Template_kwargs have priority

    # Read templates as UTF-8 explicitly: the rest of this module reads and writes
    # cards as UTF-8, and the platform default encoding is locale-dependent.
    if template_path is not None:
        template_str = Path(template_path).read_text(encoding="utf-8")
    if template_str is None:
        template_str = Path(cls.default_template_path).read_text(encoding="utf-8")
    template = jinja2.Template(template_str)
    content = template.render(card_data=card_data.to_yaml(), **kwargs)
    return cls(content)
) + >>> card = ModelCard.from_template( + ... card_data, + ... model_description='This model does x + y...' + ... ) + + >>> # Including Evaluation Results + >>> card_data = ModelCardData( + ... language='en', + ... tags=['image-classification', 'resnet'], + ... eval_results=[ + ... EvalResult( + ... task_type='image-classification', + ... dataset_type='beans', + ... dataset_name='Beans', + ... metric_type='accuracy', + ... metric_value=0.9, + ... ), + ... ], + ... model_name='my-cool-model', + ... ) + >>> card = ModelCard.from_template(card_data) + + >>> # Using a Custom Template + >>> card_data = ModelCardData( + ... language='en', + ... tags=['image-classification', 'resnet'] + ... ) + >>> card = ModelCard.from_template( + ... card_data=card_data, + ... template_path='./src/huggingface_hub/templates/modelcard_template.md', + ... custom_template_var='custom value', # will be replaced in template if it exists + ... ) + + ``` + """ + return super().from_template(card_data, template_path, template_str, **template_kwargs) + + +class DatasetCard(RepoCard): + card_data_class = DatasetCardData + default_template_path = TEMPLATE_DATASETCARD_PATH + repo_type = "dataset" + + @classmethod + def from_template( # type: ignore # violates Liskov property but easier to use + cls, + card_data: DatasetCardData, + template_path: Optional[str] = None, + template_str: Optional[str] = None, + **template_kwargs, + ): + """Initialize a DatasetCard from a template. By default, it uses the default template, which can be found here: + https://github.com/huggingface/huggingface_hub/blob/main/src/huggingface_hub/templates/datasetcard_template.md + + Templates are Jinja2 templates that can be customized by passing keyword arguments. + + Args: + card_data (`huggingface_hub.DatasetCardData`): + A huggingface_hub.DatasetCardData instance containing the metadata you want to include in the YAML + header of the dataset card on the Hugging Face Hub. 
+ template_path (`str`, *optional*): + A path to a markdown file with optional Jinja template variables that can be filled + in with `template_kwargs`. Defaults to the default template. + + Returns: + [`huggingface_hub.DatasetCard`]: A DatasetCard instance with the specified card data and content from the + template. + + Example: + ```python + >>> from huggingface_hub import DatasetCard, DatasetCardData + + >>> # Using the Default Template + >>> card_data = DatasetCardData( + ... language='en', + ... license='mit', + ... annotations_creators='crowdsourced', + ... task_categories=['text-classification'], + ... task_ids=['sentiment-classification', 'text-scoring'], + ... multilinguality='monolingual', + ... pretty_name='My Text Classification Dataset', + ... ) + >>> card = DatasetCard.from_template( + ... card_data, + ... pretty_name=card_data.pretty_name, + ... ) + + >>> # Using a Custom Template + >>> card_data = DatasetCardData( + ... language='en', + ... license='mit', + ... ) + >>> card = DatasetCard.from_template( + ... card_data=card_data, + ... template_path='./src/huggingface_hub/templates/datasetcard_template.md', + ... custom_template_var='custom value', # will be replaced in template if it exists + ... ) + + ``` + """ + return super().from_template(card_data, template_path, template_str, **template_kwargs) + + +class SpaceCard(RepoCard): + card_data_class = SpaceCardData + default_template_path = TEMPLATE_MODELCARD_PATH + repo_type = "space" + + +def _detect_line_ending(content: str) -> Literal["\r", "\n", "\r\n", None]: # noqa: F722 + """Detect the line ending of a string. Used by RepoCard to avoid making huge diff on newlines. + + Uses same implementation as in Hub server, keep it in sync. + + Returns: + str: The detected line ending of the string. 
+ """ + cr = content.count("\r") + lf = content.count("\n") + crlf = content.count("\r\n") + if cr + lf == 0: + return None + if crlf == cr and crlf == lf: + return "\r\n" + if cr > lf: + return "\r" + else: + return "\n" + + +def metadata_load(local_path: Union[str, Path]) -> Optional[Dict]: + content = Path(local_path).read_text() + match = REGEX_YAML_BLOCK.search(content) + if match: + yaml_block = match.group(2) + data = yaml.safe_load(yaml_block) + if data is None or isinstance(data, dict): + return data + raise ValueError("repo card metadata block should be a dict") + else: + return None + + +def metadata_save(local_path: Union[str, Path], data: Dict) -> None: + """ + Save the metadata dict in the upper YAML part Trying to preserve newlines as + in the existing file. Docs about open() with newline="" parameter: + https://docs.python.org/3/library/functions.html?highlight=open#open Does + not work with "^M" linebreaks, which are replaced by \n + """ + line_break = "\n" + content = "" + # try to detect existing newline character + if os.path.exists(local_path): + with open(local_path, "r", newline="", encoding="utf8") as readme: + content = readme.read() + if isinstance(readme.newlines, tuple): + line_break = readme.newlines[0] + elif isinstance(readme.newlines, str): + line_break = readme.newlines + + # creates a new file if it not + with open(local_path, "w", newline="", encoding="utf8") as readme: + data_yaml = yaml_dump(data, sort_keys=False, line_break=line_break) + # sort_keys: keep dict order + match = REGEX_YAML_BLOCK.search(content) + if match: + output = content[: match.start()] + f"---{line_break}{data_yaml}---{line_break}" + content[match.end() :] + else: + output = f"---{line_break}{data_yaml}---{line_break}{content}" + + readme.write(output) + readme.close() + + +def metadata_eval_result( + *, + model_pretty_name: str, + task_pretty_name: str, + task_id: str, + metrics_pretty_name: str, + metrics_id: str, + metrics_value: Any, + 
dataset_pretty_name: str, + dataset_id: str, + metrics_config: Optional[str] = None, + metrics_verified: bool = False, + dataset_config: Optional[str] = None, + dataset_split: Optional[str] = None, + dataset_revision: Optional[str] = None, + metrics_verification_token: Optional[str] = None, +) -> Dict: + """ + Creates a metadata dict with the result from a model evaluated on a dataset. + + Args: + model_pretty_name (`str`): + The name of the model in natural language. + task_pretty_name (`str`): + The name of a task in natural language. + task_id (`str`): + Example: automatic-speech-recognition. A task id. + metrics_pretty_name (`str`): + A name for the metric in natural language. Example: Test WER. + metrics_id (`str`): + Example: wer. A metric id from https://hf.co/metrics. + metrics_value (`Any`): + The value from the metric. Example: 20.0 or "20.0 ± 1.2". + dataset_pretty_name (`str`): + The name of the dataset in natural language. + dataset_id (`str`): + Example: common_voice. A dataset id from https://hf.co/datasets. + metrics_config (`str`, *optional*): + The name of the metric configuration used in `load_metric()`. + Example: bleurt-large-512 in `load_metric("bleurt", "bleurt-large-512")`. + metrics_verified (`bool`, *optional*, defaults to `False`): + Indicates whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not. Automatically computed by Hugging Face, do not set. + dataset_config (`str`, *optional*): + Example: fr. The name of the dataset configuration used in `load_dataset()`. + dataset_split (`str`, *optional*): + Example: test. The name of the dataset split used in `load_dataset()`. + dataset_revision (`str`, *optional*): + Example: 5503434ddd753f426f4b38109466949a1217c2bb. The name of the dataset dataset revision + used in `load_dataset()`. 
+ metrics_verification_token (`bool`, *optional*): + A JSON Web Token that is used to verify whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not. + + Returns: + `dict`: a metadata dict with the result from a model evaluated on a dataset. + + Example: + ```python + >>> from huggingface_hub import metadata_eval_result + >>> results = metadata_eval_result( + ... model_pretty_name="RoBERTa fine-tuned on ReactionGIF", + ... task_pretty_name="Text Classification", + ... task_id="text-classification", + ... metrics_pretty_name="Accuracy", + ... metrics_id="accuracy", + ... metrics_value=0.2662102282047272, + ... dataset_pretty_name="ReactionJPEG", + ... dataset_id="julien-c/reactionjpeg", + ... dataset_config="default", + ... dataset_split="test", + ... ) + >>> results == { + ... 'model-index': [ + ... { + ... 'name': 'RoBERTa fine-tuned on ReactionGIF', + ... 'results': [ + ... { + ... 'task': { + ... 'type': 'text-classification', + ... 'name': 'Text Classification' + ... }, + ... 'dataset': { + ... 'name': 'ReactionJPEG', + ... 'type': 'julien-c/reactionjpeg', + ... 'config': 'default', + ... 'split': 'test' + ... }, + ... 'metrics': [ + ... { + ... 'type': 'accuracy', + ... 'value': 0.2662102282047272, + ... 'name': 'Accuracy', + ... 'verified': False + ... } + ... ] + ... } + ... ] + ... } + ... ] + ... 
@validate_hf_hub_args
def metadata_update(
    repo_id: str,
    metadata: Dict,
    *,
    repo_type: Optional[str] = None,
    overwrite: bool = False,
    token: Optional[str] = None,
    commit_message: Optional[str] = None,
    commit_description: Optional[str] = None,
    revision: Optional[str] = None,
    create_pr: bool = False,
    parent_commit: Optional[str] = None,
) -> str:
    """
    Updates the metadata in the README.md of a repository on the Hugging Face Hub.
    If the README.md file doesn't exist yet, a new one is created with the metadata and
    the default ModelCard or DatasetCard template. For `space` repo, an error is thrown
    as a Space cannot exist without a `README.md` file.

    The card is loaded (or created), the entries of `metadata` are merged into its data,
    and the result is pushed with `push_to_hub`.

    Args:
        repo_id (`str`):
            The name of the repository.
        metadata (`dict`):
            A dictionary containing the metadata to be updated. The `"model-index"` key is
            merged result by result; every other key is set as a whole.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if updating to a dataset or space,
            `None` or `"model"` if updating to a model. Default is `None`.
        overwrite (`bool`, *optional*, defaults to `False`):
            If set to `True` an existing field can be overwritten, otherwise
            attempting to overwrite an existing field will cause an error.
        token (`str`, *optional*):
            The Hugging Face authentication token.
        commit_message (`str`, *optional*):
            The summary / title / first line of the generated commit. Defaults to
            `"Update metadata with huggingface_hub"`
        commit_description (`str`, *optional*):
            The description of the generated commit
        revision (`str`, *optional*):
            The git revision to commit from. Defaults to the head of the
            `"main"` branch.
        create_pr (`boolean`, *optional*):
            Whether or not to create a Pull Request from `revision` with that commit.
            Defaults to `False`.
        parent_commit (`str`, *optional*):
            The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported.
            If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`.
            If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`.
            Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be
            especially useful if the repo is updated / committed to concurrently.

    Returns:
        `str`: URL of the commit which updated the card metadata.

    Raises:
        `ValueError`: If `repo_type` is not `None`, `"model"`, `"dataset"` or `"space"`; if the
            repo is a Space without a `README.md`; or if `metadata` carries a different value
            for an existing field or metric while `overwrite` is `False`.

    Example:
        ```python
        >>> from huggingface_hub import metadata_update
        >>> metadata = {'model-index': [{'name': 'RoBERTa fine-tuned on ReactionGIF',
        ...             'results': [{'dataset': {'name': 'ReactionGIF',
        ...                                      'type': 'julien-c/reactiongif'},
        ...                           'metrics': [{'name': 'Recall',
        ...                                        'type': 'recall',
        ...                                        'value': 0.7762102282047272}],
        ...                          'task': {'name': 'Text Classification',
        ...                                   'type': 'text-classification'}}]}]}
        >>> url = metadata_update("hf-internal-testing/reactiongif-roberta-card", metadata)

        ```
    """
    commit_message = commit_message if commit_message is not None else "Update metadata with huggingface_hub"

    # Card class given repo_type
    card_class: Type[RepoCard]
    if repo_type is None or repo_type == "model":
        card_class = ModelCard
    elif repo_type == "dataset":
        card_class = DatasetCard
    elif repo_type == "space":
        # The base class is used for spaces; `repo_type` is passed explicitly to `load` below.
        card_class = RepoCard
    else:
        raise ValueError(f"Unknown repo_type: {repo_type}")

    # Either load repo_card from the Hub or create an empty one.
    # NOTE: Will not create the repo if it doesn't exist.
    try:
        card = card_class.load(repo_id, token=token, repo_type=repo_type)
    except EntryNotFoundError:
        if repo_type == "space":
            raise ValueError("Cannot update metadata on a Space that doesn't contain a `README.md` file.")

        # Initialize a ModelCard or DatasetCard from default template and no data.
        # Cast to the concrete expected card type to satisfy type checkers.
        card = card_class.from_template(CardData())  # type: ignore[return-value]

    for key, value in metadata.items():
        if key == "model-index":
            # if the new metadata doesn't include a name, either use existing one or repo name
            # NOTE: this writes into `value[0]`, i.e. it mutates the caller's `metadata` in place.
            # NOTE(review): the name is looked up on `card`, while the code below stores it on
            # `card.data.model_name`; this may always fall back to `repo_id` -- confirm.
            if "name" not in value[0]:
                value[0]["name"] = getattr(card, "model_name", repo_id)
            model_name, new_results = model_index_to_eval_results(value)
            if card.data.eval_results is None:
                # No results on the card yet: take the new ones (and their model name) as-is.
                card.data.eval_results = new_results
                card.data.model_name = model_name
            else:
                existing_results = card.data.eval_results

                # Iterate over new results
                #    Iterate over existing results
                #        If both results describe the same metric but value is different:
                #            If overwrite=True: overwrite the metric value
                #            Else: raise ValueError
                #        Else: append new result to existing ones.
                for new_result in new_results:
                    result_found = False
                    for existing_result in existing_results:
                        if new_result.is_equal_except_value(existing_result):
                            if new_result != existing_result and not overwrite:
                                raise ValueError(
                                    "You passed a new value for the existing metric"
                                    f" 'name: {new_result.metric_name}, type: "
                                    f"{new_result.metric_type}'. Set `overwrite=True`"
                                    " to overwrite existing metrics."
                                )
                            result_found = True
                            existing_result.metric_value = new_result.metric_value
                            # The token is only replaced on results already flagged as verified.
                            if existing_result.verified is True:
                                existing_result.verify_token = new_result.verify_token
                    if not result_found:
                        card.data.eval_results.append(new_result)
        else:
            # Any metadata that is not a result metric
            # A conflict is an existing non-None value that differs from the new one.
            if card.data.get(key) is not None and not overwrite and card.data.get(key) != value:
                raise ValueError(
                    f"You passed a new value for the existing meta data field '{key}'."
                    " Set `overwrite=True` to overwrite existing metadata."
                )
            else:
                card.data[key] = value

    return card.push_to_hub(
        repo_id,
        token=token,
        repo_type=repo_type,
        commit_message=commit_message,
        commit_description=commit_description,
        create_pr=create_pr,
        revision=revision,
        parent_commit=parent_commit,
    )
+ + For more information on the model-index spec, see https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1. + + Args: + task_type (`str`): + The task identifier. Example: "image-classification". + dataset_type (`str`): + The dataset identifier. Example: "common_voice". Use dataset id from https://hf.co/datasets. + dataset_name (`str`): + A pretty name for the dataset. Example: "Common Voice (French)". + metric_type (`str`): + The metric identifier. Example: "wer". Use metric id from https://hf.co/metrics. + metric_value (`Any`): + The metric value. Example: 0.9 or "20.0 ± 1.2". + task_name (`str`, *optional*): + A pretty name for the task. Example: "Speech Recognition". + dataset_config (`str`, *optional*): + The name of the dataset configuration used in `load_dataset()`. + Example: fr in `load_dataset("common_voice", "fr")`. See the `datasets` docs for more info: + https://hf.co/docs/datasets/package_reference/loading_methods#datasets.load_dataset.name + dataset_split (`str`, *optional*): + The split used in `load_dataset()`. Example: "test". + dataset_revision (`str`, *optional*): + The revision (AKA Git Sha) of the dataset used in `load_dataset()`. + Example: 5503434ddd753f426f4b38109466949a1217c2bb + dataset_args (`Dict[str, Any]`, *optional*): + The arguments passed during `Metric.compute()`. Example for `bleu`: `{"max_order": 4}` + metric_name (`str`, *optional*): + A pretty name for the metric. Example: "Test WER". + metric_config (`str`, *optional*): + The name of the metric configuration used in `load_metric()`. + Example: bleurt-large-512 in `load_metric("bleurt", "bleurt-large-512")`. + See the `datasets` docs for more info: https://huggingface.co/docs/datasets/v2.1.0/en/loading#load-configurations + metric_args (`Dict[str, Any]`, *optional*): + The arguments passed during `Metric.compute()`. 
Example for `bleu`: max_order: 4 + verified (`bool`, *optional*): + Indicates whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not. Automatically computed by Hugging Face, do not set. + verify_token (`str`, *optional*): + A JSON Web Token that is used to verify whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not. + source_name (`str`, *optional*): + The name of the source of the evaluation result. Example: "Open LLM Leaderboard". + source_url (`str`, *optional*): + The URL of the source of the evaluation result. Example: "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard". + """ + + # Required + + # The task identifier + # Example: automatic-speech-recognition + task_type: str + + # The dataset identifier + # Example: common_voice. Use dataset id from https://hf.co/datasets + dataset_type: str + + # A pretty name for the dataset. + # Example: Common Voice (French) + dataset_name: str + + # The metric identifier + # Example: wer. Use metric id from https://hf.co/metrics + metric_type: str + + # Value of the metric. + # Example: 20.0 or "20.0 ± 1.2" + metric_value: Any + + # Optional + + # A pretty name for the task. + # Example: Speech Recognition + task_name: Optional[str] = None + + # The name of the dataset configuration used in `load_dataset()`. + # Example: fr in `load_dataset("common_voice", "fr")`. + # See the `datasets` docs for more info: + # https://huggingface.co/docs/datasets/package_reference/loading_methods#datasets.load_dataset.name + dataset_config: Optional[str] = None + + # The split used in `load_dataset()`. + # Example: test + dataset_split: Optional[str] = None + + # The revision (AKA Git Sha) of the dataset used in `load_dataset()`. 
+ # Example: 5503434ddd753f426f4b38109466949a1217c2bb + dataset_revision: Optional[str] = None + + # The arguments passed during `Metric.compute()`. + # Example for `bleu`: max_order: 4 + dataset_args: Optional[Dict[str, Any]] = None + + # A pretty name for the metric. + # Example: Test WER + metric_name: Optional[str] = None + + # The name of the metric configuration used in `load_metric()`. + # Example: bleurt-large-512 in `load_metric("bleurt", "bleurt-large-512")`. + # See the `datasets` docs for more info: https://huggingface.co/docs/datasets/v2.1.0/en/loading#load-configurations + metric_config: Optional[str] = None + + # The arguments passed during `Metric.compute()`. + # Example for `bleu`: max_order: 4 + metric_args: Optional[Dict[str, Any]] = None + + # Indicates whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not. Automatically computed by Hugging Face, do not set. + verified: Optional[bool] = None + + # A JSON Web Token that is used to verify whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not. + verify_token: Optional[str] = None + + # The name of the source of the evaluation result. + # Example: Open LLM Leaderboard + source_name: Optional[str] = None + + # The URL of the source of the evaluation result. + # Example: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard + source_url: Optional[str] = None + + @property + def unique_identifier(self) -> tuple: + """Returns a tuple that uniquely identifies this evaluation.""" + return ( + self.task_type, + self.dataset_type, + self.dataset_config, + self.dataset_split, + self.dataset_revision, + ) + + def is_equal_except_value(self, other: "EvalResult") -> bool: + """ + Return True if `self` and `other` describe exactly the same metric but with a + different value. 
@dataclass
class CardData:
    """Structure containing metadata from a RepoCard.

    [`CardData`] is the parent class of [`ModelCardData`] and [`DatasetCardData`].

    Metadata can be exported as a dictionary or YAML. Export can be customized to alter the representation of the data
    (example: flatten evaluation results). `CardData` behaves as a dictionary (can get, pop, set values) but do not
    inherit from `dict` to allow this export step.

    All metadata lives in the instance `__dict__`. Two instances of the same class compare
    equal when their metadata is equal; instances are not hashable.
    """

    def __init__(self, ignore_metadata_errors: bool = False, **kwargs):
        # `ignore_metadata_errors` is accepted but not used or stored here.
        self.__dict__.update(kwargs)

    def __eq__(self, other: object) -> bool:
        """Compare metadata content with another instance of the exact same class."""
        # Defined explicitly because this class declares no dataclass fields: the
        # `__eq__` that `@dataclass` would generate compares empty field tuples and
        # therefore reports any two instances of the same class as equal.
        if other.__class__ is not self.__class__:
            return NotImplemented
        return self.__dict__ == other.__dict__

    def to_dict(self):
        """Converts CardData to a dict.

        Returns:
            `dict`: CardData represented as a dictionary ready to be dumped to a YAML
            block for inclusion in a README.md file. Entries whose value is `None` are
            dropped. The data is deep-copied, so the result shares nothing with the instance.
        """

        data_dict = copy.deepcopy(self.__dict__)
        self._to_dict(data_dict)
        return {key: value for key, value in data_dict.items() if value is not None}

    def _to_dict(self, data_dict):
        """Use this method in child classes to alter the dict representation of the data. Alter the dict in-place.

        Args:
            data_dict (`dict`): The raw dict representation of the card data.
        """
        pass

    def _reordered_dict(self, original_order: List[str]) -> Dict[str, Any]:
        """Return the metadata with the keys of `original_order` first, then all other keys in insertion order."""
        current = self.__dict__
        listed = set(original_order)
        leading = [key for key in original_order if key in current]
        # Walk the dict itself (not a set difference) so the order is deterministic.
        trailing = [key for key in current if key not in listed]
        return {key: current[key] for key in leading + trailing}

    def to_yaml(self, line_break=None, original_order: Optional[List[str]] = None) -> str:
        """Dumps CardData to a YAML block for inclusion in a README.md file.

        Args:
            line_break (str, *optional*):
                The line break to use when dumping to yaml.
            original_order (`List[str]`, *optional*):
                Keys to emit first, in this order. Keys missing from the metadata are
                skipped; metadata keys not listed follow in their insertion order.
                When provided, the instance's metadata is reordered in place.

        Returns:
            `str`: CardData represented as a YAML block.
        """
        if original_order:
            self.__dict__ = self._reordered_dict(original_order)
        return yaml_dump(self.to_dict(), sort_keys=False, line_break=line_break).strip()

    def __repr__(self):
        return repr(self.__dict__)

    def __str__(self):
        return self.to_yaml()

    def get(self, key: str, default: Any = None) -> Any:
        """Get value for a given metadata key. `default` is returned when the key is missing or its value is `None`."""
        value = self.__dict__.get(key)
        return default if value is None else value

    def pop(self, key: str, default: Any = None) -> Any:
        """Pop value for a given metadata key."""
        return self.__dict__.pop(key, default)

    def __getitem__(self, key: str) -> Any:
        """Get value for a given metadata key."""
        return self.__dict__[key]

    def __setitem__(self, key: str, value: Any) -> None:
        """Set value for a given metadata key."""
        self.__dict__[key] = value

    def __contains__(self, key: str) -> bool:
        """Check if a given metadata key is set."""
        return key in self.__dict__

    def __len__(self) -> int:
        """Return the number of metadata keys set."""
        return len(self.__dict__)
+ ) + if model_name is None: + raise ValueError("Passing `eval_results` requires `model_name` to be set.") + return eval_results + + +class ModelCardData(CardData): + """Model Card Metadata that is used by Hugging Face Hub when included at the top of your README.md + + Args: + base_model (`str` or `List[str]`, *optional*): + The identifier of the base model from which the model derives. This is applicable for example if your model is a + fine-tune or adapter of an existing model. The value must be the ID of a model on the Hub (or a list of IDs + if your model derives from multiple models). Defaults to None. + datasets (`Union[str, List[str]]`, *optional*): + Dataset or list of datasets that were used to train this model. Should be a dataset ID + found on https://hf.co/datasets. Defaults to None. + eval_results (`Union[List[EvalResult], EvalResult]`, *optional*): + List of `huggingface_hub.EvalResult` that define evaluation results of the model. If provided, + `model_name` is used to as a name on PapersWithCode's leaderboards. Defaults to `None`. + language (`Union[str, List[str]]`, *optional*): + Language of model's training data or metadata. It must be an ISO 639-1, 639-2 or + 639-3 code (two/three letters), or a special value like "code", "multilingual". Defaults to `None`. + library_name (`str`, *optional*): + Name of library used by this model. Example: keras or any library from + https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/model-libraries.ts. + Defaults to None. + license (`str`, *optional*): + License of this model. Example: apache-2.0 or any license from + https://huggingface.co/docs/hub/repositories-licenses. Defaults to None. + license_name (`str`, *optional*): + Name of the license of this model. Defaults to None. To be used in conjunction with `license_link`. + Common licenses (Apache-2.0, MIT, CC-BY-SA-4.0) do not need a name. In that case, use `license` instead. 
+ license_link (`str`, *optional*): + Link to the license of this model. Defaults to None. To be used in conjunction with `license_name`. + Common licenses (Apache-2.0, MIT, CC-BY-SA-4.0) do not need a link. In that case, use `license` instead. + metrics (`List[str]`, *optional*): + List of metrics used to evaluate this model. Should be a metric name that can be found + at https://hf.co/metrics. Example: 'accuracy'. Defaults to None. + model_name (`str`, *optional*): + A name for this model. It is used along with + `eval_results` to construct the `model-index` within the card's metadata. The name + you supply here is what will be used on PapersWithCode's leaderboards. If None is provided + then the repo name is used as a default. Defaults to None. + pipeline_tag (`str`, *optional*): + The pipeline tag associated with the model. Example: "text-classification". + tags (`List[str]`, *optional*): + List of tags to add to your model that can be used when filtering on the Hugging + Face Hub. Defaults to None. + ignore_metadata_errors (`str`): + If True, errors while parsing the metadata section will be ignored. Some information might be lost during + the process. Use it at your own risk. + kwargs (`dict`, *optional*): + Additional metadata that will be added to the model card. Defaults to None. + + Example: + ```python + >>> from huggingface_hub import ModelCardData + >>> card_data = ModelCardData( + ... language="en", + ... license="mit", + ... library_name="timm", + ... tags=['image-classification', 'resnet'], + ... 
) + >>> card_data.to_dict() + {'language': 'en', 'license': 'mit', 'library_name': 'timm', 'tags': ['image-classification', 'resnet']} + + ``` + """ + + def __init__( + self, + *, + base_model: Optional[Union[str, List[str]]] = None, + datasets: Optional[Union[str, List[str]]] = None, + eval_results: Optional[List[EvalResult]] = None, + language: Optional[Union[str, List[str]]] = None, + library_name: Optional[str] = None, + license: Optional[str] = None, + license_name: Optional[str] = None, + license_link: Optional[str] = None, + metrics: Optional[List[str]] = None, + model_name: Optional[str] = None, + pipeline_tag: Optional[str] = None, + tags: Optional[List[str]] = None, + ignore_metadata_errors: bool = False, + **kwargs, + ): + self.base_model = base_model + self.datasets = datasets + self.eval_results = eval_results + self.language = language + self.library_name = library_name + self.license = license + self.license_name = license_name + self.license_link = license_link + self.metrics = metrics + self.model_name = model_name + self.pipeline_tag = pipeline_tag + self.tags = _to_unique_list(tags) + + model_index = kwargs.pop("model-index", None) + if model_index: + try: + model_name, eval_results = model_index_to_eval_results(model_index) + self.model_name = model_name + self.eval_results = eval_results + except (KeyError, TypeError) as error: + if ignore_metadata_errors: + logger.warning("Invalid model-index. Not loading eval results into CardData.") + else: + raise ValueError( + f"Invalid `model_index` in metadata cannot be parsed: {error.__class__} {error}. Pass" + " `ignore_metadata_errors=True` to ignore this error while loading a Model Card. Warning:" + " some information will be lost. Use it at your own risk." 
def _to_dict(self, data_dict):
    """Replace the eval-result entries of `data_dict` with a `model-index` entry.

    When `self.eval_results` is `None` the dict is left untouched. Otherwise a
    `model-index` entry is built from `self.model_name` and `self.eval_results`,
    and the `eval_results` and `model_name` keys are removed. The dict is
    modified in place; nothing is returned.
    """
    if self.eval_results is None:
        return
    model_index = eval_results_to_model_index(self.model_name, self.eval_results)
    data_dict["model-index"] = model_index
    # These two keys are now represented by the model index.
    for replaced_key in ("eval_results", "model_name"):
        del data_dict[replaced_key]
+ source_datasets (`List[str]]`, *optional*): + Indicates whether the dataset is an original dataset or extended from another existing dataset. + Options are: 'original' and 'extended'. + task_categories (`Union[str, List[str]]`, *optional*): + What categories of task does the dataset support? + task_ids (`Union[str, List[str]]`, *optional*): + What specific tasks does the dataset support? + paperswithcode_id (`str`, *optional*): + ID of the dataset on PapersWithCode. + pretty_name (`str`, *optional*): + A more human-readable name for the dataset. (ex. "Cats vs. Dogs") + train_eval_index (`Dict`, *optional*): + A dictionary that describes the necessary spec for doing evaluation on the Hub. + If not provided, it will be gathered from the 'train-eval-index' key of the kwargs. + config_names (`Union[str, List[str]]`, *optional*): + A list of the available dataset configs for the dataset. + """ + + def __init__( + self, + *, + language: Optional[Union[str, List[str]]] = None, + license: Optional[Union[str, List[str]]] = None, + annotations_creators: Optional[Union[str, List[str]]] = None, + language_creators: Optional[Union[str, List[str]]] = None, + multilinguality: Optional[Union[str, List[str]]] = None, + size_categories: Optional[Union[str, List[str]]] = None, + source_datasets: Optional[List[str]] = None, + task_categories: Optional[Union[str, List[str]]] = None, + task_ids: Optional[Union[str, List[str]]] = None, + paperswithcode_id: Optional[str] = None, + pretty_name: Optional[str] = None, + train_eval_index: Optional[Dict] = None, + config_names: Optional[Union[str, List[str]]] = None, + ignore_metadata_errors: bool = False, + **kwargs, + ): + self.annotations_creators = annotations_creators + self.language_creators = language_creators + self.language = language + self.license = license + self.multilinguality = multilinguality + self.size_categories = size_categories + self.source_datasets = source_datasets + self.task_categories = task_categories + 
def _to_dict(self, data_dict):
    """Rename the `train_eval_index` entry of `data_dict` to `train-eval-index`.

    The dict is modified in place. A `KeyError` is raised if `train_eval_index`
    is not present.
    """
    train_eval_spec = data_dict.pop("train_eval_index")
    data_dict["train-eval-index"] = train_eval_spec
def __init__(
    self,
    *,
    title: Optional[str] = None,
    sdk: Optional[str] = None,
    sdk_version: Optional[str] = None,
    python_version: Optional[str] = None,
    app_file: Optional[str] = None,
    app_port: Optional[int] = None,
    license: Optional[str] = None,
    duplicated_from: Optional[str] = None,
    models: Optional[List[str]] = None,
    datasets: Optional[List[str]] = None,
    tags: Optional[List[str]] = None,
    ignore_metadata_errors: bool = False,
    **kwargs,
):
    """Store the Space metadata fields, then hand any extra metadata to the parent initializer.

    `ignore_metadata_errors` is accepted but not read by this initializer.
    """
    # Keep this exact assignment order.
    # NOTE(review): attribute insertion order presumably determines key order when the card is dumped - confirm.
    plain_fields = (
        ("title", title),
        ("sdk", sdk),
        ("sdk_version", sdk_version),
        ("python_version", python_version),
        ("app_file", app_file),
        ("app_port", app_port),
        ("license", license),
        ("duplicated_from", duplicated_from),
        ("models", models),
        ("datasets", datasets),
    )
    for field_name, field_value in plain_fields:
        setattr(self, field_name, field_value)

    # Tags are de-duplicated (first occurrence wins) before being stored.
    self.tags = _to_unique_list(tags)
    super().__init__(**kwargs)
+ + Returns: + model_name (`str`): + The name of the model as found in the model index. This is used as the + identifier for the model on leaderboards like PapersWithCode. + eval_results (`List[EvalResult]`): + A list of `huggingface_hub.EvalResult` objects containing the metrics + reported in the provided model_index. + + Example: + ```python + >>> from huggingface_hub.repocard_data import model_index_to_eval_results + >>> # Define a minimal model index + >>> model_index = [ + ... { + ... "name": "my-cool-model", + ... "results": [ + ... { + ... "task": { + ... "type": "image-classification" + ... }, + ... "dataset": { + ... "type": "beans", + ... "name": "Beans" + ... }, + ... "metrics": [ + ... { + ... "type": "accuracy", + ... "value": 0.9 + ... } + ... ] + ... } + ... ] + ... } + ... ] + >>> model_name, eval_results = model_index_to_eval_results(model_index) + >>> model_name + 'my-cool-model' + >>> eval_results[0].task_type + 'image-classification' + >>> eval_results[0].metric_type + 'accuracy' + + ``` + """ + + eval_results = [] + for elem in model_index: + name = elem["name"] + results = elem["results"] + for result in results: + task_type = result["task"]["type"] + task_name = result["task"].get("name") + dataset_type = result["dataset"]["type"] + dataset_name = result["dataset"]["name"] + dataset_config = result["dataset"].get("config") + dataset_split = result["dataset"].get("split") + dataset_revision = result["dataset"].get("revision") + dataset_args = result["dataset"].get("args") + source_name = result.get("source", {}).get("name") + source_url = result.get("source", {}).get("url") + + for metric in result["metrics"]: + metric_type = metric["type"] + metric_value = metric["value"] + metric_name = metric.get("name") + metric_args = metric.get("args") + metric_config = metric.get("config") + verified = metric.get("verified") + verify_token = metric.get("verifyToken") + + eval_result = EvalResult( + task_type=task_type, # Required + 
def _remove_none(obj):
    """
    Recursively strip `None` values out of nested containers. Borrowed from: https://stackoverflow.com/a/20558778

    - dict: entries whose key or value is `None` are dropped; remaining keys and values are processed recursively.
    - list / tuple / set: `None` items are dropped; remaining items are processed recursively.
    - anything else (including a top-level `None`) is returned unchanged.

    The container type of the input is preserved and the input itself is not modified.
    """
    if isinstance(obj, dict):
        kept_pairs = (
            (_remove_none(key), _remove_none(value))
            for key, value in obj.items()
            if not (key is None or value is None)
        )
        return type(obj)(kept_pairs)
    if isinstance(obj, (list, tuple, set)):
        kept_items = (_remove_none(item) for item in obj if item is not None)
        return type(obj)(kept_items)
    return obj
metric_type="accuracy", # Required + ... metric_value=0.9, # Required + ... ) + ... ] + >>> eval_results_to_model_index("my-cool-model", eval_results) + [{'name': 'my-cool-model', 'results': [{'task': {'type': 'image-classification'}, 'dataset': {'name': 'Beans', 'type': 'beans'}, 'metrics': [{'type': 'accuracy', 'value': 0.9}]}]}] + + ``` + """ + + # Metrics are reported on a unique task-and-dataset basis. + # Here, we make a map of those pairs and the associated EvalResults. + task_and_ds_types_map: Dict[Any, List[EvalResult]] = defaultdict(list) + for eval_result in eval_results: + task_and_ds_types_map[eval_result.unique_identifier].append(eval_result) + + # Use the map from above to generate the model index data. + model_index_data = [] + for results in task_and_ds_types_map.values(): + # All items from `results` share same metadata + sample_result = results[0] + data = { + "task": { + "type": sample_result.task_type, + "name": sample_result.task_name, + }, + "dataset": { + "name": sample_result.dataset_name, + "type": sample_result.dataset_type, + "config": sample_result.dataset_config, + "split": sample_result.dataset_split, + "revision": sample_result.dataset_revision, + "args": sample_result.dataset_args, + }, + "metrics": [ + { + "type": result.metric_type, + "value": result.metric_value, + "name": result.metric_name, + "config": result.metric_config, + "args": result.metric_args, + "verified": result.verified, + "verifyToken": result.verify_token, + } + for result in results + ], + } + if sample_result.source_url is not None: + source = { + "url": sample_result.source_url, + } + if sample_result.source_name is not None: + source["name"] = sample_result.source_name + data["source"] = source + model_index_data.append(data) + + # TODO - Check if there cases where this list is longer than one? + # Finally, the model index itself is list of dicts. 
class CommandInProgress:
    """
    Handle on a command that was launched asynchronously, used to poll its state and output.
    """

    def __init__(
        self,
        title: str,
        is_done_method: Callable,
        status_method: Callable,
        process: subprocess.Popen,
        post_method: Optional[Callable] = None,
    ):
        self.title = title
        self._is_done = is_done_method
        self._status = status_method
        self._process = process
        # Output read so far; the `stderr` / `stdout` properties append to these.
        self._stderr = ""
        self._stdout = ""
        # Optional callback, run once the first time the command is seen as done.
        self._post_method = post_method

    @property
    def is_done(self) -> bool:
        """
        Whether the process is done.

        The first time this is observed to be true, the post method (if any) is
        called and then discarded so it never runs twice.
        """
        finished = self._is_done()
        if not finished or self._post_method is None:
            return finished

        self._post_method()
        self._post_method = None
        return finished

    @property
    def status(self) -> int:
        """
        The exit code/status of the current action. Will return `0` if the
        command has completed successfully, and a number between 1 and 255 if
        the process errored-out.

        Will return -1 if the command is still ongoing.
        """
        return self._status()

    @property
    def failed(self) -> bool:
        """
        Whether the process errored-out, i.e. its status is strictly positive.
        """
        exit_code = self.status
        return exit_code > 0

    @property
    def stderr(self) -> str:
        """
        The standard error output accumulated so far.
        """
        stream = self._process.stderr
        if stream is not None:
            self._stderr += stream.read()
        return self._stderr

    @property
    def stdout(self) -> str:
        """
        The standard output accumulated so far.
        """
        stream = self._process.stdout
        if stream is not None:
            self._stdout += stream.read()
        return self._stdout

    def __repr__(self):
        status = self.status
        if status == -1:
            # -1 is the "still ongoing" status, shown as a word instead of a code.
            status = "running"

        progress = "finished." if self.is_done else "in progress."
        return f"[{self.title} command, status code: {status}, {progress} PID: {self._process.pid}]"
def is_local_clone(folder: Union[str, Path], remote_url: str) -> bool:
    """
    Check if the folder is a local clone of the remote_url

    Args:
        folder (`str` or `Path`):
            The folder in which to run the command.
        remote_url (`str`):
            The url of a git repository.

    Returns:
        `bool`: `True` if the repository is a local clone of the remote
        repository specified, `False` otherwise.
    """
    if not is_git_repo(folder):
        return False

    def _strip_credentials(url: str) -> str:
        # Drop anything between "https://" and the last "@" so an embedded token does not affect the comparison.
        return re.sub(r"https://.*@", "https://", url)

    remote_listing = run_subprocess("git remote -v", folder).stdout
    wanted_url = _strip_credentials(remote_url)

    # The listing is split on whitespace; a match on any single token counts.
    return any(_strip_credentials(token) == wanted_url for token in remote_listing.split())
def is_binary_file(filename: Union[str, Path]) -> bool:
    """
    Check if file is a binary file.

    At most the first 10MB of the file are inspected. The file is reported as
    binary when that prefix contains any byte outside the "text" set defined
    below.

    Args:
        filename (`str` or `Path`):
            The filename to check.

    Returns:
        `bool`: `True` if the file passed is a binary file, `False` otherwise.
    """
    max_bytes = 10 * (1024**2)  # Read a maximum of 10MB

    # Code sample taken from the following stack overflow thread
    # https://stackoverflow.com/questions/898669/how-can-i-detect-if-a-file-is-binary-non-text-in-python/7392391#7392391
    allowed_controls = {7, 8, 9, 10, 12, 13, 27}
    allowed_high = set(range(0x20, 0x100)) - {0x7F}

    try:
        with open(filename, "rb") as handle:
            head = handle.read(max_bytes)

        text_bytes = bytearray(allowed_controls | allowed_high)
        # Delete every "text" byte; whatever survives is non-text content.
        leftover = head.translate(None, text_bytes)
        return bool(leftover)
    except UnicodeDecodeError:
        return True
def commits_to_push(folder: Union[str, Path], upstream: Optional[str] = None) -> int:
    """
    Check the number of commits that would be pushed upstream

    Args:
        folder (`str` or `Path`):
            The folder in which to run the command.
        upstream (`str`, *optional*):
            The name of the upstream repository with which the comparison should be
            made.

    Returns:
        `int`: Number of commits that would be pushed upstream were a `git
        push` to proceed.

    Raises:
        `EnvironmentError`: carrying the command's stderr, if the git command fails.
    """
    target = upstream or ""
    command = f"git cherry -v {target}"
    try:
        completed = run_subprocess(command, folder)
    except subprocess.CalledProcessError as exc:
        raise EnvironmentError(exc.stderr)

    # Counting newlines gives the same number as splitting on "\n" and subtracting one.
    return completed.stdout.count("\n")
+ """ + + if logger.getEffectiveLevel() >= logging.ERROR: + try: + yield + except Exception: + pass + return + + def output_progress(stopping_event: threading.Event): + """ + To be launched as a separate thread with an event meaning it should stop + the tail. + """ + # Key is tuple(state, filename), value is a dict(tqdm bar and a previous value) + pbars: Dict[Tuple[str, str], PbarT] = {} + + def close_pbars(): + for pbar in pbars.values(): + pbar["bar"].update(pbar["bar"].total - pbar["past_bytes"]) + pbar["bar"].refresh() + pbar["bar"].close() + + def tail_file(filename) -> Iterator[str]: + """ + Creates a generator to be iterated through, which will return each + line one by one. Will stop tailing the file if the stopping_event is + set. + """ + with open(filename, "r") as file: + current_line = "" + while True: + if stopping_event.is_set(): + close_pbars() + break + + line_bit = file.readline() + if line_bit is not None and not len(line_bit.strip()) == 0: + current_line += line_bit + if current_line.endswith("\n"): + yield current_line + current_line = "" + else: + time.sleep(1) + + # If the file isn't created yet, wait for a few seconds before trying again. + # Can be interrupted with the stopping_event. + while not os.path.exists(os.environ["GIT_LFS_PROGRESS"]): + if stopping_event.is_set(): + close_pbars() + return + + time.sleep(2) + + for line in tail_file(os.environ["GIT_LFS_PROGRESS"]): + try: + state, file_progress, byte_progress, filename = line.split() + except ValueError as error: + # Try/except to ease debugging. See https://github.com/huggingface/huggingface_hub/issues/1373. 
+ raise ValueError(f"Cannot unpack LFS progress line:\n{line}") from error + description = f"{state.capitalize()} file {filename}" + + current_bytes, total_bytes = byte_progress.split("/") + current_bytes_int = int(current_bytes) + total_bytes_int = int(total_bytes) + + pbar = pbars.get((state, filename)) + if pbar is None: + # Initialize progress bar + pbars[(state, filename)] = { + "bar": tqdm( + desc=description, + initial=current_bytes_int, + total=total_bytes_int, + unit="B", + unit_scale=True, + unit_divisor=1024, + name="huggingface_hub.lfs_upload", + ), + "past_bytes": int(current_bytes), + } + else: + # Update progress bar + pbar["bar"].update(current_bytes_int - pbar["past_bytes"]) + pbar["past_bytes"] = current_bytes_int + + current_lfs_progress_value = os.environ.get("GIT_LFS_PROGRESS", "") + + with SoftTemporaryDirectory() as tmpdir: + os.environ["GIT_LFS_PROGRESS"] = os.path.join(tmpdir, "lfs_progress") + logger.debug(f"Following progress in {os.environ['GIT_LFS_PROGRESS']}") + + exit_event = threading.Event() + x = threading.Thread(target=output_progress, args=(exit_event,), daemon=True) + x.start() + + try: + yield + finally: + exit_event.set() + x.join() + + os.environ["GIT_LFS_PROGRESS"] = current_lfs_progress_value + + +class Repository: + """ + Helper class to wrap the git and git-lfs commands. + + The aim is to facilitate interacting with huggingface.co hosted model or + dataset repos, though not a lot here (if any) is actually specific to + huggingface.co. + + > [!WARNING] + > [`Repository`] is deprecated in favor of the http-based alternatives implemented in + > [`HfApi`]. Given its large adoption in legacy code, the complete removal of + > [`Repository`] will only happen in release `v1.0`. For more details, please read + > https://huggingface.co/docs/huggingface_hub/concepts/git_vs_http. 
+ """ + + command_queue: List[CommandInProgress] + + @validate_hf_hub_args + @_deprecate_method( + version="1.0", + message=( + "Please prefer the http-based alternatives instead. Given its large adoption in legacy code, the complete" + " removal is only planned on next major release.\nFor more details, please read" + " https://huggingface.co/docs/huggingface_hub/concepts/git_vs_http." + ), + ) + def __init__( + self, + local_dir: Union[str, Path], + clone_from: Optional[str] = None, + repo_type: Optional[str] = None, + token: Union[bool, str] = True, + git_user: Optional[str] = None, + git_email: Optional[str] = None, + revision: Optional[str] = None, + skip_lfs_files: bool = False, + client: Optional[HfApi] = None, + ): + """ + Instantiate a local clone of a git repo. + + If `clone_from` is set, the repo will be cloned from an existing remote repository. + If the remote repo does not exist, a `EnvironmentError` exception will be thrown. + Please create the remote repo first using [`create_repo`]. + + `Repository` uses the local git credentials by default. If explicitly set, the `token` + or the `git_user`/`git_email` pair will be used instead. + + Args: + local_dir (`str` or `Path`): + path (e.g. `'my_trained_model/'`) to the local directory, where + the `Repository` will be initialized. + clone_from (`str`, *optional*): + Either a repository url or `repo_id`. + Example: + - `"https://huggingface.co/philschmid/playground-tests"` + - `"philschmid/playground-tests"` + repo_type (`str`, *optional*): + To set when cloning a repo from a repo_id. Default is model. + token (`bool` or `str`, *optional*): + A valid authentication token (see https://huggingface.co/settings/token). + If `None` or `True` and machine is logged in (through `hf auth login` + or [`~huggingface_hub.login`]), token will be retrieved from the cache. + If `False`, token is not sent in the request header. 
+ git_user (`str`, *optional*): + will override the `git config user.name` for committing and + pushing files to the hub. + git_email (`str`, *optional*): + will override the `git config user.email` for committing and + pushing files to the hub. + revision (`str`, *optional*): + Revision to checkout after initializing the repository. If the + revision doesn't exist, a branch will be created with that + revision name from the default branch's current HEAD. + skip_lfs_files (`bool`, *optional*, defaults to `False`): + whether to skip git-LFS files or not. + client (`HfApi`, *optional*): + Instance of [`HfApi`] to use when calling the HF Hub API. A new + instance will be created if this is left to `None`. + + Raises: + [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError) + If the remote repository set in `clone_from` does not exist. + """ + if isinstance(local_dir, Path): + local_dir = str(local_dir) + os.makedirs(local_dir, exist_ok=True) + self.local_dir = os.path.join(os.getcwd(), local_dir) + self._repo_type = repo_type + self.command_queue = [] + self.skip_lfs_files = skip_lfs_files + self.client = client if client is not None else HfApi() + + self.check_git_versions() + + if isinstance(token, str): + self.huggingface_token: Optional[str] = token + elif token is False: + self.huggingface_token = None + else: + # if `True` -> explicit use of the cached token + # if `None` -> implicit use of the cached token + self.huggingface_token = get_token() + + if clone_from is not None: + self.clone_from(repo_url=clone_from) + else: + if is_git_repo(self.local_dir): + logger.debug("[Repository] is a valid git repo") + else: + raise ValueError("If not specifying `clone_from`, you need to pass Repository a valid git clone.") + + if self.huggingface_token is not None and (git_email is None or git_user is None): + user = self.client.whoami(self.huggingface_token) + + if git_email is None: + git_email = user.get("email") + + if git_user is 
None: + git_user = user.get("fullname") + + if git_user is not None or git_email is not None: + self.git_config_username_and_email(git_user, git_email) + + self.lfs_enable_largefiles() + self.git_credential_helper_store() + + if revision is not None: + self.git_checkout(revision, create_branch_ok=True) + + # This ensures that all commands exit before exiting the Python runtime. + # This will ensure all pushes register on the hub, even if other errors happen in subsequent operations. + atexit.register(self.wait_for_commands) + + @property + def current_branch(self) -> str: + """ + Returns the current checked out branch. + + Returns: + `str`: Current checked out branch. + """ + try: + result = run_subprocess("git rev-parse --abbrev-ref HEAD", self.local_dir).stdout.strip() + except subprocess.CalledProcessError as exc: + raise EnvironmentError(exc.stderr) + + return result + + def check_git_versions(self): + """ + Checks that `git` and `git-lfs` can be run. + + Raises: + [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError) + If `git` or `git-lfs` are not installed. + """ + try: + git_version = run_subprocess("git --version", self.local_dir).stdout.strip() + except FileNotFoundError: + raise EnvironmentError("Looks like you do not have git installed, please install.") + + try: + lfs_version = run_subprocess("git-lfs --version", self.local_dir).stdout.strip() + except FileNotFoundError: + raise EnvironmentError( + "Looks like you do not have git-lfs installed, please install." + " You can install from https://git-lfs.github.com/." + " Then run `git lfs install` (you only have to do this once)." + ) + logger.info(git_version + "\n" + lfs_version) + + @validate_hf_hub_args + def clone_from(self, repo_url: str, token: Union[bool, str, None] = None): + """ + Clone from a remote. If the folder already exists, will try to clone the + repository within it. 
+ + If this folder is a git repository with linked history, will try to + update the repository. + + Args: + repo_url (`str`): + The URL from which to clone the repository + token (`Union[str, bool]`, *optional*): + Whether to use the authentication token. It can be: + - a string which is the token itself + - `False`, which would not use the authentication token + - `True`, which would fetch the authentication token from the + local folder and use it (you should be logged in for this to + work). + - `None`, which would retrieve the value of + `self.huggingface_token`. + + > [!TIP] + > Raises the following error: + > + > - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + > if an organization token (starts with "api_org") is passed. Use must use + > your own personal access token (see https://hf.co/settings/tokens). + > + > - [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError) + > if you are trying to clone the repository in a non-empty folder, or if the + > `git` operations raise errors. + """ + token = ( + token # str -> use it + if isinstance(token, str) + else ( + None # `False` -> explicit no token + if token is False + else self.huggingface_token # `None` or `True` -> use default + ) + ) + if token is not None and token.startswith("api_org"): + raise ValueError( + "You must use your personal access token, not an Organization token" + " (see https://hf.co/settings/tokens)." 
+ ) + + hub_url = self.client.endpoint + if hub_url in repo_url or ("http" not in repo_url and len(repo_url.split("/")) <= 2): + repo_type, namespace, repo_name = repo_type_and_id_from_hf_id(repo_url, hub_url=hub_url) + repo_id = f"{namespace}/{repo_name}" if namespace is not None else repo_name + + if repo_type is not None: + self._repo_type = repo_type + + repo_url = hub_url + "/" + + if self._repo_type in constants.REPO_TYPES_URL_PREFIXES: + repo_url += constants.REPO_TYPES_URL_PREFIXES[self._repo_type] + + if token is not None: + # Add token in git url when provided + scheme = urlparse(repo_url).scheme + repo_url = repo_url.replace(f"{scheme}://", f"{scheme}://user:{token}@") + + repo_url += repo_id + + # For error messages, it's cleaner to show the repo url without the token. + clean_repo_url = re.sub(r"(https?)://.*@", r"\1://", repo_url) + try: + run_subprocess("git lfs install", self.local_dir) + + # checks if repository is initialized in a empty repository or in one with files + if len(os.listdir(self.local_dir)) == 0: + logger.warning(f"Cloning {clean_repo_url} into local empty directory.") + + with _lfs_log_progress(): + env = os.environ.copy() + + if self.skip_lfs_files: + env.update({"GIT_LFS_SKIP_SMUDGE": "1"}) + + run_subprocess( + # 'git lfs clone' is deprecated (will display a warning in the terminal) + # but we still use it as it provides a nicer UX when downloading large + # files (shows progress). + f"{'git clone' if self.skip_lfs_files else 'git lfs clone'} {repo_url} .", + self.local_dir, + env=env, + ) + else: + # Check if the folder is the root of a git repository + if not is_git_repo(self.local_dir): + raise EnvironmentError( + "Tried to clone a repository in a non-empty folder that isn't" + f" a git repository ('{self.local_dir}'). 
If you really want to" + f" do this, do it manually:\n cd {self.local_dir} && git init" + " && git remote add origin && git pull origin main\n or clone" + " repo to a new folder and move your existing files there" + " afterwards." + ) + + if is_local_clone(self.local_dir, repo_url): + logger.warning( + f"{self.local_dir} is already a clone of {clean_repo_url}." + " Make sure you pull the latest changes with" + " `repo.git_pull()`." + ) + else: + output = run_subprocess("git remote get-url origin", self.local_dir, check=False) + + error_msg = ( + f"Tried to clone {clean_repo_url} in an unrelated git" + " repository.\nIf you believe this is an error, please add" + f" a remote with the following URL: {clean_repo_url}." + ) + if output.returncode == 0: + clean_local_remote_url = re.sub(r"https://.*@", "https://", output.stdout) + error_msg += f"\nLocal path has its origin defined as: {clean_local_remote_url}" + raise EnvironmentError(error_msg) + + except subprocess.CalledProcessError as exc: + raise EnvironmentError(exc.stderr) + + def git_config_username_and_email(self, git_user: Optional[str] = None, git_email: Optional[str] = None): + """ + Sets git username and email (only in the current repo). + + Args: + git_user (`str`, *optional*): + The username to register through `git`. + git_email (`str`, *optional*): + The email to register through `git`. 
+ """ + try: + if git_user is not None: + run_subprocess("git config user.name".split() + [git_user], self.local_dir) + + if git_email is not None: + run_subprocess(f"git config user.email {git_email}".split(), self.local_dir) + except subprocess.CalledProcessError as exc: + raise EnvironmentError(exc.stderr) + + def git_credential_helper_store(self): + """ + Sets the git credential helper to `store` + """ + try: + run_subprocess("git config credential.helper store", self.local_dir) + except subprocess.CalledProcessError as exc: + raise EnvironmentError(exc.stderr) + + def git_head_hash(self) -> str: + """ + Get commit sha on top of HEAD. + + Returns: + `str`: The current checked out commit SHA. + """ + try: + p = run_subprocess("git rev-parse HEAD", self.local_dir) + return p.stdout.strip() + except subprocess.CalledProcessError as exc: + raise EnvironmentError(exc.stderr) + + def git_remote_url(self) -> str: + """ + Get URL to origin remote. + + Returns: + `str`: The URL of the `origin` remote. + """ + try: + p = run_subprocess("git config --get remote.origin.url", self.local_dir) + url = p.stdout.strip() + # Strip basic auth info. + return re.sub(r"https://.*@", "https://", url) + except subprocess.CalledProcessError as exc: + raise EnvironmentError(exc.stderr) + + def git_head_commit_url(self) -> str: + """ + Get URL to last commit on HEAD. We assume it's been pushed, and the url + scheme is the same one as for GitHub or HuggingFace. + + Returns: + `str`: The URL to the current checked-out commit. + """ + sha = self.git_head_hash() + url = self.git_remote_url() + if url.endswith("/"): + url = url[:-1] + return f"{url}/commit/{sha}" + + def list_deleted_files(self) -> List[str]: + """ + Returns a list of the files that are deleted in the working directory or + index. + + Returns: + `List[str]`: A list of files that have been deleted in the working + directory or index. 
+ """ + try: + git_status = run_subprocess("git status -s", self.local_dir).stdout.strip() + except subprocess.CalledProcessError as exc: + raise EnvironmentError(exc.stderr) + + if len(git_status) == 0: + return [] + + # Receives a status like the following + # D .gitignore + # D new_file.json + # AD new_file1.json + # ?? new_file2.json + # ?? new_file4.json + + # Strip each line of whitespaces + modified_files_statuses = [status.strip() for status in git_status.split("\n")] + + # Only keep files that are deleted using the D prefix + deleted_files_statuses = [status for status in modified_files_statuses if "D" in status.split()[0]] + + # Remove the D prefix and strip to keep only the relevant filename + deleted_files = [status.split()[-1].strip() for status in deleted_files_statuses] + + return deleted_files + + def lfs_track(self, patterns: Union[str, List[str]], filename: bool = False): + """ + Tell git-lfs to track files according to a pattern. + + Setting the `filename` argument to `True` will treat the arguments as + literal filenames, not as patterns. Any special glob characters in the + filename will be escaped when writing to the `.gitattributes` file. + + Args: + patterns (`Union[str, List[str]]`): + The pattern, or list of patterns, to track with git-lfs. + filename (`bool`, *optional*, defaults to `False`): + Whether to use the patterns as literal filenames. + """ + if isinstance(patterns, str): + patterns = [patterns] + try: + for pattern in patterns: + run_subprocess( + f"git lfs track {'--filename' if filename else ''} {pattern}", + self.local_dir, + ) + except subprocess.CalledProcessError as exc: + raise EnvironmentError(exc.stderr) + + def lfs_untrack(self, patterns: Union[str, List[str]]): + """ + Tell git-lfs to untrack those files. + + Args: + patterns (`Union[str, List[str]]`): + The pattern, or list of patterns, to untrack with git-lfs. 
+ """ + if isinstance(patterns, str): + patterns = [patterns] + try: + for pattern in patterns: + run_subprocess("git lfs untrack".split() + [pattern], self.local_dir) + except subprocess.CalledProcessError as exc: + raise EnvironmentError(exc.stderr) + + def lfs_enable_largefiles(self): + """ + HF-specific. This enables upload support of files >5GB. + """ + try: + lfs_config = "git config lfs.customtransfer.multipart" + run_subprocess(f"{lfs_config}.path hf", self.local_dir) + run_subprocess( + f"{lfs_config}.args {LFS_MULTIPART_UPLOAD_COMMAND}", + self.local_dir, + ) + except subprocess.CalledProcessError as exc: + raise EnvironmentError(exc.stderr) + + def auto_track_binary_files(self, pattern: str = ".") -> List[str]: + """ + Automatically track binary files with git-lfs. + + Args: + pattern (`str`, *optional*, defaults to "."): + The pattern with which to track files that are binary. + + Returns: + `List[str]`: List of filenames that are now tracked due to being + binary files + """ + files_to_be_tracked_with_lfs = [] + + deleted_files = self.list_deleted_files() + + for filename in files_to_be_staged(pattern, folder=self.local_dir): + if filename in deleted_files: + continue + + path_to_file = os.path.join(os.getcwd(), self.local_dir, filename) + + if not (is_tracked_with_lfs(path_to_file) or is_git_ignored(path_to_file)): + size_in_mb = os.path.getsize(path_to_file) / (1024 * 1024) + + if size_in_mb >= 10: + logger.warning( + "Parsing a large file to check if binary or not. Tracking large" + " files using `repository.auto_track_large_files` is" + " recommended so as to not load the full file in memory." 
+ ) + + is_binary = is_binary_file(path_to_file) + + if is_binary: + self.lfs_track(filename) + files_to_be_tracked_with_lfs.append(filename) + + # Cleanup the .gitattributes if files were deleted + self.lfs_untrack(deleted_files) + + return files_to_be_tracked_with_lfs + + def auto_track_large_files(self, pattern: str = ".") -> List[str]: + """ + Automatically track large files (files that weigh more than 10MBs) with + git-lfs. + + Args: + pattern (`str`, *optional*, defaults to "."): + The pattern with which to track files that are above 10MBs. + + Returns: + `List[str]`: List of filenames that are now tracked due to their + size. + """ + files_to_be_tracked_with_lfs = [] + + deleted_files = self.list_deleted_files() + + for filename in files_to_be_staged(pattern, folder=self.local_dir): + if filename in deleted_files: + continue + + path_to_file = os.path.join(os.getcwd(), self.local_dir, filename) + size_in_mb = os.path.getsize(path_to_file) / (1024 * 1024) + + if size_in_mb >= 10 and not is_tracked_with_lfs(path_to_file) and not is_git_ignored(path_to_file): + self.lfs_track(filename) + files_to_be_tracked_with_lfs.append(filename) + + # Cleanup the .gitattributes if files were deleted + self.lfs_untrack(deleted_files) + + return files_to_be_tracked_with_lfs + + def lfs_prune(self, recent=False): + """ + git lfs prune + + Args: + recent (`bool`, *optional*, defaults to `False`): + Whether to prune files even if they were referenced by recent + commits. See the following + [link](https://github.com/git-lfs/git-lfs/blob/f3d43f0428a84fc4f1e5405b76b5a73ec2437e65/docs/man/git-lfs-prune.1.ronn#recent-files) + for more information. 
+ """ + try: + with _lfs_log_progress(): + result = run_subprocess(f"git lfs prune {'--recent' if recent else ''}", self.local_dir) + logger.info(result.stdout) + except subprocess.CalledProcessError as exc: + raise EnvironmentError(exc.stderr) + + def git_pull(self, rebase: bool = False, lfs: bool = False): + """ + git pull + + Args: + rebase (`bool`, *optional*, defaults to `False`): + Whether to rebase the current branch on top of the upstream + branch after fetching. + lfs (`bool`, *optional*, defaults to `False`): + Whether to fetch the LFS files too. This option only changes the + behavior when a repository was cloned without fetching the LFS + files; calling `repo.git_pull(lfs=True)` will then fetch the LFS + file from the remote repository. + """ + command = "git pull" if not lfs else "git lfs pull" + if rebase: + command += " --rebase" + try: + with _lfs_log_progress(): + result = run_subprocess(command, self.local_dir) + logger.info(result.stdout) + except subprocess.CalledProcessError as exc: + raise EnvironmentError(exc.stderr) + + def git_add(self, pattern: str = ".", auto_lfs_track: bool = False): + """ + git add + + Setting the `auto_lfs_track` parameter to `True` will automatically + track files that are larger than 10MB with `git-lfs`. + + Args: + pattern (`str`, *optional*, defaults to "."): + The pattern with which to add files to staging. + auto_lfs_track (`bool`, *optional*, defaults to `False`): + Whether to automatically track large and binary files with + git-lfs. Any file over 10MB in size, or in binary format, will + be automatically tracked. + """ + if auto_lfs_track: + # Track files according to their size (>=10MB) + tracked_files = self.auto_track_large_files(pattern) + + # Read the remaining files and track them if they're binary + tracked_files.extend(self.auto_track_binary_files(pattern)) + + if tracked_files: + logger.warning( + f"Adding files tracked by Git LFS: {tracked_files}. 
This may take a" + " bit of time if the files are large." + ) + + try: + result = run_subprocess("git add -v".split() + [pattern], self.local_dir) + logger.info(f"Adding to index:\n{result.stdout}\n") + except subprocess.CalledProcessError as exc: + raise EnvironmentError(exc.stderr) + + def git_commit(self, commit_message: str = "commit files to HF hub"): + """ + git commit + + Args: + commit_message (`str`, *optional*, defaults to "commit files to HF hub"): + The message attributed to the commit. + """ + try: + result = run_subprocess("git commit -v -m".split() + [commit_message], self.local_dir) + logger.info(f"Committed:\n{result.stdout}\n") + except subprocess.CalledProcessError as exc: + if len(exc.stderr) > 0: + raise EnvironmentError(exc.stderr) + else: + raise EnvironmentError(exc.stdout) + + def git_push( + self, + upstream: Optional[str] = None, + blocking: bool = True, + auto_lfs_prune: bool = False, + ) -> Union[str, Tuple[str, CommandInProgress]]: + """ + git push + + If used without setting `blocking`, will return url to commit on remote + repo. If used with `blocking=True`, will return a tuple containing the + url to commit and the command object to follow for information about the + process. + + Args: + upstream (`str`, *optional*): + Upstream to which this should push. If not specified, will push + to the lastly defined upstream or to the default one (`origin + main`). + blocking (`bool`, *optional*, defaults to `True`): + Whether the function should return only when the push has + finished. Setting this to `False` will return an + `CommandInProgress` object which has an `is_done` property. This + property will be set to `True` when the push is finished. + auto_lfs_prune (`bool`, *optional*, defaults to `False`): + Whether to automatically prune files once they have been pushed + to the remote. 
+ """ + command = "git push" + + if upstream: + command += f" --set-upstream {upstream}" + + number_of_commits = commits_to_push(self.local_dir, upstream) + + if number_of_commits > 1: + logger.warning(f"Several commits ({number_of_commits}) will be pushed upstream.") + if blocking: + logger.warning("The progress bars may be unreliable.") + + try: + with _lfs_log_progress(): + process = subprocess.Popen( + command.split(), + stderr=subprocess.PIPE, + stdout=subprocess.PIPE, + encoding="utf-8", + cwd=self.local_dir, + ) + + if blocking: + stdout, stderr = process.communicate() + return_code = process.poll() + process.kill() + + if len(stderr): + logger.warning(stderr) + + if return_code: + raise subprocess.CalledProcessError(return_code, process.args, output=stdout, stderr=stderr) + + except subprocess.CalledProcessError as exc: + raise EnvironmentError(exc.stderr) + + if not blocking: + + def status_method(): + status = process.poll() + if status is None: + return -1 + else: + return status + + command_in_progress = CommandInProgress( + "push", + is_done_method=lambda: process.poll() is not None, + status_method=status_method, + process=process, + post_method=self.lfs_prune if auto_lfs_prune else None, + ) + + self.command_queue.append(command_in_progress) + + return self.git_head_commit_url(), command_in_progress + + if auto_lfs_prune: + self.lfs_prune() + + return self.git_head_commit_url() + + def git_checkout(self, revision: str, create_branch_ok: bool = False): + """ + git checkout a given revision + + Specifying `create_branch_ok` to `True` will create the branch to the + given revision if that revision doesn't exist. + + Args: + revision (`str`): + The revision to checkout. + create_branch_ok (`str`, *optional*, defaults to `False`): + Whether creating a branch named with the `revision` passed at + the current checked-out reference if `revision` isn't an + existing revision is allowed. 
+ """ + try: + result = run_subprocess(f"git checkout {revision}", self.local_dir) + logger.warning(f"Checked out {revision} from {self.current_branch}.") + logger.warning(result.stdout) + except subprocess.CalledProcessError as exc: + if not create_branch_ok: + raise EnvironmentError(exc.stderr) + else: + try: + result = run_subprocess(f"git checkout -b {revision}", self.local_dir) + logger.warning( + f"Revision `{revision}` does not exist. Created and checked out branch `{revision}`." + ) + logger.warning(result.stdout) + except subprocess.CalledProcessError as exc: + raise EnvironmentError(exc.stderr) + + def tag_exists(self, tag_name: str, remote: Optional[str] = None) -> bool: + """ + Check if a tag exists or not. + + Args: + tag_name (`str`): + The name of the tag to check. + remote (`str`, *optional*): + Whether to check if the tag exists on a remote. This parameter + should be the identifier of the remote. + + Returns: + `bool`: Whether the tag exists. + """ + if remote: + try: + result = run_subprocess(f"git ls-remote origin refs/tags/{tag_name}", self.local_dir).stdout.strip() + except subprocess.CalledProcessError as exc: + raise EnvironmentError(exc.stderr) + + return len(result) != 0 + else: + try: + git_tags = run_subprocess("git tag", self.local_dir).stdout.strip() + except subprocess.CalledProcessError as exc: + raise EnvironmentError(exc.stderr) + + git_tags = git_tags.split("\n") + return tag_name in git_tags + + def delete_tag(self, tag_name: str, remote: Optional[str] = None) -> bool: + """ + Delete a tag, both local and remote, if it exists + + Args: + tag_name (`str`): + The tag name to delete. + remote (`str`, *optional*): + The remote on which to delete the tag. + + Returns: + `bool`: `True` if deleted, `False` if the tag didn't exist. 
+ If remote is not passed, will just be updated locally + """ + delete_locally = True + delete_remotely = True + + if not self.tag_exists(tag_name): + delete_locally = False + + if not self.tag_exists(tag_name, remote=remote): + delete_remotely = False + + if delete_locally: + try: + run_subprocess(["git", "tag", "-d", tag_name], self.local_dir).stdout.strip() + except subprocess.CalledProcessError as exc: + raise EnvironmentError(exc.stderr) + + if remote and delete_remotely: + try: + run_subprocess(f"git push {remote} --delete {tag_name}", self.local_dir).stdout.strip() + except subprocess.CalledProcessError as exc: + raise EnvironmentError(exc.stderr) + + return True + + def add_tag(self, tag_name: str, message: Optional[str] = None, remote: Optional[str] = None): + """ + Add a tag at the current head and push it + + If remote is None, will just be updated locally + + If no message is provided, the tag will be lightweight. if a message is + provided, the tag will be annotated. + + Args: + tag_name (`str`): + The name of the tag to be added. + message (`str`, *optional*): + The message that accompanies the tag. The tag will turn into an + annotated tag if a message is passed. + remote (`str`, *optional*): + The remote on which to add the tag. + """ + if message: + tag_args = ["git", "tag", "-a", tag_name, "-m", message] + else: + tag_args = ["git", "tag", tag_name] + + try: + run_subprocess(tag_args, self.local_dir).stdout.strip() + except subprocess.CalledProcessError as exc: + raise EnvironmentError(exc.stderr) + + if remote: + try: + run_subprocess(f"git push {remote} {tag_name}", self.local_dir).stdout.strip() + except subprocess.CalledProcessError as exc: + raise EnvironmentError(exc.stderr) + + def is_repo_clean(self) -> bool: + """ + Return whether or not the git status is clean or not + + Returns: + `bool`: `True` if the git status is clean, `False` otherwise. 
+ """ + try: + git_status = run_subprocess("git status --porcelain", self.local_dir).stdout.strip() + except subprocess.CalledProcessError as exc: + raise EnvironmentError(exc.stderr) + + return len(git_status) == 0 + + def push_to_hub( + self, + commit_message: str = "commit files to HF hub", + blocking: bool = True, + clean_ok: bool = True, + auto_lfs_prune: bool = False, + ) -> Union[None, str, Tuple[str, CommandInProgress]]: + """ + Helper to add, commit, and push files to remote repository on the + HuggingFace Hub. Will automatically track large files (>10MB). + + Args: + commit_message (`str`): + Message to use for the commit. + blocking (`bool`, *optional*, defaults to `True`): + Whether the function should return only when the `git push` has + finished. + clean_ok (`bool`, *optional*, defaults to `True`): + If True, this function will return None if the repo is + untouched. Default behavior is to fail because the git command + fails. + auto_lfs_prune (`bool`, *optional*, defaults to `False`): + Whether to automatically prune files once they have been pushed + to the remote. + """ + if clean_ok and self.is_repo_clean(): + logger.info("Repo currently clean. Ignoring push_to_hub") + return None + self.git_add(auto_lfs_track=True) + self.git_commit(commit_message) + return self.git_push( + upstream=f"origin {self.current_branch}", + blocking=blocking, + auto_lfs_prune=auto_lfs_prune, + ) + + @contextmanager + def commit( + self, + commit_message: str, + branch: Optional[str] = None, + track_large_files: bool = True, + blocking: bool = True, + auto_lfs_prune: bool = False, + ): + """ + Context manager utility to handle committing to a repository. This + automatically tracks large files (>10Mb) with git-lfs. Set the + `track_large_files` argument to `False` if you wish to ignore that + behavior. + + Args: + commit_message (`str`): + Message to use for the commit. + branch (`str`, *optional*): + The branch on which the commit will appear. 
This branch will be + checked-out before any operation. + track_large_files (`bool`, *optional*, defaults to `True`): + Whether to automatically track large files or not. Will do so by + default. + blocking (`bool`, *optional*, defaults to `True`): + Whether the function should return only when the `git push` has + finished. + auto_lfs_prune (`bool`, defaults to `True`): + Whether to automatically prune files once they have been pushed + to the remote. + + Examples: + + ```python + >>> with Repository( + ... "text-files", + ... clone_from="/text-files", + ... token=True, + >>> ).commit("My first file :)"): + ... with open("file.txt", "w+") as f: + ... f.write(json.dumps({"hey": 8})) + + >>> import torch + + >>> model = torch.nn.Transformer() + >>> with Repository( + ... "torch-model", + ... clone_from="/torch-model", + ... token=True, + >>> ).commit("My cool model :)"): + ... torch.save(model.state_dict(), "model.pt") + ``` + + """ + + files_to_stage = files_to_be_staged(".", folder=self.local_dir) + + if len(files_to_stage): + files_in_msg = str(files_to_stage[:5])[:-1] + ", ...]" if len(files_to_stage) > 5 else str(files_to_stage) + logger.error( + "There exists some updated files in the local repository that are not" + f" committed: {files_in_msg}. This may lead to errors if checking out" + " a branch. These files and their modifications will be added to the" + " current commit." + ) + + if branch is not None: + self.git_checkout(branch, create_branch_ok=True) + + if is_tracked_upstream(self.local_dir): + logger.warning("Pulling changes ...") + self.git_pull(rebase=True) + else: + logger.warning(f"The current branch has no upstream branch. 
Will push to 'origin {self.current_branch}'") + + current_working_directory = os.getcwd() + os.chdir(os.path.join(current_working_directory, self.local_dir)) + + try: + yield self + finally: + self.git_add(auto_lfs_track=track_large_files) + + try: + self.git_commit(commit_message) + except OSError as e: + # If no changes are detected, there is nothing to commit. + if "nothing to commit" not in str(e): + raise e + + try: + self.git_push( + upstream=f"origin {self.current_branch}", + blocking=blocking, + auto_lfs_prune=auto_lfs_prune, + ) + except OSError as e: + # If no changes are detected, there is nothing to commit. + if "could not read Username" in str(e): + raise OSError("Couldn't authenticate user for push. Did you set `token` to `True`?") from e + else: + raise e + + os.chdir(current_working_directory) + + def repocard_metadata_load(self) -> Optional[Dict]: + filepath = os.path.join(self.local_dir, constants.REPOCARD_NAME) + if os.path.isfile(filepath): + return metadata_load(filepath) + return None + + def repocard_metadata_save(self, data: Dict) -> None: + return metadata_save(os.path.join(self.local_dir, constants.REPOCARD_NAME), data) + + @property + def commands_failed(self): + """ + Returns the asynchronous commands that failed. + """ + return [c for c in self.command_queue if c.status > 0] + + @property + def commands_in_progress(self): + """ + Returns the asynchronous commands that are currently in progress. + """ + return [c for c in self.command_queue if not c.is_done] + + def wait_for_commands(self): + """ + Blocking method: blocks all subsequent execution until all commands have + been processed. 
+ """ + index = 0 + for command_failed in self.commands_failed: + logger.error(f"The {command_failed.title} command with PID {command_failed._process.pid} failed.") + logger.error(command_failed.stderr) + + while self.commands_in_progress: + if index % 10 == 0: + logger.warning( + f"Waiting for the following commands to finish before shutting down: {self.commands_in_progress}." + ) + + index += 1 + + time.sleep(1) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/idna-3.10.dist-info/INSTALLER b/URSA/.venv_ursa/lib/python3.12/site-packages/idna-3.10.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/idna-3.10.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/idna-3.10.dist-info/LICENSE.md b/URSA/.venv_ursa/lib/python3.12/site-packages/idna-3.10.dist-info/LICENSE.md new file mode 100644 index 0000000000000000000000000000000000000000..19b6b45242c16a1025465309eec2ca5009319de3 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/idna-3.10.dist-info/LICENSE.md @@ -0,0 +1,31 @@ +BSD 3-Clause License + +Copyright (c) 2013-2024, Kim Davies and contributors. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/idna-3.10.dist-info/METADATA b/URSA/.venv_ursa/lib/python3.12/site-packages/idna-3.10.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..c42623e9423c23b555d9d352bc5dab518ede02c2 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/idna-3.10.dist-info/METADATA @@ -0,0 +1,250 @@ +Metadata-Version: 2.1 +Name: idna +Version: 3.10 +Summary: Internationalized Domain Names in Applications (IDNA) +Author-email: Kim Davies +Requires-Python: >=3.6 +Description-Content-Type: text/x-rst +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: Intended Audience :: System Administrators +Classifier: License :: OSI Approved :: BSD License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3 :: Only +Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming 
Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: Programming Language :: Python :: 3.13 +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Programming Language :: Python :: Implementation :: PyPy +Classifier: Topic :: Internet :: Name Service (DNS) +Classifier: Topic :: Software Development :: Libraries :: Python Modules +Classifier: Topic :: Utilities +Requires-Dist: ruff >= 0.6.2 ; extra == "all" +Requires-Dist: mypy >= 1.11.2 ; extra == "all" +Requires-Dist: pytest >= 8.3.2 ; extra == "all" +Requires-Dist: flake8 >= 7.1.1 ; extra == "all" +Project-URL: Changelog, https://github.com/kjd/idna/blob/master/HISTORY.rst +Project-URL: Issue tracker, https://github.com/kjd/idna/issues +Project-URL: Source, https://github.com/kjd/idna +Provides-Extra: all + +Internationalized Domain Names in Applications (IDNA) +===================================================== + +Support for the Internationalized Domain Names in +Applications (IDNA) protocol as specified in `RFC 5891 +`_. This is the latest version of +the protocol and is sometimes referred to as “IDNA 2008”. + +This library also provides support for Unicode Technical +Standard 46, `Unicode IDNA Compatibility Processing +`_. + +This acts as a suitable replacement for the “encodings.idna” +module that comes with the Python standard library, but which +only supports the older superseded IDNA specification (`RFC 3490 +`_). + +Basic functions are simply executed: + +.. code-block:: pycon + + >>> import idna + >>> idna.encode('ドメイン.テスト') + b'xn--eckwd4c7c.xn--zckzah' + >>> print(idna.decode('xn--eckwd4c7c.xn--zckzah')) + ドメイン.テスト + + +Installation +------------ + +This package is available for installation from PyPI: + +.. 
code-block:: bash + + $ python3 -m pip install idna + + +Usage +----- + +For typical usage, the ``encode`` and ``decode`` functions will take a +domain name argument and perform a conversion to A-labels or U-labels +respectively. + +.. code-block:: pycon + + >>> import idna + >>> idna.encode('ドメイン.テスト') + b'xn--eckwd4c7c.xn--zckzah' + >>> print(idna.decode('xn--eckwd4c7c.xn--zckzah')) + ドメイン.テスト + +You may use the codec encoding and decoding methods using the +``idna.codec`` module: + +.. code-block:: pycon + + >>> import idna.codec + >>> print('домен.испытание'.encode('idna2008')) + b'xn--d1acufc.xn--80akhbyknj4f' + >>> print(b'xn--d1acufc.xn--80akhbyknj4f'.decode('idna2008')) + домен.испытание + +Conversions can be applied at a per-label basis using the ``ulabel`` or +``alabel`` functions if necessary: + +.. code-block:: pycon + + >>> idna.alabel('测试') + b'xn--0zwm56d' + +Compatibility Mapping (UTS #46) ++++++++++++++++++++++++++++++++ + +As described in `RFC 5895 `_, the +IDNA specification does not normalize input from different potential +ways a user may input a domain name. This functionality, known as +a “mapping”, is considered by the specification to be a local +user-interface issue distinct from IDNA conversion functionality. + +This library provides one such mapping that was developed by the +Unicode Consortium. Known as `Unicode IDNA Compatibility Processing +`_, it provides for both a regular +mapping for typical applications, as well as a transitional mapping to +help migrate from older IDNA 2003 applications. Strings are +preprocessed according to Section 4.4 “Preprocessing for IDNA2008” +prior to the IDNA operations. + +For example, “Königsgäßchen” is not a permissible label as *LATIN +CAPITAL LETTER K* is not allowed (nor are capital letters in general). +UTS 46 will convert this into lower case prior to applying the IDNA +conversion. + +.. code-block:: pycon + + >>> import idna + >>> idna.encode('Königsgäßchen') + ... 
+ idna.core.InvalidCodepoint: Codepoint U+004B at position 1 of 'Königsgäßchen' not allowed + >>> idna.encode('Königsgäßchen', uts46=True) + b'xn--knigsgchen-b4a3dun' + >>> print(idna.decode('xn--knigsgchen-b4a3dun')) + königsgäßchen + +Transitional processing provides conversions to help transition from +the older 2003 standard to the current standard. For example, in the +original IDNA specification, the *LATIN SMALL LETTER SHARP S* (ß) was +converted into two *LATIN SMALL LETTER S* (ss), whereas in the current +IDNA specification this conversion is not performed. + +.. code-block:: pycon + + >>> idna.encode('Königsgäßchen', uts46=True, transitional=True) + 'xn--knigsgsschen-lcb0w' + +Implementers should use transitional processing with caution, only in +rare cases where conversion from legacy labels to current labels must be +performed (i.e. IDNA implementations that pre-date 2008). For typical +applications that just need to convert labels, transitional processing +is unlikely to be beneficial and could produce unexpected incompatible +results. + +``encodings.idna`` Compatibility +++++++++++++++++++++++++++++++++ + +Function calls from the Python built-in ``encodings.idna`` module are +mapped to their IDNA 2008 equivalents using the ``idna.compat`` module. +Simply substitute the ``import`` clause in your code to refer to the new +module name. + +Exceptions +---------- + +All errors raised during the conversion following the specification +should raise an exception derived from the ``idna.IDNAError`` base +class. + +More specific exceptions that may be generated as ``idna.IDNABidiError`` +when the error reflects an illegal combination of left-to-right and +right-to-left characters in a label; ``idna.InvalidCodepoint`` when +a specific codepoint is an illegal character in an IDN label (i.e. +INVALID); and ``idna.InvalidCodepointContext`` when the codepoint is +illegal based on its positional context (i.e. 
it is CONTEXTO or CONTEXTJ +but the contextual requirements are not satisfied.) + +Building and Diagnostics +------------------------ + +The IDNA and UTS 46 functionality relies upon pre-calculated lookup +tables for performance. These tables are derived from computing against +eligibility criteria in the respective standards. These tables are +computed using the command-line script ``tools/idna-data``. + +This tool will fetch relevant codepoint data from the Unicode repository +and perform the required calculations to identify eligibility. There are +three main modes: + +* ``idna-data make-libdata``. Generates ``idnadata.py`` and + ``uts46data.py``, the pre-calculated lookup tables used for IDNA and + UTS 46 conversions. Implementers who wish to track this library against + a different Unicode version may use this tool to manually generate a + different version of the ``idnadata.py`` and ``uts46data.py`` files. + +* ``idna-data make-table``. Generate a table of the IDNA disposition + (e.g. PVALID, CONTEXTJ, CONTEXTO) in the format found in Appendix + B.1 of RFC 5892 and the pre-computed tables published by `IANA + `_. + +* ``idna-data U+0061``. Prints debugging output on the various + properties associated with an individual Unicode codepoint (in this + case, U+0061), that are used to assess the IDNA and UTS 46 status of a + codepoint. This is helpful in debugging or analysis. + +The tool accepts a number of arguments, described using ``idna-data +-h``. Most notably, the ``--version`` argument allows the specification +of the version of Unicode to be used in computing the table data. For +example, ``idna-data --version 9.0.0 make-libdata`` will generate +library data against Unicode 9.0.0. + + +Additional Notes +---------------- + +* **Packages**. The latest tagged release version is published in the + `Python Package Index `_. + +* **Version support**. This library supports Python 3.6 and higher. 
+ As this library serves as a low-level toolkit for a variety of + applications, many of which strive for broad compatibility with older + Python versions, there is no rush to remove older interpreter support. + Removing support for older versions should be well justified in that the + maintenance burden has become too high. + +* **Python 2**. Python 2 is supported by version 2.x of this library. + Use "idna<3" in your requirements file if you need this library for + a Python 2 application. Be advised that these versions are no longer + actively developed. + +* **Testing**. The library has a test suite based on each rule of the + IDNA specification, as well as tests that are provided as part of the + Unicode Technical Standard 46, `Unicode IDNA Compatibility Processing + `_. + +* **Emoji**. It is an occasional request to support emoji domains in + this library. Encoding of symbols like emoji is expressly prohibited by + the technical standard IDNA 2008 and emoji domains are broadly phased + out across the domain industry due to associated security risks. For + now, applications that need to support these non-compliant labels + may wish to consider trying the encode/decode operation in this library + first, and then falling back to using `encodings.idna`. See `the Github + project `_ for more discussion. 
+ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/idna-3.10.dist-info/RECORD b/URSA/.venv_ursa/lib/python3.12/site-packages/idna-3.10.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..9cfce7f9f4c1a64d85642f865f9d0a3d138a82c6 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/idna-3.10.dist-info/RECORD @@ -0,0 +1,22 @@ +idna-3.10.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +idna-3.10.dist-info/LICENSE.md,sha256=pZ8LDvNjWHQQmkRhykT_enDVBpboFHZ7-vch1Mmw2w8,1541 +idna-3.10.dist-info/METADATA,sha256=URR5ZyDfQ1PCEGhkYoojqfi2Ra0tau2--lhwG4XSfjI,10158 +idna-3.10.dist-info/RECORD,, +idna-3.10.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81 +idna/__init__.py,sha256=MPqNDLZbXqGaNdXxAFhiqFPKEQXju2jNQhCey6-5eJM,868 +idna/__pycache__/__init__.cpython-312.pyc,, +idna/__pycache__/codec.cpython-312.pyc,, +idna/__pycache__/compat.cpython-312.pyc,, +idna/__pycache__/core.cpython-312.pyc,, +idna/__pycache__/idnadata.cpython-312.pyc,, +idna/__pycache__/intranges.cpython-312.pyc,, +idna/__pycache__/package_data.cpython-312.pyc,, +idna/__pycache__/uts46data.cpython-312.pyc,, +idna/codec.py,sha256=PEew3ItwzjW4hymbasnty2N2OXvNcgHB-JjrBuxHPYY,3422 +idna/compat.py,sha256=RzLy6QQCdl9784aFhb2EX9EKGCJjg0P3PilGdeXXcx8,316 +idna/core.py,sha256=YJYyAMnwiQEPjVC4-Fqu_p4CJ6yKKuDGmppBNQNQpFs,13239 +idna/idnadata.py,sha256=W30GcIGvtOWYwAjZj4ZjuouUutC6ffgNuyjJy7fZ-lo,78306 +idna/intranges.py,sha256=amUtkdhYcQG8Zr-CoMM_kVRacxkivC1WgxN1b63KKdU,1898 +idna/package_data.py,sha256=q59S3OXsc5VI8j6vSD0sGBMyk6zZ4vWFREE88yCJYKs,21 +idna/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +idna/uts46data.py,sha256=rt90K9J40gUSwppDPCrhjgi5AA6pWM65dEGRSf6rIhM,239289 diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/idna-3.10.dist-info/WHEEL b/URSA/.venv_ursa/lib/python3.12/site-packages/idna-3.10.dist-info/WHEEL new file mode 100644 index 
0000000000000000000000000000000000000000..3b5e64b5e6c4a210201d1676a891fd57b15cda99 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/idna-3.10.dist-info/WHEEL @@ -0,0 +1,4 @@ +Wheel-Version: 1.0 +Generator: flit 3.9.0 +Root-Is-Purelib: true +Tag: py3-none-any diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/idna/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/idna/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..cfdc030a751b089fc7e38fc88093b791605d501d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/idna/__init__.py @@ -0,0 +1,45 @@ +from .core import ( + IDNABidiError, + IDNAError, + InvalidCodepoint, + InvalidCodepointContext, + alabel, + check_bidi, + check_hyphen_ok, + check_initial_combiner, + check_label, + check_nfc, + decode, + encode, + ulabel, + uts46_remap, + valid_contextj, + valid_contexto, + valid_label_length, + valid_string_length, +) +from .intranges import intranges_contain +from .package_data import __version__ + +__all__ = [ + "__version__", + "IDNABidiError", + "IDNAError", + "InvalidCodepoint", + "InvalidCodepointContext", + "alabel", + "check_bidi", + "check_hyphen_ok", + "check_initial_combiner", + "check_label", + "check_nfc", + "decode", + "encode", + "intranges_contain", + "ulabel", + "uts46_remap", + "valid_contextj", + "valid_contexto", + "valid_label_length", + "valid_string_length", +] diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/idna/codec.py b/URSA/.venv_ursa/lib/python3.12/site-packages/idna/codec.py new file mode 100644 index 0000000000000000000000000000000000000000..913abfd6a23ce547f84de2adc41221012f1007d6 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/idna/codec.py @@ -0,0 +1,122 @@ +import codecs +import re +from typing import Any, Optional, Tuple + +from .core import IDNAError, alabel, decode, encode, ulabel + +_unicode_dots_re = re.compile("[\u002e\u3002\uff0e\uff61]") + + +class Codec(codecs.Codec): + def 
encode(self, data: str, errors: str = "strict") -> Tuple[bytes, int]: + if errors != "strict": + raise IDNAError('Unsupported error handling "{}"'.format(errors)) + + if not data: + return b"", 0 + + return encode(data), len(data) + + def decode(self, data: bytes, errors: str = "strict") -> Tuple[str, int]: + if errors != "strict": + raise IDNAError('Unsupported error handling "{}"'.format(errors)) + + if not data: + return "", 0 + + return decode(data), len(data) + + +class IncrementalEncoder(codecs.BufferedIncrementalEncoder): + def _buffer_encode(self, data: str, errors: str, final: bool) -> Tuple[bytes, int]: + if errors != "strict": + raise IDNAError('Unsupported error handling "{}"'.format(errors)) + + if not data: + return b"", 0 + + labels = _unicode_dots_re.split(data) + trailing_dot = b"" + if labels: + if not labels[-1]: + trailing_dot = b"." + del labels[-1] + elif not final: + # Keep potentially unfinished label until the next call + del labels[-1] + if labels: + trailing_dot = b"." + + result = [] + size = 0 + for label in labels: + result.append(alabel(label)) + if size: + size += 1 + size += len(label) + + # Join with U+002E + result_bytes = b".".join(result) + trailing_dot + size += len(trailing_dot) + return result_bytes, size + + +class IncrementalDecoder(codecs.BufferedIncrementalDecoder): + def _buffer_decode(self, data: Any, errors: str, final: bool) -> Tuple[str, int]: + if errors != "strict": + raise IDNAError('Unsupported error handling "{}"'.format(errors)) + + if not data: + return ("", 0) + + if not isinstance(data, str): + data = str(data, "ascii") + + labels = _unicode_dots_re.split(data) + trailing_dot = "" + if labels: + if not labels[-1]: + trailing_dot = "." + del labels[-1] + elif not final: + # Keep potentially unfinished label until the next call + del labels[-1] + if labels: + trailing_dot = "." 
+ + result = [] + size = 0 + for label in labels: + result.append(ulabel(label)) + if size: + size += 1 + size += len(label) + + result_str = ".".join(result) + trailing_dot + size += len(trailing_dot) + return (result_str, size) + + +class StreamWriter(Codec, codecs.StreamWriter): + pass + + +class StreamReader(Codec, codecs.StreamReader): + pass + + +def search_function(name: str) -> Optional[codecs.CodecInfo]: + if name != "idna2008": + return None + return codecs.CodecInfo( + name=name, + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamwriter=StreamWriter, + streamreader=StreamReader, + ) + + +codecs.register(search_function) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/idna/compat.py b/URSA/.venv_ursa/lib/python3.12/site-packages/idna/compat.py new file mode 100644 index 0000000000000000000000000000000000000000..1df9f2a70e6815908f2784e88897a9a359eef84c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/idna/compat.py @@ -0,0 +1,15 @@ +from typing import Any, Union + +from .core import decode, encode + + +def ToASCII(label: str) -> bytes: + return encode(label) + + +def ToUnicode(label: Union[bytes, bytearray]) -> str: + return decode(label) + + +def nameprep(s: Any) -> None: + raise NotImplementedError("IDNA 2008 does not utilise nameprep protocol") diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/idna/core.py b/URSA/.venv_ursa/lib/python3.12/site-packages/idna/core.py new file mode 100644 index 0000000000000000000000000000000000000000..9115f123f0274832af5ba1cf3c5481cc5353eecd --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/idna/core.py @@ -0,0 +1,437 @@ +import bisect +import re +import unicodedata +from typing import Optional, Union + +from . 
import idnadata +from .intranges import intranges_contain + +_virama_combining_class = 9 +_alabel_prefix = b"xn--" +_unicode_dots_re = re.compile("[\u002e\u3002\uff0e\uff61]") + + +class IDNAError(UnicodeError): + """Base exception for all IDNA-encoding related problems""" + + pass + + +class IDNABidiError(IDNAError): + """Exception when bidirectional requirements are not satisfied""" + + pass + + +class InvalidCodepoint(IDNAError): + """Exception when a disallowed or unallocated codepoint is used""" + + pass + + +class InvalidCodepointContext(IDNAError): + """Exception when the codepoint is not valid in the context it is used""" + + pass + + +def _combining_class(cp: int) -> int: + v = unicodedata.combining(chr(cp)) + if v == 0: + if not unicodedata.name(chr(cp)): + raise ValueError("Unknown character in unicodedata") + return v + + +def _is_script(cp: str, script: str) -> bool: + return intranges_contain(ord(cp), idnadata.scripts[script]) + + +def _punycode(s: str) -> bytes: + return s.encode("punycode") + + +def _unot(s: int) -> str: + return "U+{:04X}".format(s) + + +def valid_label_length(label: Union[bytes, str]) -> bool: + if len(label) > 63: + return False + return True + + +def valid_string_length(label: Union[bytes, str], trailing_dot: bool) -> bool: + if len(label) > (254 if trailing_dot else 253): + return False + return True + + +def check_bidi(label: str, check_ltr: bool = False) -> bool: + # Bidi rules should only be applied if string contains RTL characters + bidi_label = False + for idx, cp in enumerate(label, 1): + direction = unicodedata.bidirectional(cp) + if direction == "": + # String likely comes from a newer version of Unicode + raise IDNABidiError("Unknown directionality in label {} at position {}".format(repr(label), idx)) + if direction in ["R", "AL", "AN"]: + bidi_label = True + if not bidi_label and not check_ltr: + return True + + # Bidi rule 1 + direction = unicodedata.bidirectional(label[0]) + if direction in ["R", "AL"]: + rtl = 
True + elif direction == "L": + rtl = False + else: + raise IDNABidiError("First codepoint in label {} must be directionality L, R or AL".format(repr(label))) + + valid_ending = False + number_type: Optional[str] = None + for idx, cp in enumerate(label, 1): + direction = unicodedata.bidirectional(cp) + + if rtl: + # Bidi rule 2 + if direction not in [ + "R", + "AL", + "AN", + "EN", + "ES", + "CS", + "ET", + "ON", + "BN", + "NSM", + ]: + raise IDNABidiError("Invalid direction for codepoint at position {} in a right-to-left label".format(idx)) + # Bidi rule 3 + if direction in ["R", "AL", "EN", "AN"]: + valid_ending = True + elif direction != "NSM": + valid_ending = False + # Bidi rule 4 + if direction in ["AN", "EN"]: + if not number_type: + number_type = direction + else: + if number_type != direction: + raise IDNABidiError("Can not mix numeral types in a right-to-left label") + else: + # Bidi rule 5 + if direction not in ["L", "EN", "ES", "CS", "ET", "ON", "BN", "NSM"]: + raise IDNABidiError("Invalid direction for codepoint at position {} in a left-to-right label".format(idx)) + # Bidi rule 6 + if direction in ["L", "EN"]: + valid_ending = True + elif direction != "NSM": + valid_ending = False + + if not valid_ending: + raise IDNABidiError("Label ends with illegal codepoint directionality") + + return True + + +def check_initial_combiner(label: str) -> bool: + if unicodedata.category(label[0])[0] == "M": + raise IDNAError("Label begins with an illegal combining character") + return True + + +def check_hyphen_ok(label: str) -> bool: + if label[2:4] == "--": + raise IDNAError("Label has disallowed hyphens in 3rd and 4th position") + if label[0] == "-" or label[-1] == "-": + raise IDNAError("Label must not start or end with a hyphen") + return True + + +def check_nfc(label: str) -> None: + if unicodedata.normalize("NFC", label) != label: + raise IDNAError("Label must be in Normalization Form C") + + +def valid_contextj(label: str, pos: int) -> bool: + cp_value = 
ord(label[pos]) + + if cp_value == 0x200C: + if pos > 0: + if _combining_class(ord(label[pos - 1])) == _virama_combining_class: + return True + + ok = False + for i in range(pos - 1, -1, -1): + joining_type = idnadata.joining_types.get(ord(label[i])) + if joining_type == ord("T"): + continue + elif joining_type in [ord("L"), ord("D")]: + ok = True + break + else: + break + + if not ok: + return False + + ok = False + for i in range(pos + 1, len(label)): + joining_type = idnadata.joining_types.get(ord(label[i])) + if joining_type == ord("T"): + continue + elif joining_type in [ord("R"), ord("D")]: + ok = True + break + else: + break + return ok + + if cp_value == 0x200D: + if pos > 0: + if _combining_class(ord(label[pos - 1])) == _virama_combining_class: + return True + return False + + else: + return False + + +def valid_contexto(label: str, pos: int, exception: bool = False) -> bool: + cp_value = ord(label[pos]) + + if cp_value == 0x00B7: + if 0 < pos < len(label) - 1: + if ord(label[pos - 1]) == 0x006C and ord(label[pos + 1]) == 0x006C: + return True + return False + + elif cp_value == 0x0375: + if pos < len(label) - 1 and len(label) > 1: + return _is_script(label[pos + 1], "Greek") + return False + + elif cp_value == 0x05F3 or cp_value == 0x05F4: + if pos > 0: + return _is_script(label[pos - 1], "Hebrew") + return False + + elif cp_value == 0x30FB: + for cp in label: + if cp == "\u30fb": + continue + if _is_script(cp, "Hiragana") or _is_script(cp, "Katakana") or _is_script(cp, "Han"): + return True + return False + + elif 0x660 <= cp_value <= 0x669: + for cp in label: + if 0x6F0 <= ord(cp) <= 0x06F9: + return False + return True + + elif 0x6F0 <= cp_value <= 0x6F9: + for cp in label: + if 0x660 <= ord(cp) <= 0x0669: + return False + return True + + return False + + +def check_label(label: Union[str, bytes, bytearray]) -> None: + if isinstance(label, (bytes, bytearray)): + label = label.decode("utf-8") + if len(label) == 0: + raise IDNAError("Empty Label") + + 
check_nfc(label) + check_hyphen_ok(label) + check_initial_combiner(label) + + for pos, cp in enumerate(label): + cp_value = ord(cp) + if intranges_contain(cp_value, idnadata.codepoint_classes["PVALID"]): + continue + elif intranges_contain(cp_value, idnadata.codepoint_classes["CONTEXTJ"]): + try: + if not valid_contextj(label, pos): + raise InvalidCodepointContext( + "Joiner {} not allowed at position {} in {}".format(_unot(cp_value), pos + 1, repr(label)) + ) + except ValueError: + raise IDNAError( + "Unknown codepoint adjacent to joiner {} at position {} in {}".format( + _unot(cp_value), pos + 1, repr(label) + ) + ) + elif intranges_contain(cp_value, idnadata.codepoint_classes["CONTEXTO"]): + if not valid_contexto(label, pos): + raise InvalidCodepointContext( + "Codepoint {} not allowed at position {} in {}".format(_unot(cp_value), pos + 1, repr(label)) + ) + else: + raise InvalidCodepoint( + "Codepoint {} at position {} of {} not allowed".format(_unot(cp_value), pos + 1, repr(label)) + ) + + check_bidi(label) + + +def alabel(label: str) -> bytes: + try: + label_bytes = label.encode("ascii") + ulabel(label_bytes) + if not valid_label_length(label_bytes): + raise IDNAError("Label too long") + return label_bytes + except UnicodeEncodeError: + pass + + check_label(label) + label_bytes = _alabel_prefix + _punycode(label) + + if not valid_label_length(label_bytes): + raise IDNAError("Label too long") + + return label_bytes + + +def ulabel(label: Union[str, bytes, bytearray]) -> str: + if not isinstance(label, (bytes, bytearray)): + try: + label_bytes = label.encode("ascii") + except UnicodeEncodeError: + check_label(label) + return label + else: + label_bytes = label + + label_bytes = label_bytes.lower() + if label_bytes.startswith(_alabel_prefix): + label_bytes = label_bytes[len(_alabel_prefix) :] + if not label_bytes: + raise IDNAError("Malformed A-label, no Punycode eligible content found") + if label_bytes.decode("ascii")[-1] == "-": + raise IDNAError("A-label 
must not end with a hyphen") + else: + check_label(label_bytes) + return label_bytes.decode("ascii") + + try: + label = label_bytes.decode("punycode") + except UnicodeError: + raise IDNAError("Invalid A-label") + check_label(label) + return label + + +def uts46_remap(domain: str, std3_rules: bool = True, transitional: bool = False) -> str: + """Re-map the characters in the string according to UTS46 processing.""" + from .uts46data import uts46data + + output = "" + + for pos, char in enumerate(domain): + code_point = ord(char) + try: + uts46row = uts46data[code_point if code_point < 256 else bisect.bisect_left(uts46data, (code_point, "Z")) - 1] + status = uts46row[1] + replacement: Optional[str] = None + if len(uts46row) == 3: + replacement = uts46row[2] + if ( + status == "V" + or (status == "D" and not transitional) + or (status == "3" and not std3_rules and replacement is None) + ): + output += char + elif replacement is not None and ( + status == "M" or (status == "3" and not std3_rules) or (status == "D" and transitional) + ): + output += replacement + elif status != "I": + raise IndexError() + except IndexError: + raise InvalidCodepoint( + "Codepoint {} not allowed at position {} in {}".format(_unot(code_point), pos + 1, repr(domain)) + ) + + return unicodedata.normalize("NFC", output) + + +def encode( + s: Union[str, bytes, bytearray], + strict: bool = False, + uts46: bool = False, + std3_rules: bool = False, + transitional: bool = False, +) -> bytes: + if not isinstance(s, str): + try: + s = str(s, "ascii") + except UnicodeDecodeError: + raise IDNAError("should pass a unicode string to the function rather than a byte string.") + if uts46: + s = uts46_remap(s, std3_rules, transitional) + trailing_dot = False + result = [] + if strict: + labels = s.split(".") + else: + labels = _unicode_dots_re.split(s) + if not labels or labels == [""]: + raise IDNAError("Empty domain") + if labels[-1] == "": + del labels[-1] + trailing_dot = True + for label in labels: + s 
= alabel(label) + if s: + result.append(s) + else: + raise IDNAError("Empty label") + if trailing_dot: + result.append(b"") + s = b".".join(result) + if not valid_string_length(s, trailing_dot): + raise IDNAError("Domain too long") + return s + + +def decode( + s: Union[str, bytes, bytearray], + strict: bool = False, + uts46: bool = False, + std3_rules: bool = False, +) -> str: + try: + if not isinstance(s, str): + s = str(s, "ascii") + except UnicodeDecodeError: + raise IDNAError("Invalid ASCII in A-label") + if uts46: + s = uts46_remap(s, std3_rules, False) + trailing_dot = False + result = [] + if not strict: + labels = _unicode_dots_re.split(s) + else: + labels = s.split(".") + if not labels or labels == [""]: + raise IDNAError("Empty domain") + if not labels[-1]: + del labels[-1] + trailing_dot = True + for label in labels: + s = ulabel(label) + if s: + result.append(s) + else: + raise IDNAError("Empty label") + if trailing_dot: + result.append("") + return ".".join(result) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/idna/idnadata.py b/URSA/.venv_ursa/lib/python3.12/site-packages/idna/idnadata.py new file mode 100644 index 0000000000000000000000000000000000000000..4be6004622efcdc36a8d15efc0ac3e138a4bae02 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/idna/idnadata.py @@ -0,0 +1,4243 @@ +# This file is automatically generated by tools/idna-data + +__version__ = "15.1.0" +scripts = { + "Greek": ( + 0x37000000374, + 0x37500000378, + 0x37A0000037E, + 0x37F00000380, + 0x38400000385, + 0x38600000387, + 0x3880000038B, + 0x38C0000038D, + 0x38E000003A2, + 0x3A3000003E2, + 0x3F000000400, + 0x1D2600001D2B, + 0x1D5D00001D62, + 0x1D6600001D6B, + 0x1DBF00001DC0, + 0x1F0000001F16, + 0x1F1800001F1E, + 0x1F2000001F46, + 0x1F4800001F4E, + 0x1F5000001F58, + 0x1F5900001F5A, + 0x1F5B00001F5C, + 0x1F5D00001F5E, + 0x1F5F00001F7E, + 0x1F8000001FB5, + 0x1FB600001FC5, + 0x1FC600001FD4, + 0x1FD600001FDC, + 0x1FDD00001FF0, + 0x1FF200001FF5, + 
0x1FF600001FFF, + 0x212600002127, + 0xAB650000AB66, + 0x101400001018F, + 0x101A0000101A1, + 0x1D2000001D246, + ), + "Han": ( + 0x2E8000002E9A, + 0x2E9B00002EF4, + 0x2F0000002FD6, + 0x300500003006, + 0x300700003008, + 0x30210000302A, + 0x30380000303C, + 0x340000004DC0, + 0x4E000000A000, + 0xF9000000FA6E, + 0xFA700000FADA, + 0x16FE200016FE4, + 0x16FF000016FF2, + 0x200000002A6E0, + 0x2A7000002B73A, + 0x2B7400002B81E, + 0x2B8200002CEA2, + 0x2CEB00002EBE1, + 0x2EBF00002EE5E, + 0x2F8000002FA1E, + 0x300000003134B, + 0x31350000323B0, + ), + "Hebrew": ( + 0x591000005C8, + 0x5D0000005EB, + 0x5EF000005F5, + 0xFB1D0000FB37, + 0xFB380000FB3D, + 0xFB3E0000FB3F, + 0xFB400000FB42, + 0xFB430000FB45, + 0xFB460000FB50, + ), + "Hiragana": ( + 0x304100003097, + 0x309D000030A0, + 0x1B0010001B120, + 0x1B1320001B133, + 0x1B1500001B153, + 0x1F2000001F201, + ), + "Katakana": ( + 0x30A1000030FB, + 0x30FD00003100, + 0x31F000003200, + 0x32D0000032FF, + 0x330000003358, + 0xFF660000FF70, + 0xFF710000FF9E, + 0x1AFF00001AFF4, + 0x1AFF50001AFFC, + 0x1AFFD0001AFFF, + 0x1B0000001B001, + 0x1B1200001B123, + 0x1B1550001B156, + 0x1B1640001B168, + ), +} +joining_types = { + 0xAD: 84, + 0x300: 84, + 0x301: 84, + 0x302: 84, + 0x303: 84, + 0x304: 84, + 0x305: 84, + 0x306: 84, + 0x307: 84, + 0x308: 84, + 0x309: 84, + 0x30A: 84, + 0x30B: 84, + 0x30C: 84, + 0x30D: 84, + 0x30E: 84, + 0x30F: 84, + 0x310: 84, + 0x311: 84, + 0x312: 84, + 0x313: 84, + 0x314: 84, + 0x315: 84, + 0x316: 84, + 0x317: 84, + 0x318: 84, + 0x319: 84, + 0x31A: 84, + 0x31B: 84, + 0x31C: 84, + 0x31D: 84, + 0x31E: 84, + 0x31F: 84, + 0x320: 84, + 0x321: 84, + 0x322: 84, + 0x323: 84, + 0x324: 84, + 0x325: 84, + 0x326: 84, + 0x327: 84, + 0x328: 84, + 0x329: 84, + 0x32A: 84, + 0x32B: 84, + 0x32C: 84, + 0x32D: 84, + 0x32E: 84, + 0x32F: 84, + 0x330: 84, + 0x331: 84, + 0x332: 84, + 0x333: 84, + 0x334: 84, + 0x335: 84, + 0x336: 84, + 0x337: 84, + 0x338: 84, + 0x339: 84, + 0x33A: 84, + 0x33B: 84, + 0x33C: 84, + 0x33D: 84, + 0x33E: 84, + 0x33F: 84, + 
0x340: 84, + 0x341: 84, + 0x342: 84, + 0x343: 84, + 0x344: 84, + 0x345: 84, + 0x346: 84, + 0x347: 84, + 0x348: 84, + 0x349: 84, + 0x34A: 84, + 0x34B: 84, + 0x34C: 84, + 0x34D: 84, + 0x34E: 84, + 0x34F: 84, + 0x350: 84, + 0x351: 84, + 0x352: 84, + 0x353: 84, + 0x354: 84, + 0x355: 84, + 0x356: 84, + 0x357: 84, + 0x358: 84, + 0x359: 84, + 0x35A: 84, + 0x35B: 84, + 0x35C: 84, + 0x35D: 84, + 0x35E: 84, + 0x35F: 84, + 0x360: 84, + 0x361: 84, + 0x362: 84, + 0x363: 84, + 0x364: 84, + 0x365: 84, + 0x366: 84, + 0x367: 84, + 0x368: 84, + 0x369: 84, + 0x36A: 84, + 0x36B: 84, + 0x36C: 84, + 0x36D: 84, + 0x36E: 84, + 0x36F: 84, + 0x483: 84, + 0x484: 84, + 0x485: 84, + 0x486: 84, + 0x487: 84, + 0x488: 84, + 0x489: 84, + 0x591: 84, + 0x592: 84, + 0x593: 84, + 0x594: 84, + 0x595: 84, + 0x596: 84, + 0x597: 84, + 0x598: 84, + 0x599: 84, + 0x59A: 84, + 0x59B: 84, + 0x59C: 84, + 0x59D: 84, + 0x59E: 84, + 0x59F: 84, + 0x5A0: 84, + 0x5A1: 84, + 0x5A2: 84, + 0x5A3: 84, + 0x5A4: 84, + 0x5A5: 84, + 0x5A6: 84, + 0x5A7: 84, + 0x5A8: 84, + 0x5A9: 84, + 0x5AA: 84, + 0x5AB: 84, + 0x5AC: 84, + 0x5AD: 84, + 0x5AE: 84, + 0x5AF: 84, + 0x5B0: 84, + 0x5B1: 84, + 0x5B2: 84, + 0x5B3: 84, + 0x5B4: 84, + 0x5B5: 84, + 0x5B6: 84, + 0x5B7: 84, + 0x5B8: 84, + 0x5B9: 84, + 0x5BA: 84, + 0x5BB: 84, + 0x5BC: 84, + 0x5BD: 84, + 0x5BF: 84, + 0x5C1: 84, + 0x5C2: 84, + 0x5C4: 84, + 0x5C5: 84, + 0x5C7: 84, + 0x610: 84, + 0x611: 84, + 0x612: 84, + 0x613: 84, + 0x614: 84, + 0x615: 84, + 0x616: 84, + 0x617: 84, + 0x618: 84, + 0x619: 84, + 0x61A: 84, + 0x61C: 84, + 0x620: 68, + 0x622: 82, + 0x623: 82, + 0x624: 82, + 0x625: 82, + 0x626: 68, + 0x627: 82, + 0x628: 68, + 0x629: 82, + 0x62A: 68, + 0x62B: 68, + 0x62C: 68, + 0x62D: 68, + 0x62E: 68, + 0x62F: 82, + 0x630: 82, + 0x631: 82, + 0x632: 82, + 0x633: 68, + 0x634: 68, + 0x635: 68, + 0x636: 68, + 0x637: 68, + 0x638: 68, + 0x639: 68, + 0x63A: 68, + 0x63B: 68, + 0x63C: 68, + 0x63D: 68, + 0x63E: 68, + 0x63F: 68, + 0x640: 67, + 0x641: 68, + 0x642: 68, + 0x643: 68, + 0x644: 68, 
+ 0x645: 68, + 0x646: 68, + 0x647: 68, + 0x648: 82, + 0x649: 68, + 0x64A: 68, + 0x64B: 84, + 0x64C: 84, + 0x64D: 84, + 0x64E: 84, + 0x64F: 84, + 0x650: 84, + 0x651: 84, + 0x652: 84, + 0x653: 84, + 0x654: 84, + 0x655: 84, + 0x656: 84, + 0x657: 84, + 0x658: 84, + 0x659: 84, + 0x65A: 84, + 0x65B: 84, + 0x65C: 84, + 0x65D: 84, + 0x65E: 84, + 0x65F: 84, + 0x66E: 68, + 0x66F: 68, + 0x670: 84, + 0x671: 82, + 0x672: 82, + 0x673: 82, + 0x675: 82, + 0x676: 82, + 0x677: 82, + 0x678: 68, + 0x679: 68, + 0x67A: 68, + 0x67B: 68, + 0x67C: 68, + 0x67D: 68, + 0x67E: 68, + 0x67F: 68, + 0x680: 68, + 0x681: 68, + 0x682: 68, + 0x683: 68, + 0x684: 68, + 0x685: 68, + 0x686: 68, + 0x687: 68, + 0x688: 82, + 0x689: 82, + 0x68A: 82, + 0x68B: 82, + 0x68C: 82, + 0x68D: 82, + 0x68E: 82, + 0x68F: 82, + 0x690: 82, + 0x691: 82, + 0x692: 82, + 0x693: 82, + 0x694: 82, + 0x695: 82, + 0x696: 82, + 0x697: 82, + 0x698: 82, + 0x699: 82, + 0x69A: 68, + 0x69B: 68, + 0x69C: 68, + 0x69D: 68, + 0x69E: 68, + 0x69F: 68, + 0x6A0: 68, + 0x6A1: 68, + 0x6A2: 68, + 0x6A3: 68, + 0x6A4: 68, + 0x6A5: 68, + 0x6A6: 68, + 0x6A7: 68, + 0x6A8: 68, + 0x6A9: 68, + 0x6AA: 68, + 0x6AB: 68, + 0x6AC: 68, + 0x6AD: 68, + 0x6AE: 68, + 0x6AF: 68, + 0x6B0: 68, + 0x6B1: 68, + 0x6B2: 68, + 0x6B3: 68, + 0x6B4: 68, + 0x6B5: 68, + 0x6B6: 68, + 0x6B7: 68, + 0x6B8: 68, + 0x6B9: 68, + 0x6BA: 68, + 0x6BB: 68, + 0x6BC: 68, + 0x6BD: 68, + 0x6BE: 68, + 0x6BF: 68, + 0x6C0: 82, + 0x6C1: 68, + 0x6C2: 68, + 0x6C3: 82, + 0x6C4: 82, + 0x6C5: 82, + 0x6C6: 82, + 0x6C7: 82, + 0x6C8: 82, + 0x6C9: 82, + 0x6CA: 82, + 0x6CB: 82, + 0x6CC: 68, + 0x6CD: 82, + 0x6CE: 68, + 0x6CF: 82, + 0x6D0: 68, + 0x6D1: 68, + 0x6D2: 82, + 0x6D3: 82, + 0x6D5: 82, + 0x6D6: 84, + 0x6D7: 84, + 0x6D8: 84, + 0x6D9: 84, + 0x6DA: 84, + 0x6DB: 84, + 0x6DC: 84, + 0x6DF: 84, + 0x6E0: 84, + 0x6E1: 84, + 0x6E2: 84, + 0x6E3: 84, + 0x6E4: 84, + 0x6E7: 84, + 0x6E8: 84, + 0x6EA: 84, + 0x6EB: 84, + 0x6EC: 84, + 0x6ED: 84, + 0x6EE: 82, + 0x6EF: 82, + 0x6FA: 68, + 0x6FB: 68, + 0x6FC: 68, + 0x6FF: 
68, + 0x70F: 84, + 0x710: 82, + 0x711: 84, + 0x712: 68, + 0x713: 68, + 0x714: 68, + 0x715: 82, + 0x716: 82, + 0x717: 82, + 0x718: 82, + 0x719: 82, + 0x71A: 68, + 0x71B: 68, + 0x71C: 68, + 0x71D: 68, + 0x71E: 82, + 0x71F: 68, + 0x720: 68, + 0x721: 68, + 0x722: 68, + 0x723: 68, + 0x724: 68, + 0x725: 68, + 0x726: 68, + 0x727: 68, + 0x728: 82, + 0x729: 68, + 0x72A: 82, + 0x72B: 68, + 0x72C: 82, + 0x72D: 68, + 0x72E: 68, + 0x72F: 82, + 0x730: 84, + 0x731: 84, + 0x732: 84, + 0x733: 84, + 0x734: 84, + 0x735: 84, + 0x736: 84, + 0x737: 84, + 0x738: 84, + 0x739: 84, + 0x73A: 84, + 0x73B: 84, + 0x73C: 84, + 0x73D: 84, + 0x73E: 84, + 0x73F: 84, + 0x740: 84, + 0x741: 84, + 0x742: 84, + 0x743: 84, + 0x744: 84, + 0x745: 84, + 0x746: 84, + 0x747: 84, + 0x748: 84, + 0x749: 84, + 0x74A: 84, + 0x74D: 82, + 0x74E: 68, + 0x74F: 68, + 0x750: 68, + 0x751: 68, + 0x752: 68, + 0x753: 68, + 0x754: 68, + 0x755: 68, + 0x756: 68, + 0x757: 68, + 0x758: 68, + 0x759: 82, + 0x75A: 82, + 0x75B: 82, + 0x75C: 68, + 0x75D: 68, + 0x75E: 68, + 0x75F: 68, + 0x760: 68, + 0x761: 68, + 0x762: 68, + 0x763: 68, + 0x764: 68, + 0x765: 68, + 0x766: 68, + 0x767: 68, + 0x768: 68, + 0x769: 68, + 0x76A: 68, + 0x76B: 82, + 0x76C: 82, + 0x76D: 68, + 0x76E: 68, + 0x76F: 68, + 0x770: 68, + 0x771: 82, + 0x772: 68, + 0x773: 82, + 0x774: 82, + 0x775: 68, + 0x776: 68, + 0x777: 68, + 0x778: 82, + 0x779: 82, + 0x77A: 68, + 0x77B: 68, + 0x77C: 68, + 0x77D: 68, + 0x77E: 68, + 0x77F: 68, + 0x7A6: 84, + 0x7A7: 84, + 0x7A8: 84, + 0x7A9: 84, + 0x7AA: 84, + 0x7AB: 84, + 0x7AC: 84, + 0x7AD: 84, + 0x7AE: 84, + 0x7AF: 84, + 0x7B0: 84, + 0x7CA: 68, + 0x7CB: 68, + 0x7CC: 68, + 0x7CD: 68, + 0x7CE: 68, + 0x7CF: 68, + 0x7D0: 68, + 0x7D1: 68, + 0x7D2: 68, + 0x7D3: 68, + 0x7D4: 68, + 0x7D5: 68, + 0x7D6: 68, + 0x7D7: 68, + 0x7D8: 68, + 0x7D9: 68, + 0x7DA: 68, + 0x7DB: 68, + 0x7DC: 68, + 0x7DD: 68, + 0x7DE: 68, + 0x7DF: 68, + 0x7E0: 68, + 0x7E1: 68, + 0x7E2: 68, + 0x7E3: 68, + 0x7E4: 68, + 0x7E5: 68, + 0x7E6: 68, + 0x7E7: 68, + 0x7E8: 68, + 
0x7E9: 68, + 0x7EA: 68, + 0x7EB: 84, + 0x7EC: 84, + 0x7ED: 84, + 0x7EE: 84, + 0x7EF: 84, + 0x7F0: 84, + 0x7F1: 84, + 0x7F2: 84, + 0x7F3: 84, + 0x7FA: 67, + 0x7FD: 84, + 0x816: 84, + 0x817: 84, + 0x818: 84, + 0x819: 84, + 0x81B: 84, + 0x81C: 84, + 0x81D: 84, + 0x81E: 84, + 0x81F: 84, + 0x820: 84, + 0x821: 84, + 0x822: 84, + 0x823: 84, + 0x825: 84, + 0x826: 84, + 0x827: 84, + 0x829: 84, + 0x82A: 84, + 0x82B: 84, + 0x82C: 84, + 0x82D: 84, + 0x840: 82, + 0x841: 68, + 0x842: 68, + 0x843: 68, + 0x844: 68, + 0x845: 68, + 0x846: 82, + 0x847: 82, + 0x848: 68, + 0x849: 82, + 0x84A: 68, + 0x84B: 68, + 0x84C: 68, + 0x84D: 68, + 0x84E: 68, + 0x84F: 68, + 0x850: 68, + 0x851: 68, + 0x852: 68, + 0x853: 68, + 0x854: 82, + 0x855: 68, + 0x856: 82, + 0x857: 82, + 0x858: 82, + 0x859: 84, + 0x85A: 84, + 0x85B: 84, + 0x860: 68, + 0x862: 68, + 0x863: 68, + 0x864: 68, + 0x865: 68, + 0x867: 82, + 0x868: 68, + 0x869: 82, + 0x86A: 82, + 0x870: 82, + 0x871: 82, + 0x872: 82, + 0x873: 82, + 0x874: 82, + 0x875: 82, + 0x876: 82, + 0x877: 82, + 0x878: 82, + 0x879: 82, + 0x87A: 82, + 0x87B: 82, + 0x87C: 82, + 0x87D: 82, + 0x87E: 82, + 0x87F: 82, + 0x880: 82, + 0x881: 82, + 0x882: 82, + 0x883: 67, + 0x884: 67, + 0x885: 67, + 0x886: 68, + 0x889: 68, + 0x88A: 68, + 0x88B: 68, + 0x88C: 68, + 0x88D: 68, + 0x88E: 82, + 0x898: 84, + 0x899: 84, + 0x89A: 84, + 0x89B: 84, + 0x89C: 84, + 0x89D: 84, + 0x89E: 84, + 0x89F: 84, + 0x8A0: 68, + 0x8A1: 68, + 0x8A2: 68, + 0x8A3: 68, + 0x8A4: 68, + 0x8A5: 68, + 0x8A6: 68, + 0x8A7: 68, + 0x8A8: 68, + 0x8A9: 68, + 0x8AA: 82, + 0x8AB: 82, + 0x8AC: 82, + 0x8AE: 82, + 0x8AF: 68, + 0x8B0: 68, + 0x8B1: 82, + 0x8B2: 82, + 0x8B3: 68, + 0x8B4: 68, + 0x8B5: 68, + 0x8B6: 68, + 0x8B7: 68, + 0x8B8: 68, + 0x8B9: 82, + 0x8BA: 68, + 0x8BB: 68, + 0x8BC: 68, + 0x8BD: 68, + 0x8BE: 68, + 0x8BF: 68, + 0x8C0: 68, + 0x8C1: 68, + 0x8C2: 68, + 0x8C3: 68, + 0x8C4: 68, + 0x8C5: 68, + 0x8C6: 68, + 0x8C7: 68, + 0x8C8: 68, + 0x8CA: 84, + 0x8CB: 84, + 0x8CC: 84, + 0x8CD: 84, + 0x8CE: 84, + 0x8CF: 84, 
+ 0x8D0: 84, + 0x8D1: 84, + 0x8D2: 84, + 0x8D3: 84, + 0x8D4: 84, + 0x8D5: 84, + 0x8D6: 84, + 0x8D7: 84, + 0x8D8: 84, + 0x8D9: 84, + 0x8DA: 84, + 0x8DB: 84, + 0x8DC: 84, + 0x8DD: 84, + 0x8DE: 84, + 0x8DF: 84, + 0x8E0: 84, + 0x8E1: 84, + 0x8E3: 84, + 0x8E4: 84, + 0x8E5: 84, + 0x8E6: 84, + 0x8E7: 84, + 0x8E8: 84, + 0x8E9: 84, + 0x8EA: 84, + 0x8EB: 84, + 0x8EC: 84, + 0x8ED: 84, + 0x8EE: 84, + 0x8EF: 84, + 0x8F0: 84, + 0x8F1: 84, + 0x8F2: 84, + 0x8F3: 84, + 0x8F4: 84, + 0x8F5: 84, + 0x8F6: 84, + 0x8F7: 84, + 0x8F8: 84, + 0x8F9: 84, + 0x8FA: 84, + 0x8FB: 84, + 0x8FC: 84, + 0x8FD: 84, + 0x8FE: 84, + 0x8FF: 84, + 0x900: 84, + 0x901: 84, + 0x902: 84, + 0x93A: 84, + 0x93C: 84, + 0x941: 84, + 0x942: 84, + 0x943: 84, + 0x944: 84, + 0x945: 84, + 0x946: 84, + 0x947: 84, + 0x948: 84, + 0x94D: 84, + 0x951: 84, + 0x952: 84, + 0x953: 84, + 0x954: 84, + 0x955: 84, + 0x956: 84, + 0x957: 84, + 0x962: 84, + 0x963: 84, + 0x981: 84, + 0x9BC: 84, + 0x9C1: 84, + 0x9C2: 84, + 0x9C3: 84, + 0x9C4: 84, + 0x9CD: 84, + 0x9E2: 84, + 0x9E3: 84, + 0x9FE: 84, + 0xA01: 84, + 0xA02: 84, + 0xA3C: 84, + 0xA41: 84, + 0xA42: 84, + 0xA47: 84, + 0xA48: 84, + 0xA4B: 84, + 0xA4C: 84, + 0xA4D: 84, + 0xA51: 84, + 0xA70: 84, + 0xA71: 84, + 0xA75: 84, + 0xA81: 84, + 0xA82: 84, + 0xABC: 84, + 0xAC1: 84, + 0xAC2: 84, + 0xAC3: 84, + 0xAC4: 84, + 0xAC5: 84, + 0xAC7: 84, + 0xAC8: 84, + 0xACD: 84, + 0xAE2: 84, + 0xAE3: 84, + 0xAFA: 84, + 0xAFB: 84, + 0xAFC: 84, + 0xAFD: 84, + 0xAFE: 84, + 0xAFF: 84, + 0xB01: 84, + 0xB3C: 84, + 0xB3F: 84, + 0xB41: 84, + 0xB42: 84, + 0xB43: 84, + 0xB44: 84, + 0xB4D: 84, + 0xB55: 84, + 0xB56: 84, + 0xB62: 84, + 0xB63: 84, + 0xB82: 84, + 0xBC0: 84, + 0xBCD: 84, + 0xC00: 84, + 0xC04: 84, + 0xC3C: 84, + 0xC3E: 84, + 0xC3F: 84, + 0xC40: 84, + 0xC46: 84, + 0xC47: 84, + 0xC48: 84, + 0xC4A: 84, + 0xC4B: 84, + 0xC4C: 84, + 0xC4D: 84, + 0xC55: 84, + 0xC56: 84, + 0xC62: 84, + 0xC63: 84, + 0xC81: 84, + 0xCBC: 84, + 0xCBF: 84, + 0xCC6: 84, + 0xCCC: 84, + 0xCCD: 84, + 0xCE2: 84, + 0xCE3: 84, + 0xD00: 
84, + 0xD01: 84, + 0xD3B: 84, + 0xD3C: 84, + 0xD41: 84, + 0xD42: 84, + 0xD43: 84, + 0xD44: 84, + 0xD4D: 84, + 0xD62: 84, + 0xD63: 84, + 0xD81: 84, + 0xDCA: 84, + 0xDD2: 84, + 0xDD3: 84, + 0xDD4: 84, + 0xDD6: 84, + 0xE31: 84, + 0xE34: 84, + 0xE35: 84, + 0xE36: 84, + 0xE37: 84, + 0xE38: 84, + 0xE39: 84, + 0xE3A: 84, + 0xE47: 84, + 0xE48: 84, + 0xE49: 84, + 0xE4A: 84, + 0xE4B: 84, + 0xE4C: 84, + 0xE4D: 84, + 0xE4E: 84, + 0xEB1: 84, + 0xEB4: 84, + 0xEB5: 84, + 0xEB6: 84, + 0xEB7: 84, + 0xEB8: 84, + 0xEB9: 84, + 0xEBA: 84, + 0xEBB: 84, + 0xEBC: 84, + 0xEC8: 84, + 0xEC9: 84, + 0xECA: 84, + 0xECB: 84, + 0xECC: 84, + 0xECD: 84, + 0xECE: 84, + 0xF18: 84, + 0xF19: 84, + 0xF35: 84, + 0xF37: 84, + 0xF39: 84, + 0xF71: 84, + 0xF72: 84, + 0xF73: 84, + 0xF74: 84, + 0xF75: 84, + 0xF76: 84, + 0xF77: 84, + 0xF78: 84, + 0xF79: 84, + 0xF7A: 84, + 0xF7B: 84, + 0xF7C: 84, + 0xF7D: 84, + 0xF7E: 84, + 0xF80: 84, + 0xF81: 84, + 0xF82: 84, + 0xF83: 84, + 0xF84: 84, + 0xF86: 84, + 0xF87: 84, + 0xF8D: 84, + 0xF8E: 84, + 0xF8F: 84, + 0xF90: 84, + 0xF91: 84, + 0xF92: 84, + 0xF93: 84, + 0xF94: 84, + 0xF95: 84, + 0xF96: 84, + 0xF97: 84, + 0xF99: 84, + 0xF9A: 84, + 0xF9B: 84, + 0xF9C: 84, + 0xF9D: 84, + 0xF9E: 84, + 0xF9F: 84, + 0xFA0: 84, + 0xFA1: 84, + 0xFA2: 84, + 0xFA3: 84, + 0xFA4: 84, + 0xFA5: 84, + 0xFA6: 84, + 0xFA7: 84, + 0xFA8: 84, + 0xFA9: 84, + 0xFAA: 84, + 0xFAB: 84, + 0xFAC: 84, + 0xFAD: 84, + 0xFAE: 84, + 0xFAF: 84, + 0xFB0: 84, + 0xFB1: 84, + 0xFB2: 84, + 0xFB3: 84, + 0xFB4: 84, + 0xFB5: 84, + 0xFB6: 84, + 0xFB7: 84, + 0xFB8: 84, + 0xFB9: 84, + 0xFBA: 84, + 0xFBB: 84, + 0xFBC: 84, + 0xFC6: 84, + 0x102D: 84, + 0x102E: 84, + 0x102F: 84, + 0x1030: 84, + 0x1032: 84, + 0x1033: 84, + 0x1034: 84, + 0x1035: 84, + 0x1036: 84, + 0x1037: 84, + 0x1039: 84, + 0x103A: 84, + 0x103D: 84, + 0x103E: 84, + 0x1058: 84, + 0x1059: 84, + 0x105E: 84, + 0x105F: 84, + 0x1060: 84, + 0x1071: 84, + 0x1072: 84, + 0x1073: 84, + 0x1074: 84, + 0x1082: 84, + 0x1085: 84, + 0x1086: 84, + 0x108D: 84, + 0x109D: 84, + 
0x135D: 84, + 0x135E: 84, + 0x135F: 84, + 0x1712: 84, + 0x1713: 84, + 0x1714: 84, + 0x1732: 84, + 0x1733: 84, + 0x1752: 84, + 0x1753: 84, + 0x1772: 84, + 0x1773: 84, + 0x17B4: 84, + 0x17B5: 84, + 0x17B7: 84, + 0x17B8: 84, + 0x17B9: 84, + 0x17BA: 84, + 0x17BB: 84, + 0x17BC: 84, + 0x17BD: 84, + 0x17C6: 84, + 0x17C9: 84, + 0x17CA: 84, + 0x17CB: 84, + 0x17CC: 84, + 0x17CD: 84, + 0x17CE: 84, + 0x17CF: 84, + 0x17D0: 84, + 0x17D1: 84, + 0x17D2: 84, + 0x17D3: 84, + 0x17DD: 84, + 0x1807: 68, + 0x180A: 67, + 0x180B: 84, + 0x180C: 84, + 0x180D: 84, + 0x180F: 84, + 0x1820: 68, + 0x1821: 68, + 0x1822: 68, + 0x1823: 68, + 0x1824: 68, + 0x1825: 68, + 0x1826: 68, + 0x1827: 68, + 0x1828: 68, + 0x1829: 68, + 0x182A: 68, + 0x182B: 68, + 0x182C: 68, + 0x182D: 68, + 0x182E: 68, + 0x182F: 68, + 0x1830: 68, + 0x1831: 68, + 0x1832: 68, + 0x1833: 68, + 0x1834: 68, + 0x1835: 68, + 0x1836: 68, + 0x1837: 68, + 0x1838: 68, + 0x1839: 68, + 0x183A: 68, + 0x183B: 68, + 0x183C: 68, + 0x183D: 68, + 0x183E: 68, + 0x183F: 68, + 0x1840: 68, + 0x1841: 68, + 0x1842: 68, + 0x1843: 68, + 0x1844: 68, + 0x1845: 68, + 0x1846: 68, + 0x1847: 68, + 0x1848: 68, + 0x1849: 68, + 0x184A: 68, + 0x184B: 68, + 0x184C: 68, + 0x184D: 68, + 0x184E: 68, + 0x184F: 68, + 0x1850: 68, + 0x1851: 68, + 0x1852: 68, + 0x1853: 68, + 0x1854: 68, + 0x1855: 68, + 0x1856: 68, + 0x1857: 68, + 0x1858: 68, + 0x1859: 68, + 0x185A: 68, + 0x185B: 68, + 0x185C: 68, + 0x185D: 68, + 0x185E: 68, + 0x185F: 68, + 0x1860: 68, + 0x1861: 68, + 0x1862: 68, + 0x1863: 68, + 0x1864: 68, + 0x1865: 68, + 0x1866: 68, + 0x1867: 68, + 0x1868: 68, + 0x1869: 68, + 0x186A: 68, + 0x186B: 68, + 0x186C: 68, + 0x186D: 68, + 0x186E: 68, + 0x186F: 68, + 0x1870: 68, + 0x1871: 68, + 0x1872: 68, + 0x1873: 68, + 0x1874: 68, + 0x1875: 68, + 0x1876: 68, + 0x1877: 68, + 0x1878: 68, + 0x1885: 84, + 0x1886: 84, + 0x1887: 68, + 0x1888: 68, + 0x1889: 68, + 0x188A: 68, + 0x188B: 68, + 0x188C: 68, + 0x188D: 68, + 0x188E: 68, + 0x188F: 68, + 0x1890: 68, + 0x1891: 68, + 0x1892: 68, 
+ 0x1893: 68, + 0x1894: 68, + 0x1895: 68, + 0x1896: 68, + 0x1897: 68, + 0x1898: 68, + 0x1899: 68, + 0x189A: 68, + 0x189B: 68, + 0x189C: 68, + 0x189D: 68, + 0x189E: 68, + 0x189F: 68, + 0x18A0: 68, + 0x18A1: 68, + 0x18A2: 68, + 0x18A3: 68, + 0x18A4: 68, + 0x18A5: 68, + 0x18A6: 68, + 0x18A7: 68, + 0x18A8: 68, + 0x18A9: 84, + 0x18AA: 68, + 0x1920: 84, + 0x1921: 84, + 0x1922: 84, + 0x1927: 84, + 0x1928: 84, + 0x1932: 84, + 0x1939: 84, + 0x193A: 84, + 0x193B: 84, + 0x1A17: 84, + 0x1A18: 84, + 0x1A1B: 84, + 0x1A56: 84, + 0x1A58: 84, + 0x1A59: 84, + 0x1A5A: 84, + 0x1A5B: 84, + 0x1A5C: 84, + 0x1A5D: 84, + 0x1A5E: 84, + 0x1A60: 84, + 0x1A62: 84, + 0x1A65: 84, + 0x1A66: 84, + 0x1A67: 84, + 0x1A68: 84, + 0x1A69: 84, + 0x1A6A: 84, + 0x1A6B: 84, + 0x1A6C: 84, + 0x1A73: 84, + 0x1A74: 84, + 0x1A75: 84, + 0x1A76: 84, + 0x1A77: 84, + 0x1A78: 84, + 0x1A79: 84, + 0x1A7A: 84, + 0x1A7B: 84, + 0x1A7C: 84, + 0x1A7F: 84, + 0x1AB0: 84, + 0x1AB1: 84, + 0x1AB2: 84, + 0x1AB3: 84, + 0x1AB4: 84, + 0x1AB5: 84, + 0x1AB6: 84, + 0x1AB7: 84, + 0x1AB8: 84, + 0x1AB9: 84, + 0x1ABA: 84, + 0x1ABB: 84, + 0x1ABC: 84, + 0x1ABD: 84, + 0x1ABE: 84, + 0x1ABF: 84, + 0x1AC0: 84, + 0x1AC1: 84, + 0x1AC2: 84, + 0x1AC3: 84, + 0x1AC4: 84, + 0x1AC5: 84, + 0x1AC6: 84, + 0x1AC7: 84, + 0x1AC8: 84, + 0x1AC9: 84, + 0x1ACA: 84, + 0x1ACB: 84, + 0x1ACC: 84, + 0x1ACD: 84, + 0x1ACE: 84, + 0x1B00: 84, + 0x1B01: 84, + 0x1B02: 84, + 0x1B03: 84, + 0x1B34: 84, + 0x1B36: 84, + 0x1B37: 84, + 0x1B38: 84, + 0x1B39: 84, + 0x1B3A: 84, + 0x1B3C: 84, + 0x1B42: 84, + 0x1B6B: 84, + 0x1B6C: 84, + 0x1B6D: 84, + 0x1B6E: 84, + 0x1B6F: 84, + 0x1B70: 84, + 0x1B71: 84, + 0x1B72: 84, + 0x1B73: 84, + 0x1B80: 84, + 0x1B81: 84, + 0x1BA2: 84, + 0x1BA3: 84, + 0x1BA4: 84, + 0x1BA5: 84, + 0x1BA8: 84, + 0x1BA9: 84, + 0x1BAB: 84, + 0x1BAC: 84, + 0x1BAD: 84, + 0x1BE6: 84, + 0x1BE8: 84, + 0x1BE9: 84, + 0x1BED: 84, + 0x1BEF: 84, + 0x1BF0: 84, + 0x1BF1: 84, + 0x1C2C: 84, + 0x1C2D: 84, + 0x1C2E: 84, + 0x1C2F: 84, + 0x1C30: 84, + 0x1C31: 84, + 0x1C32: 84, + 0x1C33: 
84, + 0x1C36: 84, + 0x1C37: 84, + 0x1CD0: 84, + 0x1CD1: 84, + 0x1CD2: 84, + 0x1CD4: 84, + 0x1CD5: 84, + 0x1CD6: 84, + 0x1CD7: 84, + 0x1CD8: 84, + 0x1CD9: 84, + 0x1CDA: 84, + 0x1CDB: 84, + 0x1CDC: 84, + 0x1CDD: 84, + 0x1CDE: 84, + 0x1CDF: 84, + 0x1CE0: 84, + 0x1CE2: 84, + 0x1CE3: 84, + 0x1CE4: 84, + 0x1CE5: 84, + 0x1CE6: 84, + 0x1CE7: 84, + 0x1CE8: 84, + 0x1CED: 84, + 0x1CF4: 84, + 0x1CF8: 84, + 0x1CF9: 84, + 0x1DC0: 84, + 0x1DC1: 84, + 0x1DC2: 84, + 0x1DC3: 84, + 0x1DC4: 84, + 0x1DC5: 84, + 0x1DC6: 84, + 0x1DC7: 84, + 0x1DC8: 84, + 0x1DC9: 84, + 0x1DCA: 84, + 0x1DCB: 84, + 0x1DCC: 84, + 0x1DCD: 84, + 0x1DCE: 84, + 0x1DCF: 84, + 0x1DD0: 84, + 0x1DD1: 84, + 0x1DD2: 84, + 0x1DD3: 84, + 0x1DD4: 84, + 0x1DD5: 84, + 0x1DD6: 84, + 0x1DD7: 84, + 0x1DD8: 84, + 0x1DD9: 84, + 0x1DDA: 84, + 0x1DDB: 84, + 0x1DDC: 84, + 0x1DDD: 84, + 0x1DDE: 84, + 0x1DDF: 84, + 0x1DE0: 84, + 0x1DE1: 84, + 0x1DE2: 84, + 0x1DE3: 84, + 0x1DE4: 84, + 0x1DE5: 84, + 0x1DE6: 84, + 0x1DE7: 84, + 0x1DE8: 84, + 0x1DE9: 84, + 0x1DEA: 84, + 0x1DEB: 84, + 0x1DEC: 84, + 0x1DED: 84, + 0x1DEE: 84, + 0x1DEF: 84, + 0x1DF0: 84, + 0x1DF1: 84, + 0x1DF2: 84, + 0x1DF3: 84, + 0x1DF4: 84, + 0x1DF5: 84, + 0x1DF6: 84, + 0x1DF7: 84, + 0x1DF8: 84, + 0x1DF9: 84, + 0x1DFA: 84, + 0x1DFB: 84, + 0x1DFC: 84, + 0x1DFD: 84, + 0x1DFE: 84, + 0x1DFF: 84, + 0x200B: 84, + 0x200D: 67, + 0x200E: 84, + 0x200F: 84, + 0x202A: 84, + 0x202B: 84, + 0x202C: 84, + 0x202D: 84, + 0x202E: 84, + 0x2060: 84, + 0x2061: 84, + 0x2062: 84, + 0x2063: 84, + 0x2064: 84, + 0x206A: 84, + 0x206B: 84, + 0x206C: 84, + 0x206D: 84, + 0x206E: 84, + 0x206F: 84, + 0x20D0: 84, + 0x20D1: 84, + 0x20D2: 84, + 0x20D3: 84, + 0x20D4: 84, + 0x20D5: 84, + 0x20D6: 84, + 0x20D7: 84, + 0x20D8: 84, + 0x20D9: 84, + 0x20DA: 84, + 0x20DB: 84, + 0x20DC: 84, + 0x20DD: 84, + 0x20DE: 84, + 0x20DF: 84, + 0x20E0: 84, + 0x20E1: 84, + 0x20E2: 84, + 0x20E3: 84, + 0x20E4: 84, + 0x20E5: 84, + 0x20E6: 84, + 0x20E7: 84, + 0x20E8: 84, + 0x20E9: 84, + 0x20EA: 84, + 0x20EB: 84, + 0x20EC: 84, + 
0x20ED: 84, + 0x20EE: 84, + 0x20EF: 84, + 0x20F0: 84, + 0x2CEF: 84, + 0x2CF0: 84, + 0x2CF1: 84, + 0x2D7F: 84, + 0x2DE0: 84, + 0x2DE1: 84, + 0x2DE2: 84, + 0x2DE3: 84, + 0x2DE4: 84, + 0x2DE5: 84, + 0x2DE6: 84, + 0x2DE7: 84, + 0x2DE8: 84, + 0x2DE9: 84, + 0x2DEA: 84, + 0x2DEB: 84, + 0x2DEC: 84, + 0x2DED: 84, + 0x2DEE: 84, + 0x2DEF: 84, + 0x2DF0: 84, + 0x2DF1: 84, + 0x2DF2: 84, + 0x2DF3: 84, + 0x2DF4: 84, + 0x2DF5: 84, + 0x2DF6: 84, + 0x2DF7: 84, + 0x2DF8: 84, + 0x2DF9: 84, + 0x2DFA: 84, + 0x2DFB: 84, + 0x2DFC: 84, + 0x2DFD: 84, + 0x2DFE: 84, + 0x2DFF: 84, + 0x302A: 84, + 0x302B: 84, + 0x302C: 84, + 0x302D: 84, + 0x3099: 84, + 0x309A: 84, + 0xA66F: 84, + 0xA670: 84, + 0xA671: 84, + 0xA672: 84, + 0xA674: 84, + 0xA675: 84, + 0xA676: 84, + 0xA677: 84, + 0xA678: 84, + 0xA679: 84, + 0xA67A: 84, + 0xA67B: 84, + 0xA67C: 84, + 0xA67D: 84, + 0xA69E: 84, + 0xA69F: 84, + 0xA6F0: 84, + 0xA6F1: 84, + 0xA802: 84, + 0xA806: 84, + 0xA80B: 84, + 0xA825: 84, + 0xA826: 84, + 0xA82C: 84, + 0xA840: 68, + 0xA841: 68, + 0xA842: 68, + 0xA843: 68, + 0xA844: 68, + 0xA845: 68, + 0xA846: 68, + 0xA847: 68, + 0xA848: 68, + 0xA849: 68, + 0xA84A: 68, + 0xA84B: 68, + 0xA84C: 68, + 0xA84D: 68, + 0xA84E: 68, + 0xA84F: 68, + 0xA850: 68, + 0xA851: 68, + 0xA852: 68, + 0xA853: 68, + 0xA854: 68, + 0xA855: 68, + 0xA856: 68, + 0xA857: 68, + 0xA858: 68, + 0xA859: 68, + 0xA85A: 68, + 0xA85B: 68, + 0xA85C: 68, + 0xA85D: 68, + 0xA85E: 68, + 0xA85F: 68, + 0xA860: 68, + 0xA861: 68, + 0xA862: 68, + 0xA863: 68, + 0xA864: 68, + 0xA865: 68, + 0xA866: 68, + 0xA867: 68, + 0xA868: 68, + 0xA869: 68, + 0xA86A: 68, + 0xA86B: 68, + 0xA86C: 68, + 0xA86D: 68, + 0xA86E: 68, + 0xA86F: 68, + 0xA870: 68, + 0xA871: 68, + 0xA872: 76, + 0xA8C4: 84, + 0xA8C5: 84, + 0xA8E0: 84, + 0xA8E1: 84, + 0xA8E2: 84, + 0xA8E3: 84, + 0xA8E4: 84, + 0xA8E5: 84, + 0xA8E6: 84, + 0xA8E7: 84, + 0xA8E8: 84, + 0xA8E9: 84, + 0xA8EA: 84, + 0xA8EB: 84, + 0xA8EC: 84, + 0xA8ED: 84, + 0xA8EE: 84, + 0xA8EF: 84, + 0xA8F0: 84, + 0xA8F1: 84, + 0xA8FF: 84, + 0xA926: 84, 
+ 0xA927: 84, + 0xA928: 84, + 0xA929: 84, + 0xA92A: 84, + 0xA92B: 84, + 0xA92C: 84, + 0xA92D: 84, + 0xA947: 84, + 0xA948: 84, + 0xA949: 84, + 0xA94A: 84, + 0xA94B: 84, + 0xA94C: 84, + 0xA94D: 84, + 0xA94E: 84, + 0xA94F: 84, + 0xA950: 84, + 0xA951: 84, + 0xA980: 84, + 0xA981: 84, + 0xA982: 84, + 0xA9B3: 84, + 0xA9B6: 84, + 0xA9B7: 84, + 0xA9B8: 84, + 0xA9B9: 84, + 0xA9BC: 84, + 0xA9BD: 84, + 0xA9E5: 84, + 0xAA29: 84, + 0xAA2A: 84, + 0xAA2B: 84, + 0xAA2C: 84, + 0xAA2D: 84, + 0xAA2E: 84, + 0xAA31: 84, + 0xAA32: 84, + 0xAA35: 84, + 0xAA36: 84, + 0xAA43: 84, + 0xAA4C: 84, + 0xAA7C: 84, + 0xAAB0: 84, + 0xAAB2: 84, + 0xAAB3: 84, + 0xAAB4: 84, + 0xAAB7: 84, + 0xAAB8: 84, + 0xAABE: 84, + 0xAABF: 84, + 0xAAC1: 84, + 0xAAEC: 84, + 0xAAED: 84, + 0xAAF6: 84, + 0xABE5: 84, + 0xABE8: 84, + 0xABED: 84, + 0xFB1E: 84, + 0xFE00: 84, + 0xFE01: 84, + 0xFE02: 84, + 0xFE03: 84, + 0xFE04: 84, + 0xFE05: 84, + 0xFE06: 84, + 0xFE07: 84, + 0xFE08: 84, + 0xFE09: 84, + 0xFE0A: 84, + 0xFE0B: 84, + 0xFE0C: 84, + 0xFE0D: 84, + 0xFE0E: 84, + 0xFE0F: 84, + 0xFE20: 84, + 0xFE21: 84, + 0xFE22: 84, + 0xFE23: 84, + 0xFE24: 84, + 0xFE25: 84, + 0xFE26: 84, + 0xFE27: 84, + 0xFE28: 84, + 0xFE29: 84, + 0xFE2A: 84, + 0xFE2B: 84, + 0xFE2C: 84, + 0xFE2D: 84, + 0xFE2E: 84, + 0xFE2F: 84, + 0xFEFF: 84, + 0xFFF9: 84, + 0xFFFA: 84, + 0xFFFB: 84, + 0x101FD: 84, + 0x102E0: 84, + 0x10376: 84, + 0x10377: 84, + 0x10378: 84, + 0x10379: 84, + 0x1037A: 84, + 0x10A01: 84, + 0x10A02: 84, + 0x10A03: 84, + 0x10A05: 84, + 0x10A06: 84, + 0x10A0C: 84, + 0x10A0D: 84, + 0x10A0E: 84, + 0x10A0F: 84, + 0x10A38: 84, + 0x10A39: 84, + 0x10A3A: 84, + 0x10A3F: 84, + 0x10AC0: 68, + 0x10AC1: 68, + 0x10AC2: 68, + 0x10AC3: 68, + 0x10AC4: 68, + 0x10AC5: 82, + 0x10AC7: 82, + 0x10AC9: 82, + 0x10ACA: 82, + 0x10ACD: 76, + 0x10ACE: 82, + 0x10ACF: 82, + 0x10AD0: 82, + 0x10AD1: 82, + 0x10AD2: 82, + 0x10AD3: 68, + 0x10AD4: 68, + 0x10AD5: 68, + 0x10AD6: 68, + 0x10AD7: 76, + 0x10AD8: 68, + 0x10AD9: 68, + 0x10ADA: 68, + 0x10ADB: 68, + 0x10ADC: 68, + 
0x10ADD: 82, + 0x10ADE: 68, + 0x10ADF: 68, + 0x10AE0: 68, + 0x10AE1: 82, + 0x10AE4: 82, + 0x10AE5: 84, + 0x10AE6: 84, + 0x10AEB: 68, + 0x10AEC: 68, + 0x10AED: 68, + 0x10AEE: 68, + 0x10AEF: 82, + 0x10B80: 68, + 0x10B81: 82, + 0x10B82: 68, + 0x10B83: 82, + 0x10B84: 82, + 0x10B85: 82, + 0x10B86: 68, + 0x10B87: 68, + 0x10B88: 68, + 0x10B89: 82, + 0x10B8A: 68, + 0x10B8B: 68, + 0x10B8C: 82, + 0x10B8D: 68, + 0x10B8E: 82, + 0x10B8F: 82, + 0x10B90: 68, + 0x10B91: 82, + 0x10BA9: 82, + 0x10BAA: 82, + 0x10BAB: 82, + 0x10BAC: 82, + 0x10BAD: 68, + 0x10BAE: 68, + 0x10D00: 76, + 0x10D01: 68, + 0x10D02: 68, + 0x10D03: 68, + 0x10D04: 68, + 0x10D05: 68, + 0x10D06: 68, + 0x10D07: 68, + 0x10D08: 68, + 0x10D09: 68, + 0x10D0A: 68, + 0x10D0B: 68, + 0x10D0C: 68, + 0x10D0D: 68, + 0x10D0E: 68, + 0x10D0F: 68, + 0x10D10: 68, + 0x10D11: 68, + 0x10D12: 68, + 0x10D13: 68, + 0x10D14: 68, + 0x10D15: 68, + 0x10D16: 68, + 0x10D17: 68, + 0x10D18: 68, + 0x10D19: 68, + 0x10D1A: 68, + 0x10D1B: 68, + 0x10D1C: 68, + 0x10D1D: 68, + 0x10D1E: 68, + 0x10D1F: 68, + 0x10D20: 68, + 0x10D21: 68, + 0x10D22: 82, + 0x10D23: 68, + 0x10D24: 84, + 0x10D25: 84, + 0x10D26: 84, + 0x10D27: 84, + 0x10EAB: 84, + 0x10EAC: 84, + 0x10EFD: 84, + 0x10EFE: 84, + 0x10EFF: 84, + 0x10F30: 68, + 0x10F31: 68, + 0x10F32: 68, + 0x10F33: 82, + 0x10F34: 68, + 0x10F35: 68, + 0x10F36: 68, + 0x10F37: 68, + 0x10F38: 68, + 0x10F39: 68, + 0x10F3A: 68, + 0x10F3B: 68, + 0x10F3C: 68, + 0x10F3D: 68, + 0x10F3E: 68, + 0x10F3F: 68, + 0x10F40: 68, + 0x10F41: 68, + 0x10F42: 68, + 0x10F43: 68, + 0x10F44: 68, + 0x10F46: 84, + 0x10F47: 84, + 0x10F48: 84, + 0x10F49: 84, + 0x10F4A: 84, + 0x10F4B: 84, + 0x10F4C: 84, + 0x10F4D: 84, + 0x10F4E: 84, + 0x10F4F: 84, + 0x10F50: 84, + 0x10F51: 68, + 0x10F52: 68, + 0x10F53: 68, + 0x10F54: 82, + 0x10F70: 68, + 0x10F71: 68, + 0x10F72: 68, + 0x10F73: 68, + 0x10F74: 82, + 0x10F75: 82, + 0x10F76: 68, + 0x10F77: 68, + 0x10F78: 68, + 0x10F79: 68, + 0x10F7A: 68, + 0x10F7B: 68, + 0x10F7C: 68, + 0x10F7D: 68, + 0x10F7E: 68, + 
0x10F7F: 68, + 0x10F80: 68, + 0x10F81: 68, + 0x10F82: 84, + 0x10F83: 84, + 0x10F84: 84, + 0x10F85: 84, + 0x10FB0: 68, + 0x10FB2: 68, + 0x10FB3: 68, + 0x10FB4: 82, + 0x10FB5: 82, + 0x10FB6: 82, + 0x10FB8: 68, + 0x10FB9: 82, + 0x10FBA: 82, + 0x10FBB: 68, + 0x10FBC: 68, + 0x10FBD: 82, + 0x10FBE: 68, + 0x10FBF: 68, + 0x10FC1: 68, + 0x10FC2: 82, + 0x10FC3: 82, + 0x10FC4: 68, + 0x10FC9: 82, + 0x10FCA: 68, + 0x10FCB: 76, + 0x11001: 84, + 0x11038: 84, + 0x11039: 84, + 0x1103A: 84, + 0x1103B: 84, + 0x1103C: 84, + 0x1103D: 84, + 0x1103E: 84, + 0x1103F: 84, + 0x11040: 84, + 0x11041: 84, + 0x11042: 84, + 0x11043: 84, + 0x11044: 84, + 0x11045: 84, + 0x11046: 84, + 0x11070: 84, + 0x11073: 84, + 0x11074: 84, + 0x1107F: 84, + 0x11080: 84, + 0x11081: 84, + 0x110B3: 84, + 0x110B4: 84, + 0x110B5: 84, + 0x110B6: 84, + 0x110B9: 84, + 0x110BA: 84, + 0x110C2: 84, + 0x11100: 84, + 0x11101: 84, + 0x11102: 84, + 0x11127: 84, + 0x11128: 84, + 0x11129: 84, + 0x1112A: 84, + 0x1112B: 84, + 0x1112D: 84, + 0x1112E: 84, + 0x1112F: 84, + 0x11130: 84, + 0x11131: 84, + 0x11132: 84, + 0x11133: 84, + 0x11134: 84, + 0x11173: 84, + 0x11180: 84, + 0x11181: 84, + 0x111B6: 84, + 0x111B7: 84, + 0x111B8: 84, + 0x111B9: 84, + 0x111BA: 84, + 0x111BB: 84, + 0x111BC: 84, + 0x111BD: 84, + 0x111BE: 84, + 0x111C9: 84, + 0x111CA: 84, + 0x111CB: 84, + 0x111CC: 84, + 0x111CF: 84, + 0x1122F: 84, + 0x11230: 84, + 0x11231: 84, + 0x11234: 84, + 0x11236: 84, + 0x11237: 84, + 0x1123E: 84, + 0x11241: 84, + 0x112DF: 84, + 0x112E3: 84, + 0x112E4: 84, + 0x112E5: 84, + 0x112E6: 84, + 0x112E7: 84, + 0x112E8: 84, + 0x112E9: 84, + 0x112EA: 84, + 0x11300: 84, + 0x11301: 84, + 0x1133B: 84, + 0x1133C: 84, + 0x11340: 84, + 0x11366: 84, + 0x11367: 84, + 0x11368: 84, + 0x11369: 84, + 0x1136A: 84, + 0x1136B: 84, + 0x1136C: 84, + 0x11370: 84, + 0x11371: 84, + 0x11372: 84, + 0x11373: 84, + 0x11374: 84, + 0x11438: 84, + 0x11439: 84, + 0x1143A: 84, + 0x1143B: 84, + 0x1143C: 84, + 0x1143D: 84, + 0x1143E: 84, + 0x1143F: 84, + 0x11442: 84, + 
0x11443: 84, + 0x11444: 84, + 0x11446: 84, + 0x1145E: 84, + 0x114B3: 84, + 0x114B4: 84, + 0x114B5: 84, + 0x114B6: 84, + 0x114B7: 84, + 0x114B8: 84, + 0x114BA: 84, + 0x114BF: 84, + 0x114C0: 84, + 0x114C2: 84, + 0x114C3: 84, + 0x115B2: 84, + 0x115B3: 84, + 0x115B4: 84, + 0x115B5: 84, + 0x115BC: 84, + 0x115BD: 84, + 0x115BF: 84, + 0x115C0: 84, + 0x115DC: 84, + 0x115DD: 84, + 0x11633: 84, + 0x11634: 84, + 0x11635: 84, + 0x11636: 84, + 0x11637: 84, + 0x11638: 84, + 0x11639: 84, + 0x1163A: 84, + 0x1163D: 84, + 0x1163F: 84, + 0x11640: 84, + 0x116AB: 84, + 0x116AD: 84, + 0x116B0: 84, + 0x116B1: 84, + 0x116B2: 84, + 0x116B3: 84, + 0x116B4: 84, + 0x116B5: 84, + 0x116B7: 84, + 0x1171D: 84, + 0x1171E: 84, + 0x1171F: 84, + 0x11722: 84, + 0x11723: 84, + 0x11724: 84, + 0x11725: 84, + 0x11727: 84, + 0x11728: 84, + 0x11729: 84, + 0x1172A: 84, + 0x1172B: 84, + 0x1182F: 84, + 0x11830: 84, + 0x11831: 84, + 0x11832: 84, + 0x11833: 84, + 0x11834: 84, + 0x11835: 84, + 0x11836: 84, + 0x11837: 84, + 0x11839: 84, + 0x1183A: 84, + 0x1193B: 84, + 0x1193C: 84, + 0x1193E: 84, + 0x11943: 84, + 0x119D4: 84, + 0x119D5: 84, + 0x119D6: 84, + 0x119D7: 84, + 0x119DA: 84, + 0x119DB: 84, + 0x119E0: 84, + 0x11A01: 84, + 0x11A02: 84, + 0x11A03: 84, + 0x11A04: 84, + 0x11A05: 84, + 0x11A06: 84, + 0x11A07: 84, + 0x11A08: 84, + 0x11A09: 84, + 0x11A0A: 84, + 0x11A33: 84, + 0x11A34: 84, + 0x11A35: 84, + 0x11A36: 84, + 0x11A37: 84, + 0x11A38: 84, + 0x11A3B: 84, + 0x11A3C: 84, + 0x11A3D: 84, + 0x11A3E: 84, + 0x11A47: 84, + 0x11A51: 84, + 0x11A52: 84, + 0x11A53: 84, + 0x11A54: 84, + 0x11A55: 84, + 0x11A56: 84, + 0x11A59: 84, + 0x11A5A: 84, + 0x11A5B: 84, + 0x11A8A: 84, + 0x11A8B: 84, + 0x11A8C: 84, + 0x11A8D: 84, + 0x11A8E: 84, + 0x11A8F: 84, + 0x11A90: 84, + 0x11A91: 84, + 0x11A92: 84, + 0x11A93: 84, + 0x11A94: 84, + 0x11A95: 84, + 0x11A96: 84, + 0x11A98: 84, + 0x11A99: 84, + 0x11C30: 84, + 0x11C31: 84, + 0x11C32: 84, + 0x11C33: 84, + 0x11C34: 84, + 0x11C35: 84, + 0x11C36: 84, + 0x11C38: 84, + 0x11C39: 84, + 
0x11C3A: 84, + 0x11C3B: 84, + 0x11C3C: 84, + 0x11C3D: 84, + 0x11C3F: 84, + 0x11C92: 84, + 0x11C93: 84, + 0x11C94: 84, + 0x11C95: 84, + 0x11C96: 84, + 0x11C97: 84, + 0x11C98: 84, + 0x11C99: 84, + 0x11C9A: 84, + 0x11C9B: 84, + 0x11C9C: 84, + 0x11C9D: 84, + 0x11C9E: 84, + 0x11C9F: 84, + 0x11CA0: 84, + 0x11CA1: 84, + 0x11CA2: 84, + 0x11CA3: 84, + 0x11CA4: 84, + 0x11CA5: 84, + 0x11CA6: 84, + 0x11CA7: 84, + 0x11CAA: 84, + 0x11CAB: 84, + 0x11CAC: 84, + 0x11CAD: 84, + 0x11CAE: 84, + 0x11CAF: 84, + 0x11CB0: 84, + 0x11CB2: 84, + 0x11CB3: 84, + 0x11CB5: 84, + 0x11CB6: 84, + 0x11D31: 84, + 0x11D32: 84, + 0x11D33: 84, + 0x11D34: 84, + 0x11D35: 84, + 0x11D36: 84, + 0x11D3A: 84, + 0x11D3C: 84, + 0x11D3D: 84, + 0x11D3F: 84, + 0x11D40: 84, + 0x11D41: 84, + 0x11D42: 84, + 0x11D43: 84, + 0x11D44: 84, + 0x11D45: 84, + 0x11D47: 84, + 0x11D90: 84, + 0x11D91: 84, + 0x11D95: 84, + 0x11D97: 84, + 0x11EF3: 84, + 0x11EF4: 84, + 0x11F00: 84, + 0x11F01: 84, + 0x11F36: 84, + 0x11F37: 84, + 0x11F38: 84, + 0x11F39: 84, + 0x11F3A: 84, + 0x11F40: 84, + 0x11F42: 84, + 0x13430: 84, + 0x13431: 84, + 0x13432: 84, + 0x13433: 84, + 0x13434: 84, + 0x13435: 84, + 0x13436: 84, + 0x13437: 84, + 0x13438: 84, + 0x13439: 84, + 0x1343A: 84, + 0x1343B: 84, + 0x1343C: 84, + 0x1343D: 84, + 0x1343E: 84, + 0x1343F: 84, + 0x13440: 84, + 0x13447: 84, + 0x13448: 84, + 0x13449: 84, + 0x1344A: 84, + 0x1344B: 84, + 0x1344C: 84, + 0x1344D: 84, + 0x1344E: 84, + 0x1344F: 84, + 0x13450: 84, + 0x13451: 84, + 0x13452: 84, + 0x13453: 84, + 0x13454: 84, + 0x13455: 84, + 0x16AF0: 84, + 0x16AF1: 84, + 0x16AF2: 84, + 0x16AF3: 84, + 0x16AF4: 84, + 0x16B30: 84, + 0x16B31: 84, + 0x16B32: 84, + 0x16B33: 84, + 0x16B34: 84, + 0x16B35: 84, + 0x16B36: 84, + 0x16F4F: 84, + 0x16F8F: 84, + 0x16F90: 84, + 0x16F91: 84, + 0x16F92: 84, + 0x16FE4: 84, + 0x1BC9D: 84, + 0x1BC9E: 84, + 0x1BCA0: 84, + 0x1BCA1: 84, + 0x1BCA2: 84, + 0x1BCA3: 84, + 0x1CF00: 84, + 0x1CF01: 84, + 0x1CF02: 84, + 0x1CF03: 84, + 0x1CF04: 84, + 0x1CF05: 84, + 0x1CF06: 84, + 
0x1CF07: 84, + 0x1CF08: 84, + 0x1CF09: 84, + 0x1CF0A: 84, + 0x1CF0B: 84, + 0x1CF0C: 84, + 0x1CF0D: 84, + 0x1CF0E: 84, + 0x1CF0F: 84, + 0x1CF10: 84, + 0x1CF11: 84, + 0x1CF12: 84, + 0x1CF13: 84, + 0x1CF14: 84, + 0x1CF15: 84, + 0x1CF16: 84, + 0x1CF17: 84, + 0x1CF18: 84, + 0x1CF19: 84, + 0x1CF1A: 84, + 0x1CF1B: 84, + 0x1CF1C: 84, + 0x1CF1D: 84, + 0x1CF1E: 84, + 0x1CF1F: 84, + 0x1CF20: 84, + 0x1CF21: 84, + 0x1CF22: 84, + 0x1CF23: 84, + 0x1CF24: 84, + 0x1CF25: 84, + 0x1CF26: 84, + 0x1CF27: 84, + 0x1CF28: 84, + 0x1CF29: 84, + 0x1CF2A: 84, + 0x1CF2B: 84, + 0x1CF2C: 84, + 0x1CF2D: 84, + 0x1CF30: 84, + 0x1CF31: 84, + 0x1CF32: 84, + 0x1CF33: 84, + 0x1CF34: 84, + 0x1CF35: 84, + 0x1CF36: 84, + 0x1CF37: 84, + 0x1CF38: 84, + 0x1CF39: 84, + 0x1CF3A: 84, + 0x1CF3B: 84, + 0x1CF3C: 84, + 0x1CF3D: 84, + 0x1CF3E: 84, + 0x1CF3F: 84, + 0x1CF40: 84, + 0x1CF41: 84, + 0x1CF42: 84, + 0x1CF43: 84, + 0x1CF44: 84, + 0x1CF45: 84, + 0x1CF46: 84, + 0x1D167: 84, + 0x1D168: 84, + 0x1D169: 84, + 0x1D173: 84, + 0x1D174: 84, + 0x1D175: 84, + 0x1D176: 84, + 0x1D177: 84, + 0x1D178: 84, + 0x1D179: 84, + 0x1D17A: 84, + 0x1D17B: 84, + 0x1D17C: 84, + 0x1D17D: 84, + 0x1D17E: 84, + 0x1D17F: 84, + 0x1D180: 84, + 0x1D181: 84, + 0x1D182: 84, + 0x1D185: 84, + 0x1D186: 84, + 0x1D187: 84, + 0x1D188: 84, + 0x1D189: 84, + 0x1D18A: 84, + 0x1D18B: 84, + 0x1D1AA: 84, + 0x1D1AB: 84, + 0x1D1AC: 84, + 0x1D1AD: 84, + 0x1D242: 84, + 0x1D243: 84, + 0x1D244: 84, + 0x1DA00: 84, + 0x1DA01: 84, + 0x1DA02: 84, + 0x1DA03: 84, + 0x1DA04: 84, + 0x1DA05: 84, + 0x1DA06: 84, + 0x1DA07: 84, + 0x1DA08: 84, + 0x1DA09: 84, + 0x1DA0A: 84, + 0x1DA0B: 84, + 0x1DA0C: 84, + 0x1DA0D: 84, + 0x1DA0E: 84, + 0x1DA0F: 84, + 0x1DA10: 84, + 0x1DA11: 84, + 0x1DA12: 84, + 0x1DA13: 84, + 0x1DA14: 84, + 0x1DA15: 84, + 0x1DA16: 84, + 0x1DA17: 84, + 0x1DA18: 84, + 0x1DA19: 84, + 0x1DA1A: 84, + 0x1DA1B: 84, + 0x1DA1C: 84, + 0x1DA1D: 84, + 0x1DA1E: 84, + 0x1DA1F: 84, + 0x1DA20: 84, + 0x1DA21: 84, + 0x1DA22: 84, + 0x1DA23: 84, + 0x1DA24: 84, + 0x1DA25: 84, + 
0x1DA26: 84, + 0x1DA27: 84, + 0x1DA28: 84, + 0x1DA29: 84, + 0x1DA2A: 84, + 0x1DA2B: 84, + 0x1DA2C: 84, + 0x1DA2D: 84, + 0x1DA2E: 84, + 0x1DA2F: 84, + 0x1DA30: 84, + 0x1DA31: 84, + 0x1DA32: 84, + 0x1DA33: 84, + 0x1DA34: 84, + 0x1DA35: 84, + 0x1DA36: 84, + 0x1DA3B: 84, + 0x1DA3C: 84, + 0x1DA3D: 84, + 0x1DA3E: 84, + 0x1DA3F: 84, + 0x1DA40: 84, + 0x1DA41: 84, + 0x1DA42: 84, + 0x1DA43: 84, + 0x1DA44: 84, + 0x1DA45: 84, + 0x1DA46: 84, + 0x1DA47: 84, + 0x1DA48: 84, + 0x1DA49: 84, + 0x1DA4A: 84, + 0x1DA4B: 84, + 0x1DA4C: 84, + 0x1DA4D: 84, + 0x1DA4E: 84, + 0x1DA4F: 84, + 0x1DA50: 84, + 0x1DA51: 84, + 0x1DA52: 84, + 0x1DA53: 84, + 0x1DA54: 84, + 0x1DA55: 84, + 0x1DA56: 84, + 0x1DA57: 84, + 0x1DA58: 84, + 0x1DA59: 84, + 0x1DA5A: 84, + 0x1DA5B: 84, + 0x1DA5C: 84, + 0x1DA5D: 84, + 0x1DA5E: 84, + 0x1DA5F: 84, + 0x1DA60: 84, + 0x1DA61: 84, + 0x1DA62: 84, + 0x1DA63: 84, + 0x1DA64: 84, + 0x1DA65: 84, + 0x1DA66: 84, + 0x1DA67: 84, + 0x1DA68: 84, + 0x1DA69: 84, + 0x1DA6A: 84, + 0x1DA6B: 84, + 0x1DA6C: 84, + 0x1DA75: 84, + 0x1DA84: 84, + 0x1DA9B: 84, + 0x1DA9C: 84, + 0x1DA9D: 84, + 0x1DA9E: 84, + 0x1DA9F: 84, + 0x1DAA1: 84, + 0x1DAA2: 84, + 0x1DAA3: 84, + 0x1DAA4: 84, + 0x1DAA5: 84, + 0x1DAA6: 84, + 0x1DAA7: 84, + 0x1DAA8: 84, + 0x1DAA9: 84, + 0x1DAAA: 84, + 0x1DAAB: 84, + 0x1DAAC: 84, + 0x1DAAD: 84, + 0x1DAAE: 84, + 0x1DAAF: 84, + 0x1E000: 84, + 0x1E001: 84, + 0x1E002: 84, + 0x1E003: 84, + 0x1E004: 84, + 0x1E005: 84, + 0x1E006: 84, + 0x1E008: 84, + 0x1E009: 84, + 0x1E00A: 84, + 0x1E00B: 84, + 0x1E00C: 84, + 0x1E00D: 84, + 0x1E00E: 84, + 0x1E00F: 84, + 0x1E010: 84, + 0x1E011: 84, + 0x1E012: 84, + 0x1E013: 84, + 0x1E014: 84, + 0x1E015: 84, + 0x1E016: 84, + 0x1E017: 84, + 0x1E018: 84, + 0x1E01B: 84, + 0x1E01C: 84, + 0x1E01D: 84, + 0x1E01E: 84, + 0x1E01F: 84, + 0x1E020: 84, + 0x1E021: 84, + 0x1E023: 84, + 0x1E024: 84, + 0x1E026: 84, + 0x1E027: 84, + 0x1E028: 84, + 0x1E029: 84, + 0x1E02A: 84, + 0x1E08F: 84, + 0x1E130: 84, + 0x1E131: 84, + 0x1E132: 84, + 0x1E133: 84, + 0x1E134: 84, + 
0x1E135: 84, + 0x1E136: 84, + 0x1E2AE: 84, + 0x1E2EC: 84, + 0x1E2ED: 84, + 0x1E2EE: 84, + 0x1E2EF: 84, + 0x1E4EC: 84, + 0x1E4ED: 84, + 0x1E4EE: 84, + 0x1E4EF: 84, + 0x1E8D0: 84, + 0x1E8D1: 84, + 0x1E8D2: 84, + 0x1E8D3: 84, + 0x1E8D4: 84, + 0x1E8D5: 84, + 0x1E8D6: 84, + 0x1E900: 68, + 0x1E901: 68, + 0x1E902: 68, + 0x1E903: 68, + 0x1E904: 68, + 0x1E905: 68, + 0x1E906: 68, + 0x1E907: 68, + 0x1E908: 68, + 0x1E909: 68, + 0x1E90A: 68, + 0x1E90B: 68, + 0x1E90C: 68, + 0x1E90D: 68, + 0x1E90E: 68, + 0x1E90F: 68, + 0x1E910: 68, + 0x1E911: 68, + 0x1E912: 68, + 0x1E913: 68, + 0x1E914: 68, + 0x1E915: 68, + 0x1E916: 68, + 0x1E917: 68, + 0x1E918: 68, + 0x1E919: 68, + 0x1E91A: 68, + 0x1E91B: 68, + 0x1E91C: 68, + 0x1E91D: 68, + 0x1E91E: 68, + 0x1E91F: 68, + 0x1E920: 68, + 0x1E921: 68, + 0x1E922: 68, + 0x1E923: 68, + 0x1E924: 68, + 0x1E925: 68, + 0x1E926: 68, + 0x1E927: 68, + 0x1E928: 68, + 0x1E929: 68, + 0x1E92A: 68, + 0x1E92B: 68, + 0x1E92C: 68, + 0x1E92D: 68, + 0x1E92E: 68, + 0x1E92F: 68, + 0x1E930: 68, + 0x1E931: 68, + 0x1E932: 68, + 0x1E933: 68, + 0x1E934: 68, + 0x1E935: 68, + 0x1E936: 68, + 0x1E937: 68, + 0x1E938: 68, + 0x1E939: 68, + 0x1E93A: 68, + 0x1E93B: 68, + 0x1E93C: 68, + 0x1E93D: 68, + 0x1E93E: 68, + 0x1E93F: 68, + 0x1E940: 68, + 0x1E941: 68, + 0x1E942: 68, + 0x1E943: 68, + 0x1E944: 84, + 0x1E945: 84, + 0x1E946: 84, + 0x1E947: 84, + 0x1E948: 84, + 0x1E949: 84, + 0x1E94A: 84, + 0x1E94B: 84, + 0xE0001: 84, + 0xE0020: 84, + 0xE0021: 84, + 0xE0022: 84, + 0xE0023: 84, + 0xE0024: 84, + 0xE0025: 84, + 0xE0026: 84, + 0xE0027: 84, + 0xE0028: 84, + 0xE0029: 84, + 0xE002A: 84, + 0xE002B: 84, + 0xE002C: 84, + 0xE002D: 84, + 0xE002E: 84, + 0xE002F: 84, + 0xE0030: 84, + 0xE0031: 84, + 0xE0032: 84, + 0xE0033: 84, + 0xE0034: 84, + 0xE0035: 84, + 0xE0036: 84, + 0xE0037: 84, + 0xE0038: 84, + 0xE0039: 84, + 0xE003A: 84, + 0xE003B: 84, + 0xE003C: 84, + 0xE003D: 84, + 0xE003E: 84, + 0xE003F: 84, + 0xE0040: 84, + 0xE0041: 84, + 0xE0042: 84, + 0xE0043: 84, + 0xE0044: 84, + 0xE0045: 84, + 
0xE0046: 84, + 0xE0047: 84, + 0xE0048: 84, + 0xE0049: 84, + 0xE004A: 84, + 0xE004B: 84, + 0xE004C: 84, + 0xE004D: 84, + 0xE004E: 84, + 0xE004F: 84, + 0xE0050: 84, + 0xE0051: 84, + 0xE0052: 84, + 0xE0053: 84, + 0xE0054: 84, + 0xE0055: 84, + 0xE0056: 84, + 0xE0057: 84, + 0xE0058: 84, + 0xE0059: 84, + 0xE005A: 84, + 0xE005B: 84, + 0xE005C: 84, + 0xE005D: 84, + 0xE005E: 84, + 0xE005F: 84, + 0xE0060: 84, + 0xE0061: 84, + 0xE0062: 84, + 0xE0063: 84, + 0xE0064: 84, + 0xE0065: 84, + 0xE0066: 84, + 0xE0067: 84, + 0xE0068: 84, + 0xE0069: 84, + 0xE006A: 84, + 0xE006B: 84, + 0xE006C: 84, + 0xE006D: 84, + 0xE006E: 84, + 0xE006F: 84, + 0xE0070: 84, + 0xE0071: 84, + 0xE0072: 84, + 0xE0073: 84, + 0xE0074: 84, + 0xE0075: 84, + 0xE0076: 84, + 0xE0077: 84, + 0xE0078: 84, + 0xE0079: 84, + 0xE007A: 84, + 0xE007B: 84, + 0xE007C: 84, + 0xE007D: 84, + 0xE007E: 84, + 0xE007F: 84, + 0xE0100: 84, + 0xE0101: 84, + 0xE0102: 84, + 0xE0103: 84, + 0xE0104: 84, + 0xE0105: 84, + 0xE0106: 84, + 0xE0107: 84, + 0xE0108: 84, + 0xE0109: 84, + 0xE010A: 84, + 0xE010B: 84, + 0xE010C: 84, + 0xE010D: 84, + 0xE010E: 84, + 0xE010F: 84, + 0xE0110: 84, + 0xE0111: 84, + 0xE0112: 84, + 0xE0113: 84, + 0xE0114: 84, + 0xE0115: 84, + 0xE0116: 84, + 0xE0117: 84, + 0xE0118: 84, + 0xE0119: 84, + 0xE011A: 84, + 0xE011B: 84, + 0xE011C: 84, + 0xE011D: 84, + 0xE011E: 84, + 0xE011F: 84, + 0xE0120: 84, + 0xE0121: 84, + 0xE0122: 84, + 0xE0123: 84, + 0xE0124: 84, + 0xE0125: 84, + 0xE0126: 84, + 0xE0127: 84, + 0xE0128: 84, + 0xE0129: 84, + 0xE012A: 84, + 0xE012B: 84, + 0xE012C: 84, + 0xE012D: 84, + 0xE012E: 84, + 0xE012F: 84, + 0xE0130: 84, + 0xE0131: 84, + 0xE0132: 84, + 0xE0133: 84, + 0xE0134: 84, + 0xE0135: 84, + 0xE0136: 84, + 0xE0137: 84, + 0xE0138: 84, + 0xE0139: 84, + 0xE013A: 84, + 0xE013B: 84, + 0xE013C: 84, + 0xE013D: 84, + 0xE013E: 84, + 0xE013F: 84, + 0xE0140: 84, + 0xE0141: 84, + 0xE0142: 84, + 0xE0143: 84, + 0xE0144: 84, + 0xE0145: 84, + 0xE0146: 84, + 0xE0147: 84, + 0xE0148: 84, + 0xE0149: 84, + 0xE014A: 84, + 
0xE014B: 84, + 0xE014C: 84, + 0xE014D: 84, + 0xE014E: 84, + 0xE014F: 84, + 0xE0150: 84, + 0xE0151: 84, + 0xE0152: 84, + 0xE0153: 84, + 0xE0154: 84, + 0xE0155: 84, + 0xE0156: 84, + 0xE0157: 84, + 0xE0158: 84, + 0xE0159: 84, + 0xE015A: 84, + 0xE015B: 84, + 0xE015C: 84, + 0xE015D: 84, + 0xE015E: 84, + 0xE015F: 84, + 0xE0160: 84, + 0xE0161: 84, + 0xE0162: 84, + 0xE0163: 84, + 0xE0164: 84, + 0xE0165: 84, + 0xE0166: 84, + 0xE0167: 84, + 0xE0168: 84, + 0xE0169: 84, + 0xE016A: 84, + 0xE016B: 84, + 0xE016C: 84, + 0xE016D: 84, + 0xE016E: 84, + 0xE016F: 84, + 0xE0170: 84, + 0xE0171: 84, + 0xE0172: 84, + 0xE0173: 84, + 0xE0174: 84, + 0xE0175: 84, + 0xE0176: 84, + 0xE0177: 84, + 0xE0178: 84, + 0xE0179: 84, + 0xE017A: 84, + 0xE017B: 84, + 0xE017C: 84, + 0xE017D: 84, + 0xE017E: 84, + 0xE017F: 84, + 0xE0180: 84, + 0xE0181: 84, + 0xE0182: 84, + 0xE0183: 84, + 0xE0184: 84, + 0xE0185: 84, + 0xE0186: 84, + 0xE0187: 84, + 0xE0188: 84, + 0xE0189: 84, + 0xE018A: 84, + 0xE018B: 84, + 0xE018C: 84, + 0xE018D: 84, + 0xE018E: 84, + 0xE018F: 84, + 0xE0190: 84, + 0xE0191: 84, + 0xE0192: 84, + 0xE0193: 84, + 0xE0194: 84, + 0xE0195: 84, + 0xE0196: 84, + 0xE0197: 84, + 0xE0198: 84, + 0xE0199: 84, + 0xE019A: 84, + 0xE019B: 84, + 0xE019C: 84, + 0xE019D: 84, + 0xE019E: 84, + 0xE019F: 84, + 0xE01A0: 84, + 0xE01A1: 84, + 0xE01A2: 84, + 0xE01A3: 84, + 0xE01A4: 84, + 0xE01A5: 84, + 0xE01A6: 84, + 0xE01A7: 84, + 0xE01A8: 84, + 0xE01A9: 84, + 0xE01AA: 84, + 0xE01AB: 84, + 0xE01AC: 84, + 0xE01AD: 84, + 0xE01AE: 84, + 0xE01AF: 84, + 0xE01B0: 84, + 0xE01B1: 84, + 0xE01B2: 84, + 0xE01B3: 84, + 0xE01B4: 84, + 0xE01B5: 84, + 0xE01B6: 84, + 0xE01B7: 84, + 0xE01B8: 84, + 0xE01B9: 84, + 0xE01BA: 84, + 0xE01BB: 84, + 0xE01BC: 84, + 0xE01BD: 84, + 0xE01BE: 84, + 0xE01BF: 84, + 0xE01C0: 84, + 0xE01C1: 84, + 0xE01C2: 84, + 0xE01C3: 84, + 0xE01C4: 84, + 0xE01C5: 84, + 0xE01C6: 84, + 0xE01C7: 84, + 0xE01C8: 84, + 0xE01C9: 84, + 0xE01CA: 84, + 0xE01CB: 84, + 0xE01CC: 84, + 0xE01CD: 84, + 0xE01CE: 84, + 0xE01CF: 84, + 
0xE01D0: 84, + 0xE01D1: 84, + 0xE01D2: 84, + 0xE01D3: 84, + 0xE01D4: 84, + 0xE01D5: 84, + 0xE01D6: 84, + 0xE01D7: 84, + 0xE01D8: 84, + 0xE01D9: 84, + 0xE01DA: 84, + 0xE01DB: 84, + 0xE01DC: 84, + 0xE01DD: 84, + 0xE01DE: 84, + 0xE01DF: 84, + 0xE01E0: 84, + 0xE01E1: 84, + 0xE01E2: 84, + 0xE01E3: 84, + 0xE01E4: 84, + 0xE01E5: 84, + 0xE01E6: 84, + 0xE01E7: 84, + 0xE01E8: 84, + 0xE01E9: 84, + 0xE01EA: 84, + 0xE01EB: 84, + 0xE01EC: 84, + 0xE01ED: 84, + 0xE01EE: 84, + 0xE01EF: 84, +} +codepoint_classes = { + "PVALID": ( + 0x2D0000002E, + 0x300000003A, + 0x610000007B, + 0xDF000000F7, + 0xF800000100, + 0x10100000102, + 0x10300000104, + 0x10500000106, + 0x10700000108, + 0x1090000010A, + 0x10B0000010C, + 0x10D0000010E, + 0x10F00000110, + 0x11100000112, + 0x11300000114, + 0x11500000116, + 0x11700000118, + 0x1190000011A, + 0x11B0000011C, + 0x11D0000011E, + 0x11F00000120, + 0x12100000122, + 0x12300000124, + 0x12500000126, + 0x12700000128, + 0x1290000012A, + 0x12B0000012C, + 0x12D0000012E, + 0x12F00000130, + 0x13100000132, + 0x13500000136, + 0x13700000139, + 0x13A0000013B, + 0x13C0000013D, + 0x13E0000013F, + 0x14200000143, + 0x14400000145, + 0x14600000147, + 0x14800000149, + 0x14B0000014C, + 0x14D0000014E, + 0x14F00000150, + 0x15100000152, + 0x15300000154, + 0x15500000156, + 0x15700000158, + 0x1590000015A, + 0x15B0000015C, + 0x15D0000015E, + 0x15F00000160, + 0x16100000162, + 0x16300000164, + 0x16500000166, + 0x16700000168, + 0x1690000016A, + 0x16B0000016C, + 0x16D0000016E, + 0x16F00000170, + 0x17100000172, + 0x17300000174, + 0x17500000176, + 0x17700000178, + 0x17A0000017B, + 0x17C0000017D, + 0x17E0000017F, + 0x18000000181, + 0x18300000184, + 0x18500000186, + 0x18800000189, + 0x18C0000018E, + 0x19200000193, + 0x19500000196, + 0x1990000019C, + 0x19E0000019F, + 0x1A1000001A2, + 0x1A3000001A4, + 0x1A5000001A6, + 0x1A8000001A9, + 0x1AA000001AC, + 0x1AD000001AE, + 0x1B0000001B1, + 0x1B4000001B5, + 0x1B6000001B7, + 0x1B9000001BC, + 0x1BD000001C4, + 0x1CE000001CF, + 0x1D0000001D1, + 
0x1D2000001D3, + 0x1D4000001D5, + 0x1D6000001D7, + 0x1D8000001D9, + 0x1DA000001DB, + 0x1DC000001DE, + 0x1DF000001E0, + 0x1E1000001E2, + 0x1E3000001E4, + 0x1E5000001E6, + 0x1E7000001E8, + 0x1E9000001EA, + 0x1EB000001EC, + 0x1ED000001EE, + 0x1EF000001F1, + 0x1F5000001F6, + 0x1F9000001FA, + 0x1FB000001FC, + 0x1FD000001FE, + 0x1FF00000200, + 0x20100000202, + 0x20300000204, + 0x20500000206, + 0x20700000208, + 0x2090000020A, + 0x20B0000020C, + 0x20D0000020E, + 0x20F00000210, + 0x21100000212, + 0x21300000214, + 0x21500000216, + 0x21700000218, + 0x2190000021A, + 0x21B0000021C, + 0x21D0000021E, + 0x21F00000220, + 0x22100000222, + 0x22300000224, + 0x22500000226, + 0x22700000228, + 0x2290000022A, + 0x22B0000022C, + 0x22D0000022E, + 0x22F00000230, + 0x23100000232, + 0x2330000023A, + 0x23C0000023D, + 0x23F00000241, + 0x24200000243, + 0x24700000248, + 0x2490000024A, + 0x24B0000024C, + 0x24D0000024E, + 0x24F000002B0, + 0x2B9000002C2, + 0x2C6000002D2, + 0x2EC000002ED, + 0x2EE000002EF, + 0x30000000340, + 0x34200000343, + 0x3460000034F, + 0x35000000370, + 0x37100000372, + 0x37300000374, + 0x37700000378, + 0x37B0000037E, + 0x39000000391, + 0x3AC000003CF, + 0x3D7000003D8, + 0x3D9000003DA, + 0x3DB000003DC, + 0x3DD000003DE, + 0x3DF000003E0, + 0x3E1000003E2, + 0x3E3000003E4, + 0x3E5000003E6, + 0x3E7000003E8, + 0x3E9000003EA, + 0x3EB000003EC, + 0x3ED000003EE, + 0x3EF000003F0, + 0x3F3000003F4, + 0x3F8000003F9, + 0x3FB000003FD, + 0x43000000460, + 0x46100000462, + 0x46300000464, + 0x46500000466, + 0x46700000468, + 0x4690000046A, + 0x46B0000046C, + 0x46D0000046E, + 0x46F00000470, + 0x47100000472, + 0x47300000474, + 0x47500000476, + 0x47700000478, + 0x4790000047A, + 0x47B0000047C, + 0x47D0000047E, + 0x47F00000480, + 0x48100000482, + 0x48300000488, + 0x48B0000048C, + 0x48D0000048E, + 0x48F00000490, + 0x49100000492, + 0x49300000494, + 0x49500000496, + 0x49700000498, + 0x4990000049A, + 0x49B0000049C, + 0x49D0000049E, + 0x49F000004A0, + 0x4A1000004A2, + 0x4A3000004A4, + 0x4A5000004A6, + 
0x4A7000004A8, + 0x4A9000004AA, + 0x4AB000004AC, + 0x4AD000004AE, + 0x4AF000004B0, + 0x4B1000004B2, + 0x4B3000004B4, + 0x4B5000004B6, + 0x4B7000004B8, + 0x4B9000004BA, + 0x4BB000004BC, + 0x4BD000004BE, + 0x4BF000004C0, + 0x4C2000004C3, + 0x4C4000004C5, + 0x4C6000004C7, + 0x4C8000004C9, + 0x4CA000004CB, + 0x4CC000004CD, + 0x4CE000004D0, + 0x4D1000004D2, + 0x4D3000004D4, + 0x4D5000004D6, + 0x4D7000004D8, + 0x4D9000004DA, + 0x4DB000004DC, + 0x4DD000004DE, + 0x4DF000004E0, + 0x4E1000004E2, + 0x4E3000004E4, + 0x4E5000004E6, + 0x4E7000004E8, + 0x4E9000004EA, + 0x4EB000004EC, + 0x4ED000004EE, + 0x4EF000004F0, + 0x4F1000004F2, + 0x4F3000004F4, + 0x4F5000004F6, + 0x4F7000004F8, + 0x4F9000004FA, + 0x4FB000004FC, + 0x4FD000004FE, + 0x4FF00000500, + 0x50100000502, + 0x50300000504, + 0x50500000506, + 0x50700000508, + 0x5090000050A, + 0x50B0000050C, + 0x50D0000050E, + 0x50F00000510, + 0x51100000512, + 0x51300000514, + 0x51500000516, + 0x51700000518, + 0x5190000051A, + 0x51B0000051C, + 0x51D0000051E, + 0x51F00000520, + 0x52100000522, + 0x52300000524, + 0x52500000526, + 0x52700000528, + 0x5290000052A, + 0x52B0000052C, + 0x52D0000052E, + 0x52F00000530, + 0x5590000055A, + 0x56000000587, + 0x58800000589, + 0x591000005BE, + 0x5BF000005C0, + 0x5C1000005C3, + 0x5C4000005C6, + 0x5C7000005C8, + 0x5D0000005EB, + 0x5EF000005F3, + 0x6100000061B, + 0x62000000640, + 0x64100000660, + 0x66E00000675, + 0x679000006D4, + 0x6D5000006DD, + 0x6DF000006E9, + 0x6EA000006F0, + 0x6FA00000700, + 0x7100000074B, + 0x74D000007B2, + 0x7C0000007F6, + 0x7FD000007FE, + 0x8000000082E, + 0x8400000085C, + 0x8600000086B, + 0x87000000888, + 0x8890000088F, + 0x898000008E2, + 0x8E300000958, + 0x96000000964, + 0x96600000970, + 0x97100000984, + 0x9850000098D, + 0x98F00000991, + 0x993000009A9, + 0x9AA000009B1, + 0x9B2000009B3, + 0x9B6000009BA, + 0x9BC000009C5, + 0x9C7000009C9, + 0x9CB000009CF, + 0x9D7000009D8, + 0x9E0000009E4, + 0x9E6000009F2, + 0x9FC000009FD, + 0x9FE000009FF, + 0xA0100000A04, + 0xA0500000A0B, + 
0xA0F00000A11, + 0xA1300000A29, + 0xA2A00000A31, + 0xA3200000A33, + 0xA3500000A36, + 0xA3800000A3A, + 0xA3C00000A3D, + 0xA3E00000A43, + 0xA4700000A49, + 0xA4B00000A4E, + 0xA5100000A52, + 0xA5C00000A5D, + 0xA6600000A76, + 0xA8100000A84, + 0xA8500000A8E, + 0xA8F00000A92, + 0xA9300000AA9, + 0xAAA00000AB1, + 0xAB200000AB4, + 0xAB500000ABA, + 0xABC00000AC6, + 0xAC700000ACA, + 0xACB00000ACE, + 0xAD000000AD1, + 0xAE000000AE4, + 0xAE600000AF0, + 0xAF900000B00, + 0xB0100000B04, + 0xB0500000B0D, + 0xB0F00000B11, + 0xB1300000B29, + 0xB2A00000B31, + 0xB3200000B34, + 0xB3500000B3A, + 0xB3C00000B45, + 0xB4700000B49, + 0xB4B00000B4E, + 0xB5500000B58, + 0xB5F00000B64, + 0xB6600000B70, + 0xB7100000B72, + 0xB8200000B84, + 0xB8500000B8B, + 0xB8E00000B91, + 0xB9200000B96, + 0xB9900000B9B, + 0xB9C00000B9D, + 0xB9E00000BA0, + 0xBA300000BA5, + 0xBA800000BAB, + 0xBAE00000BBA, + 0xBBE00000BC3, + 0xBC600000BC9, + 0xBCA00000BCE, + 0xBD000000BD1, + 0xBD700000BD8, + 0xBE600000BF0, + 0xC0000000C0D, + 0xC0E00000C11, + 0xC1200000C29, + 0xC2A00000C3A, + 0xC3C00000C45, + 0xC4600000C49, + 0xC4A00000C4E, + 0xC5500000C57, + 0xC5800000C5B, + 0xC5D00000C5E, + 0xC6000000C64, + 0xC6600000C70, + 0xC8000000C84, + 0xC8500000C8D, + 0xC8E00000C91, + 0xC9200000CA9, + 0xCAA00000CB4, + 0xCB500000CBA, + 0xCBC00000CC5, + 0xCC600000CC9, + 0xCCA00000CCE, + 0xCD500000CD7, + 0xCDD00000CDF, + 0xCE000000CE4, + 0xCE600000CF0, + 0xCF100000CF4, + 0xD0000000D0D, + 0xD0E00000D11, + 0xD1200000D45, + 0xD4600000D49, + 0xD4A00000D4F, + 0xD5400000D58, + 0xD5F00000D64, + 0xD6600000D70, + 0xD7A00000D80, + 0xD8100000D84, + 0xD8500000D97, + 0xD9A00000DB2, + 0xDB300000DBC, + 0xDBD00000DBE, + 0xDC000000DC7, + 0xDCA00000DCB, + 0xDCF00000DD5, + 0xDD600000DD7, + 0xDD800000DE0, + 0xDE600000DF0, + 0xDF200000DF4, + 0xE0100000E33, + 0xE3400000E3B, + 0xE4000000E4F, + 0xE5000000E5A, + 0xE8100000E83, + 0xE8400000E85, + 0xE8600000E8B, + 0xE8C00000EA4, + 0xEA500000EA6, + 0xEA700000EB3, + 0xEB400000EBE, + 0xEC000000EC5, + 0xEC600000EC7, + 
0xEC800000ECF, + 0xED000000EDA, + 0xEDE00000EE0, + 0xF0000000F01, + 0xF0B00000F0C, + 0xF1800000F1A, + 0xF2000000F2A, + 0xF3500000F36, + 0xF3700000F38, + 0xF3900000F3A, + 0xF3E00000F43, + 0xF4400000F48, + 0xF4900000F4D, + 0xF4E00000F52, + 0xF5300000F57, + 0xF5800000F5C, + 0xF5D00000F69, + 0xF6A00000F6D, + 0xF7100000F73, + 0xF7400000F75, + 0xF7A00000F81, + 0xF8200000F85, + 0xF8600000F93, + 0xF9400000F98, + 0xF9900000F9D, + 0xF9E00000FA2, + 0xFA300000FA7, + 0xFA800000FAC, + 0xFAD00000FB9, + 0xFBA00000FBD, + 0xFC600000FC7, + 0x10000000104A, + 0x10500000109E, + 0x10D0000010FB, + 0x10FD00001100, + 0x120000001249, + 0x124A0000124E, + 0x125000001257, + 0x125800001259, + 0x125A0000125E, + 0x126000001289, + 0x128A0000128E, + 0x1290000012B1, + 0x12B2000012B6, + 0x12B8000012BF, + 0x12C0000012C1, + 0x12C2000012C6, + 0x12C8000012D7, + 0x12D800001311, + 0x131200001316, + 0x13180000135B, + 0x135D00001360, + 0x138000001390, + 0x13A0000013F6, + 0x14010000166D, + 0x166F00001680, + 0x16810000169B, + 0x16A0000016EB, + 0x16F1000016F9, + 0x170000001716, + 0x171F00001735, + 0x174000001754, + 0x17600000176D, + 0x176E00001771, + 0x177200001774, + 0x1780000017B4, + 0x17B6000017D4, + 0x17D7000017D8, + 0x17DC000017DE, + 0x17E0000017EA, + 0x18100000181A, + 0x182000001879, + 0x1880000018AB, + 0x18B0000018F6, + 0x19000000191F, + 0x19200000192C, + 0x19300000193C, + 0x19460000196E, + 0x197000001975, + 0x1980000019AC, + 0x19B0000019CA, + 0x19D0000019DA, + 0x1A0000001A1C, + 0x1A2000001A5F, + 0x1A6000001A7D, + 0x1A7F00001A8A, + 0x1A9000001A9A, + 0x1AA700001AA8, + 0x1AB000001ABE, + 0x1ABF00001ACF, + 0x1B0000001B4D, + 0x1B5000001B5A, + 0x1B6B00001B74, + 0x1B8000001BF4, + 0x1C0000001C38, + 0x1C4000001C4A, + 0x1C4D00001C7E, + 0x1CD000001CD3, + 0x1CD400001CFB, + 0x1D0000001D2C, + 0x1D2F00001D30, + 0x1D3B00001D3C, + 0x1D4E00001D4F, + 0x1D6B00001D78, + 0x1D7900001D9B, + 0x1DC000001E00, + 0x1E0100001E02, + 0x1E0300001E04, + 0x1E0500001E06, + 0x1E0700001E08, + 0x1E0900001E0A, + 0x1E0B00001E0C, + 
0x1E0D00001E0E, + 0x1E0F00001E10, + 0x1E1100001E12, + 0x1E1300001E14, + 0x1E1500001E16, + 0x1E1700001E18, + 0x1E1900001E1A, + 0x1E1B00001E1C, + 0x1E1D00001E1E, + 0x1E1F00001E20, + 0x1E2100001E22, + 0x1E2300001E24, + 0x1E2500001E26, + 0x1E2700001E28, + 0x1E2900001E2A, + 0x1E2B00001E2C, + 0x1E2D00001E2E, + 0x1E2F00001E30, + 0x1E3100001E32, + 0x1E3300001E34, + 0x1E3500001E36, + 0x1E3700001E38, + 0x1E3900001E3A, + 0x1E3B00001E3C, + 0x1E3D00001E3E, + 0x1E3F00001E40, + 0x1E4100001E42, + 0x1E4300001E44, + 0x1E4500001E46, + 0x1E4700001E48, + 0x1E4900001E4A, + 0x1E4B00001E4C, + 0x1E4D00001E4E, + 0x1E4F00001E50, + 0x1E5100001E52, + 0x1E5300001E54, + 0x1E5500001E56, + 0x1E5700001E58, + 0x1E5900001E5A, + 0x1E5B00001E5C, + 0x1E5D00001E5E, + 0x1E5F00001E60, + 0x1E6100001E62, + 0x1E6300001E64, + 0x1E6500001E66, + 0x1E6700001E68, + 0x1E6900001E6A, + 0x1E6B00001E6C, + 0x1E6D00001E6E, + 0x1E6F00001E70, + 0x1E7100001E72, + 0x1E7300001E74, + 0x1E7500001E76, + 0x1E7700001E78, + 0x1E7900001E7A, + 0x1E7B00001E7C, + 0x1E7D00001E7E, + 0x1E7F00001E80, + 0x1E8100001E82, + 0x1E8300001E84, + 0x1E8500001E86, + 0x1E8700001E88, + 0x1E8900001E8A, + 0x1E8B00001E8C, + 0x1E8D00001E8E, + 0x1E8F00001E90, + 0x1E9100001E92, + 0x1E9300001E94, + 0x1E9500001E9A, + 0x1E9C00001E9E, + 0x1E9F00001EA0, + 0x1EA100001EA2, + 0x1EA300001EA4, + 0x1EA500001EA6, + 0x1EA700001EA8, + 0x1EA900001EAA, + 0x1EAB00001EAC, + 0x1EAD00001EAE, + 0x1EAF00001EB0, + 0x1EB100001EB2, + 0x1EB300001EB4, + 0x1EB500001EB6, + 0x1EB700001EB8, + 0x1EB900001EBA, + 0x1EBB00001EBC, + 0x1EBD00001EBE, + 0x1EBF00001EC0, + 0x1EC100001EC2, + 0x1EC300001EC4, + 0x1EC500001EC6, + 0x1EC700001EC8, + 0x1EC900001ECA, + 0x1ECB00001ECC, + 0x1ECD00001ECE, + 0x1ECF00001ED0, + 0x1ED100001ED2, + 0x1ED300001ED4, + 0x1ED500001ED6, + 0x1ED700001ED8, + 0x1ED900001EDA, + 0x1EDB00001EDC, + 0x1EDD00001EDE, + 0x1EDF00001EE0, + 0x1EE100001EE2, + 0x1EE300001EE4, + 0x1EE500001EE6, + 0x1EE700001EE8, + 0x1EE900001EEA, + 0x1EEB00001EEC, + 0x1EED00001EEE, + 0x1EEF00001EF0, + 
0x1EF100001EF2, + 0x1EF300001EF4, + 0x1EF500001EF6, + 0x1EF700001EF8, + 0x1EF900001EFA, + 0x1EFB00001EFC, + 0x1EFD00001EFE, + 0x1EFF00001F08, + 0x1F1000001F16, + 0x1F2000001F28, + 0x1F3000001F38, + 0x1F4000001F46, + 0x1F5000001F58, + 0x1F6000001F68, + 0x1F7000001F71, + 0x1F7200001F73, + 0x1F7400001F75, + 0x1F7600001F77, + 0x1F7800001F79, + 0x1F7A00001F7B, + 0x1F7C00001F7D, + 0x1FB000001FB2, + 0x1FB600001FB7, + 0x1FC600001FC7, + 0x1FD000001FD3, + 0x1FD600001FD8, + 0x1FE000001FE3, + 0x1FE400001FE8, + 0x1FF600001FF7, + 0x214E0000214F, + 0x218400002185, + 0x2C3000002C60, + 0x2C6100002C62, + 0x2C6500002C67, + 0x2C6800002C69, + 0x2C6A00002C6B, + 0x2C6C00002C6D, + 0x2C7100002C72, + 0x2C7300002C75, + 0x2C7600002C7C, + 0x2C8100002C82, + 0x2C8300002C84, + 0x2C8500002C86, + 0x2C8700002C88, + 0x2C8900002C8A, + 0x2C8B00002C8C, + 0x2C8D00002C8E, + 0x2C8F00002C90, + 0x2C9100002C92, + 0x2C9300002C94, + 0x2C9500002C96, + 0x2C9700002C98, + 0x2C9900002C9A, + 0x2C9B00002C9C, + 0x2C9D00002C9E, + 0x2C9F00002CA0, + 0x2CA100002CA2, + 0x2CA300002CA4, + 0x2CA500002CA6, + 0x2CA700002CA8, + 0x2CA900002CAA, + 0x2CAB00002CAC, + 0x2CAD00002CAE, + 0x2CAF00002CB0, + 0x2CB100002CB2, + 0x2CB300002CB4, + 0x2CB500002CB6, + 0x2CB700002CB8, + 0x2CB900002CBA, + 0x2CBB00002CBC, + 0x2CBD00002CBE, + 0x2CBF00002CC0, + 0x2CC100002CC2, + 0x2CC300002CC4, + 0x2CC500002CC6, + 0x2CC700002CC8, + 0x2CC900002CCA, + 0x2CCB00002CCC, + 0x2CCD00002CCE, + 0x2CCF00002CD0, + 0x2CD100002CD2, + 0x2CD300002CD4, + 0x2CD500002CD6, + 0x2CD700002CD8, + 0x2CD900002CDA, + 0x2CDB00002CDC, + 0x2CDD00002CDE, + 0x2CDF00002CE0, + 0x2CE100002CE2, + 0x2CE300002CE5, + 0x2CEC00002CED, + 0x2CEE00002CF2, + 0x2CF300002CF4, + 0x2D0000002D26, + 0x2D2700002D28, + 0x2D2D00002D2E, + 0x2D3000002D68, + 0x2D7F00002D97, + 0x2DA000002DA7, + 0x2DA800002DAF, + 0x2DB000002DB7, + 0x2DB800002DBF, + 0x2DC000002DC7, + 0x2DC800002DCF, + 0x2DD000002DD7, + 0x2DD800002DDF, + 0x2DE000002E00, + 0x2E2F00002E30, + 0x300500003008, + 0x302A0000302E, + 0x303C0000303D, + 
0x304100003097, + 0x30990000309B, + 0x309D0000309F, + 0x30A1000030FB, + 0x30FC000030FF, + 0x310500003130, + 0x31A0000031C0, + 0x31F000003200, + 0x340000004DC0, + 0x4E000000A48D, + 0xA4D00000A4FE, + 0xA5000000A60D, + 0xA6100000A62C, + 0xA6410000A642, + 0xA6430000A644, + 0xA6450000A646, + 0xA6470000A648, + 0xA6490000A64A, + 0xA64B0000A64C, + 0xA64D0000A64E, + 0xA64F0000A650, + 0xA6510000A652, + 0xA6530000A654, + 0xA6550000A656, + 0xA6570000A658, + 0xA6590000A65A, + 0xA65B0000A65C, + 0xA65D0000A65E, + 0xA65F0000A660, + 0xA6610000A662, + 0xA6630000A664, + 0xA6650000A666, + 0xA6670000A668, + 0xA6690000A66A, + 0xA66B0000A66C, + 0xA66D0000A670, + 0xA6740000A67E, + 0xA67F0000A680, + 0xA6810000A682, + 0xA6830000A684, + 0xA6850000A686, + 0xA6870000A688, + 0xA6890000A68A, + 0xA68B0000A68C, + 0xA68D0000A68E, + 0xA68F0000A690, + 0xA6910000A692, + 0xA6930000A694, + 0xA6950000A696, + 0xA6970000A698, + 0xA6990000A69A, + 0xA69B0000A69C, + 0xA69E0000A6E6, + 0xA6F00000A6F2, + 0xA7170000A720, + 0xA7230000A724, + 0xA7250000A726, + 0xA7270000A728, + 0xA7290000A72A, + 0xA72B0000A72C, + 0xA72D0000A72E, + 0xA72F0000A732, + 0xA7330000A734, + 0xA7350000A736, + 0xA7370000A738, + 0xA7390000A73A, + 0xA73B0000A73C, + 0xA73D0000A73E, + 0xA73F0000A740, + 0xA7410000A742, + 0xA7430000A744, + 0xA7450000A746, + 0xA7470000A748, + 0xA7490000A74A, + 0xA74B0000A74C, + 0xA74D0000A74E, + 0xA74F0000A750, + 0xA7510000A752, + 0xA7530000A754, + 0xA7550000A756, + 0xA7570000A758, + 0xA7590000A75A, + 0xA75B0000A75C, + 0xA75D0000A75E, + 0xA75F0000A760, + 0xA7610000A762, + 0xA7630000A764, + 0xA7650000A766, + 0xA7670000A768, + 0xA7690000A76A, + 0xA76B0000A76C, + 0xA76D0000A76E, + 0xA76F0000A770, + 0xA7710000A779, + 0xA77A0000A77B, + 0xA77C0000A77D, + 0xA77F0000A780, + 0xA7810000A782, + 0xA7830000A784, + 0xA7850000A786, + 0xA7870000A789, + 0xA78C0000A78D, + 0xA78E0000A790, + 0xA7910000A792, + 0xA7930000A796, + 0xA7970000A798, + 0xA7990000A79A, + 0xA79B0000A79C, + 0xA79D0000A79E, + 0xA79F0000A7A0, + 0xA7A10000A7A2, + 
0xA7A30000A7A4, + 0xA7A50000A7A6, + 0xA7A70000A7A8, + 0xA7A90000A7AA, + 0xA7AF0000A7B0, + 0xA7B50000A7B6, + 0xA7B70000A7B8, + 0xA7B90000A7BA, + 0xA7BB0000A7BC, + 0xA7BD0000A7BE, + 0xA7BF0000A7C0, + 0xA7C10000A7C2, + 0xA7C30000A7C4, + 0xA7C80000A7C9, + 0xA7CA0000A7CB, + 0xA7D10000A7D2, + 0xA7D30000A7D4, + 0xA7D50000A7D6, + 0xA7D70000A7D8, + 0xA7D90000A7DA, + 0xA7F60000A7F8, + 0xA7FA0000A828, + 0xA82C0000A82D, + 0xA8400000A874, + 0xA8800000A8C6, + 0xA8D00000A8DA, + 0xA8E00000A8F8, + 0xA8FB0000A8FC, + 0xA8FD0000A92E, + 0xA9300000A954, + 0xA9800000A9C1, + 0xA9CF0000A9DA, + 0xA9E00000A9FF, + 0xAA000000AA37, + 0xAA400000AA4E, + 0xAA500000AA5A, + 0xAA600000AA77, + 0xAA7A0000AAC3, + 0xAADB0000AADE, + 0xAAE00000AAF0, + 0xAAF20000AAF7, + 0xAB010000AB07, + 0xAB090000AB0F, + 0xAB110000AB17, + 0xAB200000AB27, + 0xAB280000AB2F, + 0xAB300000AB5B, + 0xAB600000AB69, + 0xABC00000ABEB, + 0xABEC0000ABEE, + 0xABF00000ABFA, + 0xAC000000D7A4, + 0xFA0E0000FA10, + 0xFA110000FA12, + 0xFA130000FA15, + 0xFA1F0000FA20, + 0xFA210000FA22, + 0xFA230000FA25, + 0xFA270000FA2A, + 0xFB1E0000FB1F, + 0xFE200000FE30, + 0xFE730000FE74, + 0x100000001000C, + 0x1000D00010027, + 0x100280001003B, + 0x1003C0001003E, + 0x1003F0001004E, + 0x100500001005E, + 0x10080000100FB, + 0x101FD000101FE, + 0x102800001029D, + 0x102A0000102D1, + 0x102E0000102E1, + 0x1030000010320, + 0x1032D00010341, + 0x103420001034A, + 0x103500001037B, + 0x103800001039E, + 0x103A0000103C4, + 0x103C8000103D0, + 0x104280001049E, + 0x104A0000104AA, + 0x104D8000104FC, + 0x1050000010528, + 0x1053000010564, + 0x10597000105A2, + 0x105A3000105B2, + 0x105B3000105BA, + 0x105BB000105BD, + 0x1060000010737, + 0x1074000010756, + 0x1076000010768, + 0x1078000010781, + 0x1080000010806, + 0x1080800010809, + 0x1080A00010836, + 0x1083700010839, + 0x1083C0001083D, + 0x1083F00010856, + 0x1086000010877, + 0x108800001089F, + 0x108E0000108F3, + 0x108F4000108F6, + 0x1090000010916, + 0x109200001093A, + 0x10980000109B8, + 0x109BE000109C0, + 0x10A0000010A04, + 
0x10A0500010A07, + 0x10A0C00010A14, + 0x10A1500010A18, + 0x10A1900010A36, + 0x10A3800010A3B, + 0x10A3F00010A40, + 0x10A6000010A7D, + 0x10A8000010A9D, + 0x10AC000010AC8, + 0x10AC900010AE7, + 0x10B0000010B36, + 0x10B4000010B56, + 0x10B6000010B73, + 0x10B8000010B92, + 0x10C0000010C49, + 0x10CC000010CF3, + 0x10D0000010D28, + 0x10D3000010D3A, + 0x10E8000010EAA, + 0x10EAB00010EAD, + 0x10EB000010EB2, + 0x10EFD00010F1D, + 0x10F2700010F28, + 0x10F3000010F51, + 0x10F7000010F86, + 0x10FB000010FC5, + 0x10FE000010FF7, + 0x1100000011047, + 0x1106600011076, + 0x1107F000110BB, + 0x110C2000110C3, + 0x110D0000110E9, + 0x110F0000110FA, + 0x1110000011135, + 0x1113600011140, + 0x1114400011148, + 0x1115000011174, + 0x1117600011177, + 0x11180000111C5, + 0x111C9000111CD, + 0x111CE000111DB, + 0x111DC000111DD, + 0x1120000011212, + 0x1121300011238, + 0x1123E00011242, + 0x1128000011287, + 0x1128800011289, + 0x1128A0001128E, + 0x1128F0001129E, + 0x1129F000112A9, + 0x112B0000112EB, + 0x112F0000112FA, + 0x1130000011304, + 0x113050001130D, + 0x1130F00011311, + 0x1131300011329, + 0x1132A00011331, + 0x1133200011334, + 0x113350001133A, + 0x1133B00011345, + 0x1134700011349, + 0x1134B0001134E, + 0x1135000011351, + 0x1135700011358, + 0x1135D00011364, + 0x113660001136D, + 0x1137000011375, + 0x114000001144B, + 0x114500001145A, + 0x1145E00011462, + 0x11480000114C6, + 0x114C7000114C8, + 0x114D0000114DA, + 0x11580000115B6, + 0x115B8000115C1, + 0x115D8000115DE, + 0x1160000011641, + 0x1164400011645, + 0x116500001165A, + 0x11680000116B9, + 0x116C0000116CA, + 0x117000001171B, + 0x1171D0001172C, + 0x117300001173A, + 0x1174000011747, + 0x118000001183B, + 0x118C0000118EA, + 0x118FF00011907, + 0x119090001190A, + 0x1190C00011914, + 0x1191500011917, + 0x1191800011936, + 0x1193700011939, + 0x1193B00011944, + 0x119500001195A, + 0x119A0000119A8, + 0x119AA000119D8, + 0x119DA000119E2, + 0x119E3000119E5, + 0x11A0000011A3F, + 0x11A4700011A48, + 0x11A5000011A9A, + 0x11A9D00011A9E, + 0x11AB000011AF9, + 0x11C0000011C09, + 
0x11C0A00011C37, + 0x11C3800011C41, + 0x11C5000011C5A, + 0x11C7200011C90, + 0x11C9200011CA8, + 0x11CA900011CB7, + 0x11D0000011D07, + 0x11D0800011D0A, + 0x11D0B00011D37, + 0x11D3A00011D3B, + 0x11D3C00011D3E, + 0x11D3F00011D48, + 0x11D5000011D5A, + 0x11D6000011D66, + 0x11D6700011D69, + 0x11D6A00011D8F, + 0x11D9000011D92, + 0x11D9300011D99, + 0x11DA000011DAA, + 0x11EE000011EF7, + 0x11F0000011F11, + 0x11F1200011F3B, + 0x11F3E00011F43, + 0x11F5000011F5A, + 0x11FB000011FB1, + 0x120000001239A, + 0x1248000012544, + 0x12F9000012FF1, + 0x1300000013430, + 0x1344000013456, + 0x1440000014647, + 0x1680000016A39, + 0x16A4000016A5F, + 0x16A6000016A6A, + 0x16A7000016ABF, + 0x16AC000016ACA, + 0x16AD000016AEE, + 0x16AF000016AF5, + 0x16B0000016B37, + 0x16B4000016B44, + 0x16B5000016B5A, + 0x16B6300016B78, + 0x16B7D00016B90, + 0x16E6000016E80, + 0x16F0000016F4B, + 0x16F4F00016F88, + 0x16F8F00016FA0, + 0x16FE000016FE2, + 0x16FE300016FE5, + 0x16FF000016FF2, + 0x17000000187F8, + 0x1880000018CD6, + 0x18D0000018D09, + 0x1AFF00001AFF4, + 0x1AFF50001AFFC, + 0x1AFFD0001AFFF, + 0x1B0000001B123, + 0x1B1320001B133, + 0x1B1500001B153, + 0x1B1550001B156, + 0x1B1640001B168, + 0x1B1700001B2FC, + 0x1BC000001BC6B, + 0x1BC700001BC7D, + 0x1BC800001BC89, + 0x1BC900001BC9A, + 0x1BC9D0001BC9F, + 0x1CF000001CF2E, + 0x1CF300001CF47, + 0x1DA000001DA37, + 0x1DA3B0001DA6D, + 0x1DA750001DA76, + 0x1DA840001DA85, + 0x1DA9B0001DAA0, + 0x1DAA10001DAB0, + 0x1DF000001DF1F, + 0x1DF250001DF2B, + 0x1E0000001E007, + 0x1E0080001E019, + 0x1E01B0001E022, + 0x1E0230001E025, + 0x1E0260001E02B, + 0x1E08F0001E090, + 0x1E1000001E12D, + 0x1E1300001E13E, + 0x1E1400001E14A, + 0x1E14E0001E14F, + 0x1E2900001E2AF, + 0x1E2C00001E2FA, + 0x1E4D00001E4FA, + 0x1E7E00001E7E7, + 0x1E7E80001E7EC, + 0x1E7ED0001E7EF, + 0x1E7F00001E7FF, + 0x1E8000001E8C5, + 0x1E8D00001E8D7, + 0x1E9220001E94C, + 0x1E9500001E95A, + 0x200000002A6E0, + 0x2A7000002B73A, + 0x2B7400002B81E, + 0x2B8200002CEA2, + 0x2CEB00002EBE1, + 0x2EBF00002EE5E, + 0x300000003134B, + 
"""
Given a list of integers, made up of (hopefully) a small number of long runs
of consecutive integers, compute a representation of the form
((start1, end1), (start2, end2) ...). Then answer the question "was x present
in the original list?" in time O(log(# runs)).
"""

import bisect
from typing import List, Tuple


def intranges_from_list(list_: List[int]) -> Tuple[int, ...]:
    """Represent a list of integers as a sorted sequence of half-open ranges.

    The result describes ((start_0, end_0), (start_1, end_1), ...), such that
    the original integers are exactly those x with start_i <= x < end_i for
    some i. Each range is packed into a single integer by `_encode_range`
    (start << 32 | end) rather than stored as a tuple.

    Duplicate values in `list_` are ignored, so the returned ranges never
    overlap and consecutive runs are always merged into one range.

    Args:
        list_: The integers to represent, in any order.

    Returns:
        A tuple of encoded ranges in ascending order; empty if `list_` is
        empty.
    """
    # De-duplicate before scanning: a repeated value would otherwise look
    # like a break in the run and produce overlapping ranges.
    values = sorted(set(list_))
    if not values:
        return ()

    ranges = []
    run_start = previous = values[0]
    for value in values[1:]:
        if value != previous + 1:
            # Gap found: close the current run (end is exclusive).
            ranges.append(_encode_range(run_start, previous + 1))
            run_start = value
        previous = value
    # Close the final run, which the loop never terminates on its own.
    ranges.append(_encode_range(run_start, previous + 1))

    return tuple(ranges)


def _encode_range(start: int, end: int) -> int:
    """Pack a (start, end) pair into one integer as ``start << 32 | end``.

    `end` must fit in 32 bits for `_decode_range` to recover the pair, since
    decoding masks the low 32 bits.
    """
    return (start << 32) | end


def _decode_range(r: int) -> Tuple[int, int]:
    """Unpack an integer produced by `_encode_range` into (start, end)."""
    return (r >> 32), (r & ((1 << 32) - 1))


def intranges_contain(int_: int, ranges: Tuple[int, ...]) -> bool:
    """Determine if `int_` falls into one of the ranges in `ranges`.

    Args:
        int_: The integer to look up.
        ranges: Encoded half-open ranges in ascending order, as returned by
            `intranges_from_list`.

    Returns:
        True if start <= int_ < end for some encoded range, else False.
    """
    # Encoding (int_, 0) sorts before every range that starts at int_, so
    # bisect_left lands on the first range whose start is >= int_.
    tuple_ = _encode_range(int_, 0)
    pos = bisect.bisect_left(ranges, tuple_)
    # The range just before `pos` starts below int_; int_ is inside it when
    # it also lies below that range's exclusive end (start < int_ < end).
    if pos > 0:
        left, right = _decode_range(ranges[pos - 1])
        if left <= int_ < right:
            return True
    # Otherwise the range at `pos` may start exactly at int_.
    if pos < len(ranges):
        left, _ = _decode_range(ranges[pos])
        if left == int_:
            return True
    return False
/dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/idna/uts46data.py @@ -0,0 +1,8681 @@ +# This file is automatically generated by tools/idna-data +# vim: set fileencoding=utf-8 : + +from typing import List, Tuple, Union + +"""IDNA Mapping Table from UTS46.""" + + +__version__ = "15.1.0" + + +def _seg_0() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x0, "3"), + (0x1, "3"), + (0x2, "3"), + (0x3, "3"), + (0x4, "3"), + (0x5, "3"), + (0x6, "3"), + (0x7, "3"), + (0x8, "3"), + (0x9, "3"), + (0xA, "3"), + (0xB, "3"), + (0xC, "3"), + (0xD, "3"), + (0xE, "3"), + (0xF, "3"), + (0x10, "3"), + (0x11, "3"), + (0x12, "3"), + (0x13, "3"), + (0x14, "3"), + (0x15, "3"), + (0x16, "3"), + (0x17, "3"), + (0x18, "3"), + (0x19, "3"), + (0x1A, "3"), + (0x1B, "3"), + (0x1C, "3"), + (0x1D, "3"), + (0x1E, "3"), + (0x1F, "3"), + (0x20, "3"), + (0x21, "3"), + (0x22, "3"), + (0x23, "3"), + (0x24, "3"), + (0x25, "3"), + (0x26, "3"), + (0x27, "3"), + (0x28, "3"), + (0x29, "3"), + (0x2A, "3"), + (0x2B, "3"), + (0x2C, "3"), + (0x2D, "V"), + (0x2E, "V"), + (0x2F, "3"), + (0x30, "V"), + (0x31, "V"), + (0x32, "V"), + (0x33, "V"), + (0x34, "V"), + (0x35, "V"), + (0x36, "V"), + (0x37, "V"), + (0x38, "V"), + (0x39, "V"), + (0x3A, "3"), + (0x3B, "3"), + (0x3C, "3"), + (0x3D, "3"), + (0x3E, "3"), + (0x3F, "3"), + (0x40, "3"), + (0x41, "M", "a"), + (0x42, "M", "b"), + (0x43, "M", "c"), + (0x44, "M", "d"), + (0x45, "M", "e"), + (0x46, "M", "f"), + (0x47, "M", "g"), + (0x48, "M", "h"), + (0x49, "M", "i"), + (0x4A, "M", "j"), + (0x4B, "M", "k"), + (0x4C, "M", "l"), + (0x4D, "M", "m"), + (0x4E, "M", "n"), + (0x4F, "M", "o"), + (0x50, "M", "p"), + (0x51, "M", "q"), + (0x52, "M", "r"), + (0x53, "M", "s"), + (0x54, "M", "t"), + (0x55, "M", "u"), + (0x56, "M", "v"), + (0x57, "M", "w"), + (0x58, "M", "x"), + (0x59, "M", "y"), + (0x5A, "M", "z"), + (0x5B, "3"), + (0x5C, "3"), + (0x5D, "3"), + (0x5E, "3"), + (0x5F, "3"), + (0x60, "3"), + (0x61, "V"), + (0x62, "V"), + (0x63, "V"), + 
] + + +def _seg_1() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x64, "V"), + (0x65, "V"), + (0x66, "V"), + (0x67, "V"), + (0x68, "V"), + (0x69, "V"), + (0x6A, "V"), + (0x6B, "V"), + (0x6C, "V"), + (0x6D, "V"), + (0x6E, "V"), + (0x6F, "V"), + (0x70, "V"), + (0x71, "V"), + (0x72, "V"), + (0x73, "V"), + (0x74, "V"), + (0x75, "V"), + (0x76, "V"), + (0x77, "V"), + (0x78, "V"), + (0x79, "V"), + (0x7A, "V"), + (0x7B, "3"), + (0x7C, "3"), + (0x7D, "3"), + (0x7E, "3"), + (0x7F, "3"), + (0x80, "X"), + (0x81, "X"), + (0x82, "X"), + (0x83, "X"), + (0x84, "X"), + (0x85, "X"), + (0x86, "X"), + (0x87, "X"), + (0x88, "X"), + (0x89, "X"), + (0x8A, "X"), + (0x8B, "X"), + (0x8C, "X"), + (0x8D, "X"), + (0x8E, "X"), + (0x8F, "X"), + (0x90, "X"), + (0x91, "X"), + (0x92, "X"), + (0x93, "X"), + (0x94, "X"), + (0x95, "X"), + (0x96, "X"), + (0x97, "X"), + (0x98, "X"), + (0x99, "X"), + (0x9A, "X"), + (0x9B, "X"), + (0x9C, "X"), + (0x9D, "X"), + (0x9E, "X"), + (0x9F, "X"), + (0xA0, "3", " "), + (0xA1, "V"), + (0xA2, "V"), + (0xA3, "V"), + (0xA4, "V"), + (0xA5, "V"), + (0xA6, "V"), + (0xA7, "V"), + (0xA8, "3", " ̈"), + (0xA9, "V"), + (0xAA, "M", "a"), + (0xAB, "V"), + (0xAC, "V"), + (0xAD, "I"), + (0xAE, "V"), + (0xAF, "3", " ̄"), + (0xB0, "V"), + (0xB1, "V"), + (0xB2, "M", "2"), + (0xB3, "M", "3"), + (0xB4, "3", " ́"), + (0xB5, "M", "μ"), + (0xB6, "V"), + (0xB7, "V"), + (0xB8, "3", " ̧"), + (0xB9, "M", "1"), + (0xBA, "M", "o"), + (0xBB, "V"), + (0xBC, "M", "1⁄4"), + (0xBD, "M", "1⁄2"), + (0xBE, "M", "3⁄4"), + (0xBF, "V"), + (0xC0, "M", "à"), + (0xC1, "M", "á"), + (0xC2, "M", "â"), + (0xC3, "M", "ã"), + (0xC4, "M", "ä"), + (0xC5, "M", "å"), + (0xC6, "M", "æ"), + (0xC7, "M", "ç"), + ] + + +def _seg_2() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xC8, "M", "è"), + (0xC9, "M", "é"), + (0xCA, "M", "ê"), + (0xCB, "M", "ë"), + (0xCC, "M", "ì"), + (0xCD, "M", "í"), + (0xCE, "M", "î"), + (0xCF, "M", "ï"), + (0xD0, "M", "ð"), + (0xD1, "M", "ñ"), + 
(0xD2, "M", "ò"), + (0xD3, "M", "ó"), + (0xD4, "M", "ô"), + (0xD5, "M", "õ"), + (0xD6, "M", "ö"), + (0xD7, "V"), + (0xD8, "M", "ø"), + (0xD9, "M", "ù"), + (0xDA, "M", "ú"), + (0xDB, "M", "û"), + (0xDC, "M", "ü"), + (0xDD, "M", "ý"), + (0xDE, "M", "þ"), + (0xDF, "D", "ss"), + (0xE0, "V"), + (0xE1, "V"), + (0xE2, "V"), + (0xE3, "V"), + (0xE4, "V"), + (0xE5, "V"), + (0xE6, "V"), + (0xE7, "V"), + (0xE8, "V"), + (0xE9, "V"), + (0xEA, "V"), + (0xEB, "V"), + (0xEC, "V"), + (0xED, "V"), + (0xEE, "V"), + (0xEF, "V"), + (0xF0, "V"), + (0xF1, "V"), + (0xF2, "V"), + (0xF3, "V"), + (0xF4, "V"), + (0xF5, "V"), + (0xF6, "V"), + (0xF7, "V"), + (0xF8, "V"), + (0xF9, "V"), + (0xFA, "V"), + (0xFB, "V"), + (0xFC, "V"), + (0xFD, "V"), + (0xFE, "V"), + (0xFF, "V"), + (0x100, "M", "ā"), + (0x101, "V"), + (0x102, "M", "ă"), + (0x103, "V"), + (0x104, "M", "ą"), + (0x105, "V"), + (0x106, "M", "ć"), + (0x107, "V"), + (0x108, "M", "ĉ"), + (0x109, "V"), + (0x10A, "M", "ċ"), + (0x10B, "V"), + (0x10C, "M", "č"), + (0x10D, "V"), + (0x10E, "M", "ď"), + (0x10F, "V"), + (0x110, "M", "đ"), + (0x111, "V"), + (0x112, "M", "ē"), + (0x113, "V"), + (0x114, "M", "ĕ"), + (0x115, "V"), + (0x116, "M", "ė"), + (0x117, "V"), + (0x118, "M", "ę"), + (0x119, "V"), + (0x11A, "M", "ě"), + (0x11B, "V"), + (0x11C, "M", "ĝ"), + (0x11D, "V"), + (0x11E, "M", "ğ"), + (0x11F, "V"), + (0x120, "M", "ġ"), + (0x121, "V"), + (0x122, "M", "ģ"), + (0x123, "V"), + (0x124, "M", "ĥ"), + (0x125, "V"), + (0x126, "M", "ħ"), + (0x127, "V"), + (0x128, "M", "ĩ"), + (0x129, "V"), + (0x12A, "M", "ī"), + (0x12B, "V"), + ] + + +def _seg_3() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x12C, "M", "ĭ"), + (0x12D, "V"), + (0x12E, "M", "į"), + (0x12F, "V"), + (0x130, "M", "i̇"), + (0x131, "V"), + (0x132, "M", "ij"), + (0x134, "M", "ĵ"), + (0x135, "V"), + (0x136, "M", "ķ"), + (0x137, "V"), + (0x139, "M", "ĺ"), + (0x13A, "V"), + (0x13B, "M", "ļ"), + (0x13C, "V"), + (0x13D, "M", "ľ"), + (0x13E, "V"), + (0x13F, "M", "l·"), + 
(0x141, "M", "ł"), + (0x142, "V"), + (0x143, "M", "ń"), + (0x144, "V"), + (0x145, "M", "ņ"), + (0x146, "V"), + (0x147, "M", "ň"), + (0x148, "V"), + (0x149, "M", "ʼn"), + (0x14A, "M", "ŋ"), + (0x14B, "V"), + (0x14C, "M", "ō"), + (0x14D, "V"), + (0x14E, "M", "ŏ"), + (0x14F, "V"), + (0x150, "M", "ő"), + (0x151, "V"), + (0x152, "M", "œ"), + (0x153, "V"), + (0x154, "M", "ŕ"), + (0x155, "V"), + (0x156, "M", "ŗ"), + (0x157, "V"), + (0x158, "M", "ř"), + (0x159, "V"), + (0x15A, "M", "ś"), + (0x15B, "V"), + (0x15C, "M", "ŝ"), + (0x15D, "V"), + (0x15E, "M", "ş"), + (0x15F, "V"), + (0x160, "M", "š"), + (0x161, "V"), + (0x162, "M", "ţ"), + (0x163, "V"), + (0x164, "M", "ť"), + (0x165, "V"), + (0x166, "M", "ŧ"), + (0x167, "V"), + (0x168, "M", "ũ"), + (0x169, "V"), + (0x16A, "M", "ū"), + (0x16B, "V"), + (0x16C, "M", "ŭ"), + (0x16D, "V"), + (0x16E, "M", "ů"), + (0x16F, "V"), + (0x170, "M", "ű"), + (0x171, "V"), + (0x172, "M", "ų"), + (0x173, "V"), + (0x174, "M", "ŵ"), + (0x175, "V"), + (0x176, "M", "ŷ"), + (0x177, "V"), + (0x178, "M", "ÿ"), + (0x179, "M", "ź"), + (0x17A, "V"), + (0x17B, "M", "ż"), + (0x17C, "V"), + (0x17D, "M", "ž"), + (0x17E, "V"), + (0x17F, "M", "s"), + (0x180, "V"), + (0x181, "M", "ɓ"), + (0x182, "M", "ƃ"), + (0x183, "V"), + (0x184, "M", "ƅ"), + (0x185, "V"), + (0x186, "M", "ɔ"), + (0x187, "M", "ƈ"), + (0x188, "V"), + (0x189, "M", "ɖ"), + (0x18A, "M", "ɗ"), + (0x18B, "M", "ƌ"), + (0x18C, "V"), + (0x18E, "M", "ǝ"), + (0x18F, "M", "ə"), + (0x190, "M", "ɛ"), + (0x191, "M", "ƒ"), + (0x192, "V"), + (0x193, "M", "ɠ"), + ] + + +def _seg_4() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x194, "M", "ɣ"), + (0x195, "V"), + (0x196, "M", "ɩ"), + (0x197, "M", "ɨ"), + (0x198, "M", "ƙ"), + (0x199, "V"), + (0x19C, "M", "ɯ"), + (0x19D, "M", "ɲ"), + (0x19E, "V"), + (0x19F, "M", "ɵ"), + (0x1A0, "M", "ơ"), + (0x1A1, "V"), + (0x1A2, "M", "ƣ"), + (0x1A3, "V"), + (0x1A4, "M", "ƥ"), + (0x1A5, "V"), + (0x1A6, "M", "ʀ"), + (0x1A7, "M", "ƨ"), + (0x1A8, "V"), + 
(0x1A9, "M", "ʃ"), + (0x1AA, "V"), + (0x1AC, "M", "ƭ"), + (0x1AD, "V"), + (0x1AE, "M", "ʈ"), + (0x1AF, "M", "ư"), + (0x1B0, "V"), + (0x1B1, "M", "ʊ"), + (0x1B2, "M", "ʋ"), + (0x1B3, "M", "ƴ"), + (0x1B4, "V"), + (0x1B5, "M", "ƶ"), + (0x1B6, "V"), + (0x1B7, "M", "ʒ"), + (0x1B8, "M", "ƹ"), + (0x1B9, "V"), + (0x1BC, "M", "ƽ"), + (0x1BD, "V"), + (0x1C4, "M", "dž"), + (0x1C7, "M", "lj"), + (0x1CA, "M", "nj"), + (0x1CD, "M", "ǎ"), + (0x1CE, "V"), + (0x1CF, "M", "ǐ"), + (0x1D0, "V"), + (0x1D1, "M", "ǒ"), + (0x1D2, "V"), + (0x1D3, "M", "ǔ"), + (0x1D4, "V"), + (0x1D5, "M", "ǖ"), + (0x1D6, "V"), + (0x1D7, "M", "ǘ"), + (0x1D8, "V"), + (0x1D9, "M", "ǚ"), + (0x1DA, "V"), + (0x1DB, "M", "ǜ"), + (0x1DC, "V"), + (0x1DE, "M", "ǟ"), + (0x1DF, "V"), + (0x1E0, "M", "ǡ"), + (0x1E1, "V"), + (0x1E2, "M", "ǣ"), + (0x1E3, "V"), + (0x1E4, "M", "ǥ"), + (0x1E5, "V"), + (0x1E6, "M", "ǧ"), + (0x1E7, "V"), + (0x1E8, "M", "ǩ"), + (0x1E9, "V"), + (0x1EA, "M", "ǫ"), + (0x1EB, "V"), + (0x1EC, "M", "ǭ"), + (0x1ED, "V"), + (0x1EE, "M", "ǯ"), + (0x1EF, "V"), + (0x1F1, "M", "dz"), + (0x1F4, "M", "ǵ"), + (0x1F5, "V"), + (0x1F6, "M", "ƕ"), + (0x1F7, "M", "ƿ"), + (0x1F8, "M", "ǹ"), + (0x1F9, "V"), + (0x1FA, "M", "ǻ"), + (0x1FB, "V"), + (0x1FC, "M", "ǽ"), + (0x1FD, "V"), + (0x1FE, "M", "ǿ"), + (0x1FF, "V"), + (0x200, "M", "ȁ"), + (0x201, "V"), + (0x202, "M", "ȃ"), + (0x203, "V"), + (0x204, "M", "ȅ"), + (0x205, "V"), + (0x206, "M", "ȇ"), + (0x207, "V"), + (0x208, "M", "ȉ"), + (0x209, "V"), + (0x20A, "M", "ȋ"), + (0x20B, "V"), + (0x20C, "M", "ȍ"), + ] + + +def _seg_5() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x20D, "V"), + (0x20E, "M", "ȏ"), + (0x20F, "V"), + (0x210, "M", "ȑ"), + (0x211, "V"), + (0x212, "M", "ȓ"), + (0x213, "V"), + (0x214, "M", "ȕ"), + (0x215, "V"), + (0x216, "M", "ȗ"), + (0x217, "V"), + (0x218, "M", "ș"), + (0x219, "V"), + (0x21A, "M", "ț"), + (0x21B, "V"), + (0x21C, "M", "ȝ"), + (0x21D, "V"), + (0x21E, "M", "ȟ"), + (0x21F, "V"), + (0x220, "M", "ƞ"), + (0x221, 
"V"), + (0x222, "M", "ȣ"), + (0x223, "V"), + (0x224, "M", "ȥ"), + (0x225, "V"), + (0x226, "M", "ȧ"), + (0x227, "V"), + (0x228, "M", "ȩ"), + (0x229, "V"), + (0x22A, "M", "ȫ"), + (0x22B, "V"), + (0x22C, "M", "ȭ"), + (0x22D, "V"), + (0x22E, "M", "ȯ"), + (0x22F, "V"), + (0x230, "M", "ȱ"), + (0x231, "V"), + (0x232, "M", "ȳ"), + (0x233, "V"), + (0x23A, "M", "ⱥ"), + (0x23B, "M", "ȼ"), + (0x23C, "V"), + (0x23D, "M", "ƚ"), + (0x23E, "M", "ⱦ"), + (0x23F, "V"), + (0x241, "M", "ɂ"), + (0x242, "V"), + (0x243, "M", "ƀ"), + (0x244, "M", "ʉ"), + (0x245, "M", "ʌ"), + (0x246, "M", "ɇ"), + (0x247, "V"), + (0x248, "M", "ɉ"), + (0x249, "V"), + (0x24A, "M", "ɋ"), + (0x24B, "V"), + (0x24C, "M", "ɍ"), + (0x24D, "V"), + (0x24E, "M", "ɏ"), + (0x24F, "V"), + (0x2B0, "M", "h"), + (0x2B1, "M", "ɦ"), + (0x2B2, "M", "j"), + (0x2B3, "M", "r"), + (0x2B4, "M", "ɹ"), + (0x2B5, "M", "ɻ"), + (0x2B6, "M", "ʁ"), + (0x2B7, "M", "w"), + (0x2B8, "M", "y"), + (0x2B9, "V"), + (0x2D8, "3", " ̆"), + (0x2D9, "3", " ̇"), + (0x2DA, "3", " ̊"), + (0x2DB, "3", " ̨"), + (0x2DC, "3", " ̃"), + (0x2DD, "3", " ̋"), + (0x2DE, "V"), + (0x2E0, "M", "ɣ"), + (0x2E1, "M", "l"), + (0x2E2, "M", "s"), + (0x2E3, "M", "x"), + (0x2E4, "M", "ʕ"), + (0x2E5, "V"), + (0x340, "M", "̀"), + (0x341, "M", "́"), + (0x342, "V"), + (0x343, "M", "̓"), + (0x344, "M", "̈́"), + (0x345, "M", "ι"), + (0x346, "V"), + (0x34F, "I"), + (0x350, "V"), + (0x370, "M", "ͱ"), + (0x371, "V"), + (0x372, "M", "ͳ"), + (0x373, "V"), + (0x374, "M", "ʹ"), + (0x375, "V"), + (0x376, "M", "ͷ"), + (0x377, "V"), + ] + + +def _seg_6() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x378, "X"), + (0x37A, "3", " ι"), + (0x37B, "V"), + (0x37E, "3", ";"), + (0x37F, "M", "ϳ"), + (0x380, "X"), + (0x384, "3", " ́"), + (0x385, "3", " ̈́"), + (0x386, "M", "ά"), + (0x387, "M", "·"), + (0x388, "M", "έ"), + (0x389, "M", "ή"), + (0x38A, "M", "ί"), + (0x38B, "X"), + (0x38C, "M", "ό"), + (0x38D, "X"), + (0x38E, "M", "ύ"), + (0x38F, "M", "ώ"), + (0x390, "V"), + 
(0x391, "M", "α"), + (0x392, "M", "β"), + (0x393, "M", "γ"), + (0x394, "M", "δ"), + (0x395, "M", "ε"), + (0x396, "M", "ζ"), + (0x397, "M", "η"), + (0x398, "M", "θ"), + (0x399, "M", "ι"), + (0x39A, "M", "κ"), + (0x39B, "M", "λ"), + (0x39C, "M", "μ"), + (0x39D, "M", "ν"), + (0x39E, "M", "ξ"), + (0x39F, "M", "ο"), + (0x3A0, "M", "π"), + (0x3A1, "M", "ρ"), + (0x3A2, "X"), + (0x3A3, "M", "σ"), + (0x3A4, "M", "τ"), + (0x3A5, "M", "υ"), + (0x3A6, "M", "φ"), + (0x3A7, "M", "χ"), + (0x3A8, "M", "ψ"), + (0x3A9, "M", "ω"), + (0x3AA, "M", "ϊ"), + (0x3AB, "M", "ϋ"), + (0x3AC, "V"), + (0x3C2, "D", "σ"), + (0x3C3, "V"), + (0x3CF, "M", "ϗ"), + (0x3D0, "M", "β"), + (0x3D1, "M", "θ"), + (0x3D2, "M", "υ"), + (0x3D3, "M", "ύ"), + (0x3D4, "M", "ϋ"), + (0x3D5, "M", "φ"), + (0x3D6, "M", "π"), + (0x3D7, "V"), + (0x3D8, "M", "ϙ"), + (0x3D9, "V"), + (0x3DA, "M", "ϛ"), + (0x3DB, "V"), + (0x3DC, "M", "ϝ"), + (0x3DD, "V"), + (0x3DE, "M", "ϟ"), + (0x3DF, "V"), + (0x3E0, "M", "ϡ"), + (0x3E1, "V"), + (0x3E2, "M", "ϣ"), + (0x3E3, "V"), + (0x3E4, "M", "ϥ"), + (0x3E5, "V"), + (0x3E6, "M", "ϧ"), + (0x3E7, "V"), + (0x3E8, "M", "ϩ"), + (0x3E9, "V"), + (0x3EA, "M", "ϫ"), + (0x3EB, "V"), + (0x3EC, "M", "ϭ"), + (0x3ED, "V"), + (0x3EE, "M", "ϯ"), + (0x3EF, "V"), + (0x3F0, "M", "κ"), + (0x3F1, "M", "ρ"), + (0x3F2, "M", "σ"), + (0x3F3, "V"), + (0x3F4, "M", "θ"), + (0x3F5, "M", "ε"), + (0x3F6, "V"), + (0x3F7, "M", "ϸ"), + (0x3F8, "V"), + (0x3F9, "M", "σ"), + (0x3FA, "M", "ϻ"), + (0x3FB, "V"), + (0x3FD, "M", "ͻ"), + (0x3FE, "M", "ͼ"), + (0x3FF, "M", "ͽ"), + (0x400, "M", "ѐ"), + (0x401, "M", "ё"), + (0x402, "M", "ђ"), + ] + + +def _seg_7() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x403, "M", "ѓ"), + (0x404, "M", "є"), + (0x405, "M", "ѕ"), + (0x406, "M", "і"), + (0x407, "M", "ї"), + (0x408, "M", "ј"), + (0x409, "M", "љ"), + (0x40A, "M", "њ"), + (0x40B, "M", "ћ"), + (0x40C, "M", "ќ"), + (0x40D, "M", "ѝ"), + (0x40E, "M", "ў"), + (0x40F, "M", "џ"), + (0x410, "M", "а"), + (0x411, "M", 
"б"), + (0x412, "M", "в"), + (0x413, "M", "г"), + (0x414, "M", "д"), + (0x415, "M", "е"), + (0x416, "M", "ж"), + (0x417, "M", "з"), + (0x418, "M", "и"), + (0x419, "M", "й"), + (0x41A, "M", "к"), + (0x41B, "M", "л"), + (0x41C, "M", "м"), + (0x41D, "M", "н"), + (0x41E, "M", "о"), + (0x41F, "M", "п"), + (0x420, "M", "р"), + (0x421, "M", "с"), + (0x422, "M", "т"), + (0x423, "M", "у"), + (0x424, "M", "ф"), + (0x425, "M", "х"), + (0x426, "M", "ц"), + (0x427, "M", "ч"), + (0x428, "M", "ш"), + (0x429, "M", "щ"), + (0x42A, "M", "ъ"), + (0x42B, "M", "ы"), + (0x42C, "M", "ь"), + (0x42D, "M", "э"), + (0x42E, "M", "ю"), + (0x42F, "M", "я"), + (0x430, "V"), + (0x460, "M", "ѡ"), + (0x461, "V"), + (0x462, "M", "ѣ"), + (0x463, "V"), + (0x464, "M", "ѥ"), + (0x465, "V"), + (0x466, "M", "ѧ"), + (0x467, "V"), + (0x468, "M", "ѩ"), + (0x469, "V"), + (0x46A, "M", "ѫ"), + (0x46B, "V"), + (0x46C, "M", "ѭ"), + (0x46D, "V"), + (0x46E, "M", "ѯ"), + (0x46F, "V"), + (0x470, "M", "ѱ"), + (0x471, "V"), + (0x472, "M", "ѳ"), + (0x473, "V"), + (0x474, "M", "ѵ"), + (0x475, "V"), + (0x476, "M", "ѷ"), + (0x477, "V"), + (0x478, "M", "ѹ"), + (0x479, "V"), + (0x47A, "M", "ѻ"), + (0x47B, "V"), + (0x47C, "M", "ѽ"), + (0x47D, "V"), + (0x47E, "M", "ѿ"), + (0x47F, "V"), + (0x480, "M", "ҁ"), + (0x481, "V"), + (0x48A, "M", "ҋ"), + (0x48B, "V"), + (0x48C, "M", "ҍ"), + (0x48D, "V"), + (0x48E, "M", "ҏ"), + (0x48F, "V"), + (0x490, "M", "ґ"), + (0x491, "V"), + (0x492, "M", "ғ"), + (0x493, "V"), + (0x494, "M", "ҕ"), + (0x495, "V"), + (0x496, "M", "җ"), + (0x497, "V"), + (0x498, "M", "ҙ"), + (0x499, "V"), + (0x49A, "M", "қ"), + (0x49B, "V"), + (0x49C, "M", "ҝ"), + (0x49D, "V"), + ] + + +def _seg_8() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x49E, "M", "ҟ"), + (0x49F, "V"), + (0x4A0, "M", "ҡ"), + (0x4A1, "V"), + (0x4A2, "M", "ң"), + (0x4A3, "V"), + (0x4A4, "M", "ҥ"), + (0x4A5, "V"), + (0x4A6, "M", "ҧ"), + (0x4A7, "V"), + (0x4A8, "M", "ҩ"), + (0x4A9, "V"), + (0x4AA, "M", "ҫ"), + (0x4AB, "V"), + 
(0x4AC, "M", "ҭ"), + (0x4AD, "V"), + (0x4AE, "M", "ү"), + (0x4AF, "V"), + (0x4B0, "M", "ұ"), + (0x4B1, "V"), + (0x4B2, "M", "ҳ"), + (0x4B3, "V"), + (0x4B4, "M", "ҵ"), + (0x4B5, "V"), + (0x4B6, "M", "ҷ"), + (0x4B7, "V"), + (0x4B8, "M", "ҹ"), + (0x4B9, "V"), + (0x4BA, "M", "һ"), + (0x4BB, "V"), + (0x4BC, "M", "ҽ"), + (0x4BD, "V"), + (0x4BE, "M", "ҿ"), + (0x4BF, "V"), + (0x4C0, "X"), + (0x4C1, "M", "ӂ"), + (0x4C2, "V"), + (0x4C3, "M", "ӄ"), + (0x4C4, "V"), + (0x4C5, "M", "ӆ"), + (0x4C6, "V"), + (0x4C7, "M", "ӈ"), + (0x4C8, "V"), + (0x4C9, "M", "ӊ"), + (0x4CA, "V"), + (0x4CB, "M", "ӌ"), + (0x4CC, "V"), + (0x4CD, "M", "ӎ"), + (0x4CE, "V"), + (0x4D0, "M", "ӑ"), + (0x4D1, "V"), + (0x4D2, "M", "ӓ"), + (0x4D3, "V"), + (0x4D4, "M", "ӕ"), + (0x4D5, "V"), + (0x4D6, "M", "ӗ"), + (0x4D7, "V"), + (0x4D8, "M", "ә"), + (0x4D9, "V"), + (0x4DA, "M", "ӛ"), + (0x4DB, "V"), + (0x4DC, "M", "ӝ"), + (0x4DD, "V"), + (0x4DE, "M", "ӟ"), + (0x4DF, "V"), + (0x4E0, "M", "ӡ"), + (0x4E1, "V"), + (0x4E2, "M", "ӣ"), + (0x4E3, "V"), + (0x4E4, "M", "ӥ"), + (0x4E5, "V"), + (0x4E6, "M", "ӧ"), + (0x4E7, "V"), + (0x4E8, "M", "ө"), + (0x4E9, "V"), + (0x4EA, "M", "ӫ"), + (0x4EB, "V"), + (0x4EC, "M", "ӭ"), + (0x4ED, "V"), + (0x4EE, "M", "ӯ"), + (0x4EF, "V"), + (0x4F0, "M", "ӱ"), + (0x4F1, "V"), + (0x4F2, "M", "ӳ"), + (0x4F3, "V"), + (0x4F4, "M", "ӵ"), + (0x4F5, "V"), + (0x4F6, "M", "ӷ"), + (0x4F7, "V"), + (0x4F8, "M", "ӹ"), + (0x4F9, "V"), + (0x4FA, "M", "ӻ"), + (0x4FB, "V"), + (0x4FC, "M", "ӽ"), + (0x4FD, "V"), + (0x4FE, "M", "ӿ"), + (0x4FF, "V"), + (0x500, "M", "ԁ"), + (0x501, "V"), + (0x502, "M", "ԃ"), + ] + + +def _seg_9() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x503, "V"), + (0x504, "M", "ԅ"), + (0x505, "V"), + (0x506, "M", "ԇ"), + (0x507, "V"), + (0x508, "M", "ԉ"), + (0x509, "V"), + (0x50A, "M", "ԋ"), + (0x50B, "V"), + (0x50C, "M", "ԍ"), + (0x50D, "V"), + (0x50E, "M", "ԏ"), + (0x50F, "V"), + (0x510, "M", "ԑ"), + (0x511, "V"), + (0x512, "M", "ԓ"), + (0x513, "V"), + (0x514, 
"M", "ԕ"), + (0x515, "V"), + (0x516, "M", "ԗ"), + (0x517, "V"), + (0x518, "M", "ԙ"), + (0x519, "V"), + (0x51A, "M", "ԛ"), + (0x51B, "V"), + (0x51C, "M", "ԝ"), + (0x51D, "V"), + (0x51E, "M", "ԟ"), + (0x51F, "V"), + (0x520, "M", "ԡ"), + (0x521, "V"), + (0x522, "M", "ԣ"), + (0x523, "V"), + (0x524, "M", "ԥ"), + (0x525, "V"), + (0x526, "M", "ԧ"), + (0x527, "V"), + (0x528, "M", "ԩ"), + (0x529, "V"), + (0x52A, "M", "ԫ"), + (0x52B, "V"), + (0x52C, "M", "ԭ"), + (0x52D, "V"), + (0x52E, "M", "ԯ"), + (0x52F, "V"), + (0x530, "X"), + (0x531, "M", "ա"), + (0x532, "M", "բ"), + (0x533, "M", "գ"), + (0x534, "M", "դ"), + (0x535, "M", "ե"), + (0x536, "M", "զ"), + (0x537, "M", "է"), + (0x538, "M", "ը"), + (0x539, "M", "թ"), + (0x53A, "M", "ժ"), + (0x53B, "M", "ի"), + (0x53C, "M", "լ"), + (0x53D, "M", "խ"), + (0x53E, "M", "ծ"), + (0x53F, "M", "կ"), + (0x540, "M", "հ"), + (0x541, "M", "ձ"), + (0x542, "M", "ղ"), + (0x543, "M", "ճ"), + (0x544, "M", "մ"), + (0x545, "M", "յ"), + (0x546, "M", "ն"), + (0x547, "M", "շ"), + (0x548, "M", "ո"), + (0x549, "M", "չ"), + (0x54A, "M", "պ"), + (0x54B, "M", "ջ"), + (0x54C, "M", "ռ"), + (0x54D, "M", "ս"), + (0x54E, "M", "վ"), + (0x54F, "M", "տ"), + (0x550, "M", "ր"), + (0x551, "M", "ց"), + (0x552, "M", "ւ"), + (0x553, "M", "փ"), + (0x554, "M", "ք"), + (0x555, "M", "օ"), + (0x556, "M", "ֆ"), + (0x557, "X"), + (0x559, "V"), + (0x587, "M", "եւ"), + (0x588, "V"), + (0x58B, "X"), + (0x58D, "V"), + (0x590, "X"), + (0x591, "V"), + (0x5C8, "X"), + (0x5D0, "V"), + (0x5EB, "X"), + (0x5EF, "V"), + (0x5F5, "X"), + (0x606, "V"), + (0x61C, "X"), + (0x61D, "V"), + ] + + +def _seg_10() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x675, "M", "اٴ"), + (0x676, "M", "وٴ"), + (0x677, "M", "ۇٴ"), + (0x678, "M", "يٴ"), + (0x679, "V"), + (0x6DD, "X"), + (0x6DE, "V"), + (0x70E, "X"), + (0x710, "V"), + (0x74B, "X"), + (0x74D, "V"), + (0x7B2, "X"), + (0x7C0, "V"), + (0x7FB, "X"), + (0x7FD, "V"), + (0x82E, "X"), + (0x830, "V"), + (0x83F, "X"), + (0x840, 
"V"), + (0x85C, "X"), + (0x85E, "V"), + (0x85F, "X"), + (0x860, "V"), + (0x86B, "X"), + (0x870, "V"), + (0x88F, "X"), + (0x898, "V"), + (0x8E2, "X"), + (0x8E3, "V"), + (0x958, "M", "क़"), + (0x959, "M", "ख़"), + (0x95A, "M", "ग़"), + (0x95B, "M", "ज़"), + (0x95C, "M", "ड़"), + (0x95D, "M", "ढ़"), + (0x95E, "M", "फ़"), + (0x95F, "M", "य़"), + (0x960, "V"), + (0x984, "X"), + (0x985, "V"), + (0x98D, "X"), + (0x98F, "V"), + (0x991, "X"), + (0x993, "V"), + (0x9A9, "X"), + (0x9AA, "V"), + (0x9B1, "X"), + (0x9B2, "V"), + (0x9B3, "X"), + (0x9B6, "V"), + (0x9BA, "X"), + (0x9BC, "V"), + (0x9C5, "X"), + (0x9C7, "V"), + (0x9C9, "X"), + (0x9CB, "V"), + (0x9CF, "X"), + (0x9D7, "V"), + (0x9D8, "X"), + (0x9DC, "M", "ড়"), + (0x9DD, "M", "ঢ়"), + (0x9DE, "X"), + (0x9DF, "M", "য়"), + (0x9E0, "V"), + (0x9E4, "X"), + (0x9E6, "V"), + (0x9FF, "X"), + (0xA01, "V"), + (0xA04, "X"), + (0xA05, "V"), + (0xA0B, "X"), + (0xA0F, "V"), + (0xA11, "X"), + (0xA13, "V"), + (0xA29, "X"), + (0xA2A, "V"), + (0xA31, "X"), + (0xA32, "V"), + (0xA33, "M", "ਲ਼"), + (0xA34, "X"), + (0xA35, "V"), + (0xA36, "M", "ਸ਼"), + (0xA37, "X"), + (0xA38, "V"), + (0xA3A, "X"), + (0xA3C, "V"), + (0xA3D, "X"), + (0xA3E, "V"), + (0xA43, "X"), + (0xA47, "V"), + (0xA49, "X"), + (0xA4B, "V"), + (0xA4E, "X"), + (0xA51, "V"), + (0xA52, "X"), + (0xA59, "M", "ਖ਼"), + (0xA5A, "M", "ਗ਼"), + (0xA5B, "M", "ਜ਼"), + (0xA5C, "V"), + (0xA5D, "X"), + ] + + +def _seg_11() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xA5E, "M", "ਫ਼"), + (0xA5F, "X"), + (0xA66, "V"), + (0xA77, "X"), + (0xA81, "V"), + (0xA84, "X"), + (0xA85, "V"), + (0xA8E, "X"), + (0xA8F, "V"), + (0xA92, "X"), + (0xA93, "V"), + (0xAA9, "X"), + (0xAAA, "V"), + (0xAB1, "X"), + (0xAB2, "V"), + (0xAB4, "X"), + (0xAB5, "V"), + (0xABA, "X"), + (0xABC, "V"), + (0xAC6, "X"), + (0xAC7, "V"), + (0xACA, "X"), + (0xACB, "V"), + (0xACE, "X"), + (0xAD0, "V"), + (0xAD1, "X"), + (0xAE0, "V"), + (0xAE4, "X"), + (0xAE6, "V"), + (0xAF2, "X"), + (0xAF9, "V"), + (0xB00, 
"X"), + (0xB01, "V"), + (0xB04, "X"), + (0xB05, "V"), + (0xB0D, "X"), + (0xB0F, "V"), + (0xB11, "X"), + (0xB13, "V"), + (0xB29, "X"), + (0xB2A, "V"), + (0xB31, "X"), + (0xB32, "V"), + (0xB34, "X"), + (0xB35, "V"), + (0xB3A, "X"), + (0xB3C, "V"), + (0xB45, "X"), + (0xB47, "V"), + (0xB49, "X"), + (0xB4B, "V"), + (0xB4E, "X"), + (0xB55, "V"), + (0xB58, "X"), + (0xB5C, "M", "ଡ଼"), + (0xB5D, "M", "ଢ଼"), + (0xB5E, "X"), + (0xB5F, "V"), + (0xB64, "X"), + (0xB66, "V"), + (0xB78, "X"), + (0xB82, "V"), + (0xB84, "X"), + (0xB85, "V"), + (0xB8B, "X"), + (0xB8E, "V"), + (0xB91, "X"), + (0xB92, "V"), + (0xB96, "X"), + (0xB99, "V"), + (0xB9B, "X"), + (0xB9C, "V"), + (0xB9D, "X"), + (0xB9E, "V"), + (0xBA0, "X"), + (0xBA3, "V"), + (0xBA5, "X"), + (0xBA8, "V"), + (0xBAB, "X"), + (0xBAE, "V"), + (0xBBA, "X"), + (0xBBE, "V"), + (0xBC3, "X"), + (0xBC6, "V"), + (0xBC9, "X"), + (0xBCA, "V"), + (0xBCE, "X"), + (0xBD0, "V"), + (0xBD1, "X"), + (0xBD7, "V"), + (0xBD8, "X"), + (0xBE6, "V"), + (0xBFB, "X"), + (0xC00, "V"), + (0xC0D, "X"), + (0xC0E, "V"), + (0xC11, "X"), + (0xC12, "V"), + (0xC29, "X"), + (0xC2A, "V"), + ] + + +def _seg_12() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xC3A, "X"), + (0xC3C, "V"), + (0xC45, "X"), + (0xC46, "V"), + (0xC49, "X"), + (0xC4A, "V"), + (0xC4E, "X"), + (0xC55, "V"), + (0xC57, "X"), + (0xC58, "V"), + (0xC5B, "X"), + (0xC5D, "V"), + (0xC5E, "X"), + (0xC60, "V"), + (0xC64, "X"), + (0xC66, "V"), + (0xC70, "X"), + (0xC77, "V"), + (0xC8D, "X"), + (0xC8E, "V"), + (0xC91, "X"), + (0xC92, "V"), + (0xCA9, "X"), + (0xCAA, "V"), + (0xCB4, "X"), + (0xCB5, "V"), + (0xCBA, "X"), + (0xCBC, "V"), + (0xCC5, "X"), + (0xCC6, "V"), + (0xCC9, "X"), + (0xCCA, "V"), + (0xCCE, "X"), + (0xCD5, "V"), + (0xCD7, "X"), + (0xCDD, "V"), + (0xCDF, "X"), + (0xCE0, "V"), + (0xCE4, "X"), + (0xCE6, "V"), + (0xCF0, "X"), + (0xCF1, "V"), + (0xCF4, "X"), + (0xD00, "V"), + (0xD0D, "X"), + (0xD0E, "V"), + (0xD11, "X"), + (0xD12, "V"), + (0xD45, "X"), + (0xD46, "V"), + 
(0xD49, "X"), + (0xD4A, "V"), + (0xD50, "X"), + (0xD54, "V"), + (0xD64, "X"), + (0xD66, "V"), + (0xD80, "X"), + (0xD81, "V"), + (0xD84, "X"), + (0xD85, "V"), + (0xD97, "X"), + (0xD9A, "V"), + (0xDB2, "X"), + (0xDB3, "V"), + (0xDBC, "X"), + (0xDBD, "V"), + (0xDBE, "X"), + (0xDC0, "V"), + (0xDC7, "X"), + (0xDCA, "V"), + (0xDCB, "X"), + (0xDCF, "V"), + (0xDD5, "X"), + (0xDD6, "V"), + (0xDD7, "X"), + (0xDD8, "V"), + (0xDE0, "X"), + (0xDE6, "V"), + (0xDF0, "X"), + (0xDF2, "V"), + (0xDF5, "X"), + (0xE01, "V"), + (0xE33, "M", "ํา"), + (0xE34, "V"), + (0xE3B, "X"), + (0xE3F, "V"), + (0xE5C, "X"), + (0xE81, "V"), + (0xE83, "X"), + (0xE84, "V"), + (0xE85, "X"), + (0xE86, "V"), + (0xE8B, "X"), + (0xE8C, "V"), + (0xEA4, "X"), + (0xEA5, "V"), + (0xEA6, "X"), + (0xEA7, "V"), + (0xEB3, "M", "ໍາ"), + (0xEB4, "V"), + ] + + +def _seg_13() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xEBE, "X"), + (0xEC0, "V"), + (0xEC5, "X"), + (0xEC6, "V"), + (0xEC7, "X"), + (0xEC8, "V"), + (0xECF, "X"), + (0xED0, "V"), + (0xEDA, "X"), + (0xEDC, "M", "ຫນ"), + (0xEDD, "M", "ຫມ"), + (0xEDE, "V"), + (0xEE0, "X"), + (0xF00, "V"), + (0xF0C, "M", "་"), + (0xF0D, "V"), + (0xF43, "M", "གྷ"), + (0xF44, "V"), + (0xF48, "X"), + (0xF49, "V"), + (0xF4D, "M", "ཌྷ"), + (0xF4E, "V"), + (0xF52, "M", "དྷ"), + (0xF53, "V"), + (0xF57, "M", "བྷ"), + (0xF58, "V"), + (0xF5C, "M", "ཛྷ"), + (0xF5D, "V"), + (0xF69, "M", "ཀྵ"), + (0xF6A, "V"), + (0xF6D, "X"), + (0xF71, "V"), + (0xF73, "M", "ཱི"), + (0xF74, "V"), + (0xF75, "M", "ཱུ"), + (0xF76, "M", "ྲྀ"), + (0xF77, "M", "ྲཱྀ"), + (0xF78, "M", "ླྀ"), + (0xF79, "M", "ླཱྀ"), + (0xF7A, "V"), + (0xF81, "M", "ཱྀ"), + (0xF82, "V"), + (0xF93, "M", "ྒྷ"), + (0xF94, "V"), + (0xF98, "X"), + (0xF99, "V"), + (0xF9D, "M", "ྜྷ"), + (0xF9E, "V"), + (0xFA2, "M", "ྡྷ"), + (0xFA3, "V"), + (0xFA7, "M", "ྦྷ"), + (0xFA8, "V"), + (0xFAC, "M", "ྫྷ"), + (0xFAD, "V"), + (0xFB9, "M", "ྐྵ"), + (0xFBA, "V"), + (0xFBD, "X"), + (0xFBE, "V"), + (0xFCD, "X"), + (0xFCE, "V"), + 
(0xFDB, "X"), + (0x1000, "V"), + (0x10A0, "X"), + (0x10C7, "M", "ⴧ"), + (0x10C8, "X"), + (0x10CD, "M", "ⴭ"), + (0x10CE, "X"), + (0x10D0, "V"), + (0x10FC, "M", "ნ"), + (0x10FD, "V"), + (0x115F, "X"), + (0x1161, "V"), + (0x1249, "X"), + (0x124A, "V"), + (0x124E, "X"), + (0x1250, "V"), + (0x1257, "X"), + (0x1258, "V"), + (0x1259, "X"), + (0x125A, "V"), + (0x125E, "X"), + (0x1260, "V"), + (0x1289, "X"), + (0x128A, "V"), + (0x128E, "X"), + (0x1290, "V"), + (0x12B1, "X"), + (0x12B2, "V"), + (0x12B6, "X"), + (0x12B8, "V"), + (0x12BF, "X"), + (0x12C0, "V"), + (0x12C1, "X"), + (0x12C2, "V"), + (0x12C6, "X"), + (0x12C8, "V"), + (0x12D7, "X"), + (0x12D8, "V"), + (0x1311, "X"), + (0x1312, "V"), + ] + + +def _seg_14() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1316, "X"), + (0x1318, "V"), + (0x135B, "X"), + (0x135D, "V"), + (0x137D, "X"), + (0x1380, "V"), + (0x139A, "X"), + (0x13A0, "V"), + (0x13F6, "X"), + (0x13F8, "M", "Ᏸ"), + (0x13F9, "M", "Ᏹ"), + (0x13FA, "M", "Ᏺ"), + (0x13FB, "M", "Ᏻ"), + (0x13FC, "M", "Ᏼ"), + (0x13FD, "M", "Ᏽ"), + (0x13FE, "X"), + (0x1400, "V"), + (0x1680, "X"), + (0x1681, "V"), + (0x169D, "X"), + (0x16A0, "V"), + (0x16F9, "X"), + (0x1700, "V"), + (0x1716, "X"), + (0x171F, "V"), + (0x1737, "X"), + (0x1740, "V"), + (0x1754, "X"), + (0x1760, "V"), + (0x176D, "X"), + (0x176E, "V"), + (0x1771, "X"), + (0x1772, "V"), + (0x1774, "X"), + (0x1780, "V"), + (0x17B4, "X"), + (0x17B6, "V"), + (0x17DE, "X"), + (0x17E0, "V"), + (0x17EA, "X"), + (0x17F0, "V"), + (0x17FA, "X"), + (0x1800, "V"), + (0x1806, "X"), + (0x1807, "V"), + (0x180B, "I"), + (0x180E, "X"), + (0x180F, "I"), + (0x1810, "V"), + (0x181A, "X"), + (0x1820, "V"), + (0x1879, "X"), + (0x1880, "V"), + (0x18AB, "X"), + (0x18B0, "V"), + (0x18F6, "X"), + (0x1900, "V"), + (0x191F, "X"), + (0x1920, "V"), + (0x192C, "X"), + (0x1930, "V"), + (0x193C, "X"), + (0x1940, "V"), + (0x1941, "X"), + (0x1944, "V"), + (0x196E, "X"), + (0x1970, "V"), + (0x1975, "X"), + (0x1980, "V"), + (0x19AC, 
"X"), + (0x19B0, "V"), + (0x19CA, "X"), + (0x19D0, "V"), + (0x19DB, "X"), + (0x19DE, "V"), + (0x1A1C, "X"), + (0x1A1E, "V"), + (0x1A5F, "X"), + (0x1A60, "V"), + (0x1A7D, "X"), + (0x1A7F, "V"), + (0x1A8A, "X"), + (0x1A90, "V"), + (0x1A9A, "X"), + (0x1AA0, "V"), + (0x1AAE, "X"), + (0x1AB0, "V"), + (0x1ACF, "X"), + (0x1B00, "V"), + (0x1B4D, "X"), + (0x1B50, "V"), + (0x1B7F, "X"), + (0x1B80, "V"), + (0x1BF4, "X"), + (0x1BFC, "V"), + (0x1C38, "X"), + (0x1C3B, "V"), + (0x1C4A, "X"), + (0x1C4D, "V"), + (0x1C80, "M", "в"), + ] + + +def _seg_15() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1C81, "M", "д"), + (0x1C82, "M", "о"), + (0x1C83, "M", "с"), + (0x1C84, "M", "т"), + (0x1C86, "M", "ъ"), + (0x1C87, "M", "ѣ"), + (0x1C88, "M", "ꙋ"), + (0x1C89, "X"), + (0x1C90, "M", "ა"), + (0x1C91, "M", "ბ"), + (0x1C92, "M", "გ"), + (0x1C93, "M", "დ"), + (0x1C94, "M", "ე"), + (0x1C95, "M", "ვ"), + (0x1C96, "M", "ზ"), + (0x1C97, "M", "თ"), + (0x1C98, "M", "ი"), + (0x1C99, "M", "კ"), + (0x1C9A, "M", "ლ"), + (0x1C9B, "M", "მ"), + (0x1C9C, "M", "ნ"), + (0x1C9D, "M", "ო"), + (0x1C9E, "M", "პ"), + (0x1C9F, "M", "ჟ"), + (0x1CA0, "M", "რ"), + (0x1CA1, "M", "ს"), + (0x1CA2, "M", "ტ"), + (0x1CA3, "M", "უ"), + (0x1CA4, "M", "ფ"), + (0x1CA5, "M", "ქ"), + (0x1CA6, "M", "ღ"), + (0x1CA7, "M", "ყ"), + (0x1CA8, "M", "შ"), + (0x1CA9, "M", "ჩ"), + (0x1CAA, "M", "ც"), + (0x1CAB, "M", "ძ"), + (0x1CAC, "M", "წ"), + (0x1CAD, "M", "ჭ"), + (0x1CAE, "M", "ხ"), + (0x1CAF, "M", "ჯ"), + (0x1CB0, "M", "ჰ"), + (0x1CB1, "M", "ჱ"), + (0x1CB2, "M", "ჲ"), + (0x1CB3, "M", "ჳ"), + (0x1CB4, "M", "ჴ"), + (0x1CB5, "M", "ჵ"), + (0x1CB6, "M", "ჶ"), + (0x1CB7, "M", "ჷ"), + (0x1CB8, "M", "ჸ"), + (0x1CB9, "M", "ჹ"), + (0x1CBA, "M", "ჺ"), + (0x1CBB, "X"), + (0x1CBD, "M", "ჽ"), + (0x1CBE, "M", "ჾ"), + (0x1CBF, "M", "ჿ"), + (0x1CC0, "V"), + (0x1CC8, "X"), + (0x1CD0, "V"), + (0x1CFB, "X"), + (0x1D00, "V"), + (0x1D2C, "M", "a"), + (0x1D2D, "M", "æ"), + (0x1D2E, "M", "b"), + (0x1D2F, "V"), + (0x1D30, "M", "d"), 
+ (0x1D31, "M", "e"), + (0x1D32, "M", "ǝ"), + (0x1D33, "M", "g"), + (0x1D34, "M", "h"), + (0x1D35, "M", "i"), + (0x1D36, "M", "j"), + (0x1D37, "M", "k"), + (0x1D38, "M", "l"), + (0x1D39, "M", "m"), + (0x1D3A, "M", "n"), + (0x1D3B, "V"), + (0x1D3C, "M", "o"), + (0x1D3D, "M", "ȣ"), + (0x1D3E, "M", "p"), + (0x1D3F, "M", "r"), + (0x1D40, "M", "t"), + (0x1D41, "M", "u"), + (0x1D42, "M", "w"), + (0x1D43, "M", "a"), + (0x1D44, "M", "ɐ"), + (0x1D45, "M", "ɑ"), + (0x1D46, "M", "ᴂ"), + (0x1D47, "M", "b"), + (0x1D48, "M", "d"), + (0x1D49, "M", "e"), + (0x1D4A, "M", "ə"), + (0x1D4B, "M", "ɛ"), + (0x1D4C, "M", "ɜ"), + (0x1D4D, "M", "g"), + (0x1D4E, "V"), + (0x1D4F, "M", "k"), + (0x1D50, "M", "m"), + (0x1D51, "M", "ŋ"), + (0x1D52, "M", "o"), + (0x1D53, "M", "ɔ"), + ] + + +def _seg_16() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1D54, "M", "ᴖ"), + (0x1D55, "M", "ᴗ"), + (0x1D56, "M", "p"), + (0x1D57, "M", "t"), + (0x1D58, "M", "u"), + (0x1D59, "M", "ᴝ"), + (0x1D5A, "M", "ɯ"), + (0x1D5B, "M", "v"), + (0x1D5C, "M", "ᴥ"), + (0x1D5D, "M", "β"), + (0x1D5E, "M", "γ"), + (0x1D5F, "M", "δ"), + (0x1D60, "M", "φ"), + (0x1D61, "M", "χ"), + (0x1D62, "M", "i"), + (0x1D63, "M", "r"), + (0x1D64, "M", "u"), + (0x1D65, "M", "v"), + (0x1D66, "M", "β"), + (0x1D67, "M", "γ"), + (0x1D68, "M", "ρ"), + (0x1D69, "M", "φ"), + (0x1D6A, "M", "χ"), + (0x1D6B, "V"), + (0x1D78, "M", "н"), + (0x1D79, "V"), + (0x1D9B, "M", "ɒ"), + (0x1D9C, "M", "c"), + (0x1D9D, "M", "ɕ"), + (0x1D9E, "M", "ð"), + (0x1D9F, "M", "ɜ"), + (0x1DA0, "M", "f"), + (0x1DA1, "M", "ɟ"), + (0x1DA2, "M", "ɡ"), + (0x1DA3, "M", "ɥ"), + (0x1DA4, "M", "ɨ"), + (0x1DA5, "M", "ɩ"), + (0x1DA6, "M", "ɪ"), + (0x1DA7, "M", "ᵻ"), + (0x1DA8, "M", "ʝ"), + (0x1DA9, "M", "ɭ"), + (0x1DAA, "M", "ᶅ"), + (0x1DAB, "M", "ʟ"), + (0x1DAC, "M", "ɱ"), + (0x1DAD, "M", "ɰ"), + (0x1DAE, "M", "ɲ"), + (0x1DAF, "M", "ɳ"), + (0x1DB0, "M", "ɴ"), + (0x1DB1, "M", "ɵ"), + (0x1DB2, "M", "ɸ"), + (0x1DB3, "M", "ʂ"), + (0x1DB4, "M", "ʃ"), + (0x1DB5, "M", 
"ƫ"), + (0x1DB6, "M", "ʉ"), + (0x1DB7, "M", "ʊ"), + (0x1DB8, "M", "ᴜ"), + (0x1DB9, "M", "ʋ"), + (0x1DBA, "M", "ʌ"), + (0x1DBB, "M", "z"), + (0x1DBC, "M", "ʐ"), + (0x1DBD, "M", "ʑ"), + (0x1DBE, "M", "ʒ"), + (0x1DBF, "M", "θ"), + (0x1DC0, "V"), + (0x1E00, "M", "ḁ"), + (0x1E01, "V"), + (0x1E02, "M", "ḃ"), + (0x1E03, "V"), + (0x1E04, "M", "ḅ"), + (0x1E05, "V"), + (0x1E06, "M", "ḇ"), + (0x1E07, "V"), + (0x1E08, "M", "ḉ"), + (0x1E09, "V"), + (0x1E0A, "M", "ḋ"), + (0x1E0B, "V"), + (0x1E0C, "M", "ḍ"), + (0x1E0D, "V"), + (0x1E0E, "M", "ḏ"), + (0x1E0F, "V"), + (0x1E10, "M", "ḑ"), + (0x1E11, "V"), + (0x1E12, "M", "ḓ"), + (0x1E13, "V"), + (0x1E14, "M", "ḕ"), + (0x1E15, "V"), + (0x1E16, "M", "ḗ"), + (0x1E17, "V"), + (0x1E18, "M", "ḙ"), + (0x1E19, "V"), + (0x1E1A, "M", "ḛ"), + (0x1E1B, "V"), + (0x1E1C, "M", "ḝ"), + (0x1E1D, "V"), + (0x1E1E, "M", "ḟ"), + (0x1E1F, "V"), + (0x1E20, "M", "ḡ"), + (0x1E21, "V"), + (0x1E22, "M", "ḣ"), + (0x1E23, "V"), + ] + + +def _seg_17() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1E24, "M", "ḥ"), + (0x1E25, "V"), + (0x1E26, "M", "ḧ"), + (0x1E27, "V"), + (0x1E28, "M", "ḩ"), + (0x1E29, "V"), + (0x1E2A, "M", "ḫ"), + (0x1E2B, "V"), + (0x1E2C, "M", "ḭ"), + (0x1E2D, "V"), + (0x1E2E, "M", "ḯ"), + (0x1E2F, "V"), + (0x1E30, "M", "ḱ"), + (0x1E31, "V"), + (0x1E32, "M", "ḳ"), + (0x1E33, "V"), + (0x1E34, "M", "ḵ"), + (0x1E35, "V"), + (0x1E36, "M", "ḷ"), + (0x1E37, "V"), + (0x1E38, "M", "ḹ"), + (0x1E39, "V"), + (0x1E3A, "M", "ḻ"), + (0x1E3B, "V"), + (0x1E3C, "M", "ḽ"), + (0x1E3D, "V"), + (0x1E3E, "M", "ḿ"), + (0x1E3F, "V"), + (0x1E40, "M", "ṁ"), + (0x1E41, "V"), + (0x1E42, "M", "ṃ"), + (0x1E43, "V"), + (0x1E44, "M", "ṅ"), + (0x1E45, "V"), + (0x1E46, "M", "ṇ"), + (0x1E47, "V"), + (0x1E48, "M", "ṉ"), + (0x1E49, "V"), + (0x1E4A, "M", "ṋ"), + (0x1E4B, "V"), + (0x1E4C, "M", "ṍ"), + (0x1E4D, "V"), + (0x1E4E, "M", "ṏ"), + (0x1E4F, "V"), + (0x1E50, "M", "ṑ"), + (0x1E51, "V"), + (0x1E52, "M", "ṓ"), + (0x1E53, "V"), + (0x1E54, "M", "ṕ"), + 
(0x1E55, "V"), + (0x1E56, "M", "ṗ"), + (0x1E57, "V"), + (0x1E58, "M", "ṙ"), + (0x1E59, "V"), + (0x1E5A, "M", "ṛ"), + (0x1E5B, "V"), + (0x1E5C, "M", "ṝ"), + (0x1E5D, "V"), + (0x1E5E, "M", "ṟ"), + (0x1E5F, "V"), + (0x1E60, "M", "ṡ"), + (0x1E61, "V"), + (0x1E62, "M", "ṣ"), + (0x1E63, "V"), + (0x1E64, "M", "ṥ"), + (0x1E65, "V"), + (0x1E66, "M", "ṧ"), + (0x1E67, "V"), + (0x1E68, "M", "ṩ"), + (0x1E69, "V"), + (0x1E6A, "M", "ṫ"), + (0x1E6B, "V"), + (0x1E6C, "M", "ṭ"), + (0x1E6D, "V"), + (0x1E6E, "M", "ṯ"), + (0x1E6F, "V"), + (0x1E70, "M", "ṱ"), + (0x1E71, "V"), + (0x1E72, "M", "ṳ"), + (0x1E73, "V"), + (0x1E74, "M", "ṵ"), + (0x1E75, "V"), + (0x1E76, "M", "ṷ"), + (0x1E77, "V"), + (0x1E78, "M", "ṹ"), + (0x1E79, "V"), + (0x1E7A, "M", "ṻ"), + (0x1E7B, "V"), + (0x1E7C, "M", "ṽ"), + (0x1E7D, "V"), + (0x1E7E, "M", "ṿ"), + (0x1E7F, "V"), + (0x1E80, "M", "ẁ"), + (0x1E81, "V"), + (0x1E82, "M", "ẃ"), + (0x1E83, "V"), + (0x1E84, "M", "ẅ"), + (0x1E85, "V"), + (0x1E86, "M", "ẇ"), + (0x1E87, "V"), + ] + + +def _seg_18() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1E88, "M", "ẉ"), + (0x1E89, "V"), + (0x1E8A, "M", "ẋ"), + (0x1E8B, "V"), + (0x1E8C, "M", "ẍ"), + (0x1E8D, "V"), + (0x1E8E, "M", "ẏ"), + (0x1E8F, "V"), + (0x1E90, "M", "ẑ"), + (0x1E91, "V"), + (0x1E92, "M", "ẓ"), + (0x1E93, "V"), + (0x1E94, "M", "ẕ"), + (0x1E95, "V"), + (0x1E9A, "M", "aʾ"), + (0x1E9B, "M", "ṡ"), + (0x1E9C, "V"), + (0x1E9E, "M", "ß"), + (0x1E9F, "V"), + (0x1EA0, "M", "ạ"), + (0x1EA1, "V"), + (0x1EA2, "M", "ả"), + (0x1EA3, "V"), + (0x1EA4, "M", "ấ"), + (0x1EA5, "V"), + (0x1EA6, "M", "ầ"), + (0x1EA7, "V"), + (0x1EA8, "M", "ẩ"), + (0x1EA9, "V"), + (0x1EAA, "M", "ẫ"), + (0x1EAB, "V"), + (0x1EAC, "M", "ậ"), + (0x1EAD, "V"), + (0x1EAE, "M", "ắ"), + (0x1EAF, "V"), + (0x1EB0, "M", "ằ"), + (0x1EB1, "V"), + (0x1EB2, "M", "ẳ"), + (0x1EB3, "V"), + (0x1EB4, "M", "ẵ"), + (0x1EB5, "V"), + (0x1EB6, "M", "ặ"), + (0x1EB7, "V"), + (0x1EB8, "M", "ẹ"), + (0x1EB9, "V"), + (0x1EBA, "M", "ẻ"), + (0x1EBB, "V"), 
+ (0x1EBC, "M", "ẽ"), + (0x1EBD, "V"), + (0x1EBE, "M", "ế"), + (0x1EBF, "V"), + (0x1EC0, "M", "ề"), + (0x1EC1, "V"), + (0x1EC2, "M", "ể"), + (0x1EC3, "V"), + (0x1EC4, "M", "ễ"), + (0x1EC5, "V"), + (0x1EC6, "M", "ệ"), + (0x1EC7, "V"), + (0x1EC8, "M", "ỉ"), + (0x1EC9, "V"), + (0x1ECA, "M", "ị"), + (0x1ECB, "V"), + (0x1ECC, "M", "ọ"), + (0x1ECD, "V"), + (0x1ECE, "M", "ỏ"), + (0x1ECF, "V"), + (0x1ED0, "M", "ố"), + (0x1ED1, "V"), + (0x1ED2, "M", "ồ"), + (0x1ED3, "V"), + (0x1ED4, "M", "ổ"), + (0x1ED5, "V"), + (0x1ED6, "M", "ỗ"), + (0x1ED7, "V"), + (0x1ED8, "M", "ộ"), + (0x1ED9, "V"), + (0x1EDA, "M", "ớ"), + (0x1EDB, "V"), + (0x1EDC, "M", "ờ"), + (0x1EDD, "V"), + (0x1EDE, "M", "ở"), + (0x1EDF, "V"), + (0x1EE0, "M", "ỡ"), + (0x1EE1, "V"), + (0x1EE2, "M", "ợ"), + (0x1EE3, "V"), + (0x1EE4, "M", "ụ"), + (0x1EE5, "V"), + (0x1EE6, "M", "ủ"), + (0x1EE7, "V"), + (0x1EE8, "M", "ứ"), + (0x1EE9, "V"), + (0x1EEA, "M", "ừ"), + (0x1EEB, "V"), + (0x1EEC, "M", "ử"), + (0x1EED, "V"), + (0x1EEE, "M", "ữ"), + (0x1EEF, "V"), + (0x1EF0, "M", "ự"), + ] + + +def _seg_19() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1EF1, "V"), + (0x1EF2, "M", "ỳ"), + (0x1EF3, "V"), + (0x1EF4, "M", "ỵ"), + (0x1EF5, "V"), + (0x1EF6, "M", "ỷ"), + (0x1EF7, "V"), + (0x1EF8, "M", "ỹ"), + (0x1EF9, "V"), + (0x1EFA, "M", "ỻ"), + (0x1EFB, "V"), + (0x1EFC, "M", "ỽ"), + (0x1EFD, "V"), + (0x1EFE, "M", "ỿ"), + (0x1EFF, "V"), + (0x1F08, "M", "ἀ"), + (0x1F09, "M", "ἁ"), + (0x1F0A, "M", "ἂ"), + (0x1F0B, "M", "ἃ"), + (0x1F0C, "M", "ἄ"), + (0x1F0D, "M", "ἅ"), + (0x1F0E, "M", "ἆ"), + (0x1F0F, "M", "ἇ"), + (0x1F10, "V"), + (0x1F16, "X"), + (0x1F18, "M", "ἐ"), + (0x1F19, "M", "ἑ"), + (0x1F1A, "M", "ἒ"), + (0x1F1B, "M", "ἓ"), + (0x1F1C, "M", "ἔ"), + (0x1F1D, "M", "ἕ"), + (0x1F1E, "X"), + (0x1F20, "V"), + (0x1F28, "M", "ἠ"), + (0x1F29, "M", "ἡ"), + (0x1F2A, "M", "ἢ"), + (0x1F2B, "M", "ἣ"), + (0x1F2C, "M", "ἤ"), + (0x1F2D, "M", "ἥ"), + (0x1F2E, "M", "ἦ"), + (0x1F2F, "M", "ἧ"), + (0x1F30, "V"), + (0x1F38, "M", 
"ἰ"), + (0x1F39, "M", "ἱ"), + (0x1F3A, "M", "ἲ"), + (0x1F3B, "M", "ἳ"), + (0x1F3C, "M", "ἴ"), + (0x1F3D, "M", "ἵ"), + (0x1F3E, "M", "ἶ"), + (0x1F3F, "M", "ἷ"), + (0x1F40, "V"), + (0x1F46, "X"), + (0x1F48, "M", "ὀ"), + (0x1F49, "M", "ὁ"), + (0x1F4A, "M", "ὂ"), + (0x1F4B, "M", "ὃ"), + (0x1F4C, "M", "ὄ"), + (0x1F4D, "M", "ὅ"), + (0x1F4E, "X"), + (0x1F50, "V"), + (0x1F58, "X"), + (0x1F59, "M", "ὑ"), + (0x1F5A, "X"), + (0x1F5B, "M", "ὓ"), + (0x1F5C, "X"), + (0x1F5D, "M", "ὕ"), + (0x1F5E, "X"), + (0x1F5F, "M", "ὗ"), + (0x1F60, "V"), + (0x1F68, "M", "ὠ"), + (0x1F69, "M", "ὡ"), + (0x1F6A, "M", "ὢ"), + (0x1F6B, "M", "ὣ"), + (0x1F6C, "M", "ὤ"), + (0x1F6D, "M", "ὥ"), + (0x1F6E, "M", "ὦ"), + (0x1F6F, "M", "ὧ"), + (0x1F70, "V"), + (0x1F71, "M", "ά"), + (0x1F72, "V"), + (0x1F73, "M", "έ"), + (0x1F74, "V"), + (0x1F75, "M", "ή"), + (0x1F76, "V"), + (0x1F77, "M", "ί"), + (0x1F78, "V"), + (0x1F79, "M", "ό"), + (0x1F7A, "V"), + (0x1F7B, "M", "ύ"), + (0x1F7C, "V"), + (0x1F7D, "M", "ώ"), + (0x1F7E, "X"), + (0x1F80, "M", "ἀι"), + (0x1F81, "M", "ἁι"), + (0x1F82, "M", "ἂι"), + (0x1F83, "M", "ἃι"), + (0x1F84, "M", "ἄι"), + (0x1F85, "M", "ἅι"), + (0x1F86, "M", "ἆι"), + (0x1F87, "M", "ἇι"), + ] + + +def _seg_20() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1F88, "M", "ἀι"), + (0x1F89, "M", "ἁι"), + (0x1F8A, "M", "ἂι"), + (0x1F8B, "M", "ἃι"), + (0x1F8C, "M", "ἄι"), + (0x1F8D, "M", "ἅι"), + (0x1F8E, "M", "ἆι"), + (0x1F8F, "M", "ἇι"), + (0x1F90, "M", "ἠι"), + (0x1F91, "M", "ἡι"), + (0x1F92, "M", "ἢι"), + (0x1F93, "M", "ἣι"), + (0x1F94, "M", "ἤι"), + (0x1F95, "M", "ἥι"), + (0x1F96, "M", "ἦι"), + (0x1F97, "M", "ἧι"), + (0x1F98, "M", "ἠι"), + (0x1F99, "M", "ἡι"), + (0x1F9A, "M", "ἢι"), + (0x1F9B, "M", "ἣι"), + (0x1F9C, "M", "ἤι"), + (0x1F9D, "M", "ἥι"), + (0x1F9E, "M", "ἦι"), + (0x1F9F, "M", "ἧι"), + (0x1FA0, "M", "ὠι"), + (0x1FA1, "M", "ὡι"), + (0x1FA2, "M", "ὢι"), + (0x1FA3, "M", "ὣι"), + (0x1FA4, "M", "ὤι"), + (0x1FA5, "M", "ὥι"), + (0x1FA6, "M", "ὦι"), + (0x1FA7, 
"M", "ὧι"), + (0x1FA8, "M", "ὠι"), + (0x1FA9, "M", "ὡι"), + (0x1FAA, "M", "ὢι"), + (0x1FAB, "M", "ὣι"), + (0x1FAC, "M", "ὤι"), + (0x1FAD, "M", "ὥι"), + (0x1FAE, "M", "ὦι"), + (0x1FAF, "M", "ὧι"), + (0x1FB0, "V"), + (0x1FB2, "M", "ὰι"), + (0x1FB3, "M", "αι"), + (0x1FB4, "M", "άι"), + (0x1FB5, "X"), + (0x1FB6, "V"), + (0x1FB7, "M", "ᾶι"), + (0x1FB8, "M", "ᾰ"), + (0x1FB9, "M", "ᾱ"), + (0x1FBA, "M", "ὰ"), + (0x1FBB, "M", "ά"), + (0x1FBC, "M", "αι"), + (0x1FBD, "3", " ̓"), + (0x1FBE, "M", "ι"), + (0x1FBF, "3", " ̓"), + (0x1FC0, "3", " ͂"), + (0x1FC1, "3", " ̈͂"), + (0x1FC2, "M", "ὴι"), + (0x1FC3, "M", "ηι"), + (0x1FC4, "M", "ήι"), + (0x1FC5, "X"), + (0x1FC6, "V"), + (0x1FC7, "M", "ῆι"), + (0x1FC8, "M", "ὲ"), + (0x1FC9, "M", "έ"), + (0x1FCA, "M", "ὴ"), + (0x1FCB, "M", "ή"), + (0x1FCC, "M", "ηι"), + (0x1FCD, "3", " ̓̀"), + (0x1FCE, "3", " ̓́"), + (0x1FCF, "3", " ̓͂"), + (0x1FD0, "V"), + (0x1FD3, "M", "ΐ"), + (0x1FD4, "X"), + (0x1FD6, "V"), + (0x1FD8, "M", "ῐ"), + (0x1FD9, "M", "ῑ"), + (0x1FDA, "M", "ὶ"), + (0x1FDB, "M", "ί"), + (0x1FDC, "X"), + (0x1FDD, "3", " ̔̀"), + (0x1FDE, "3", " ̔́"), + (0x1FDF, "3", " ̔͂"), + (0x1FE0, "V"), + (0x1FE3, "M", "ΰ"), + (0x1FE4, "V"), + (0x1FE8, "M", "ῠ"), + (0x1FE9, "M", "ῡ"), + (0x1FEA, "M", "ὺ"), + (0x1FEB, "M", "ύ"), + (0x1FEC, "M", "ῥ"), + (0x1FED, "3", " ̈̀"), + (0x1FEE, "3", " ̈́"), + (0x1FEF, "3", "`"), + (0x1FF0, "X"), + (0x1FF2, "M", "ὼι"), + (0x1FF3, "M", "ωι"), + (0x1FF4, "M", "ώι"), + (0x1FF5, "X"), + (0x1FF6, "V"), + ] + + +def _seg_21() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1FF7, "M", "ῶι"), + (0x1FF8, "M", "ὸ"), + (0x1FF9, "M", "ό"), + (0x1FFA, "M", "ὼ"), + (0x1FFB, "M", "ώ"), + (0x1FFC, "M", "ωι"), + (0x1FFD, "3", " ́"), + (0x1FFE, "3", " ̔"), + (0x1FFF, "X"), + (0x2000, "3", " "), + (0x200B, "I"), + (0x200C, "D", ""), + (0x200E, "X"), + (0x2010, "V"), + (0x2011, "M", "‐"), + (0x2012, "V"), + (0x2017, "3", " ̳"), + (0x2018, "V"), + (0x2024, "X"), + (0x2027, "V"), + (0x2028, "X"), + 
(0x202F, "3", " "), + (0x2030, "V"), + (0x2033, "M", "′′"), + (0x2034, "M", "′′′"), + (0x2035, "V"), + (0x2036, "M", "‵‵"), + (0x2037, "M", "‵‵‵"), + (0x2038, "V"), + (0x203C, "3", "!!"), + (0x203D, "V"), + (0x203E, "3", " ̅"), + (0x203F, "V"), + (0x2047, "3", "??"), + (0x2048, "3", "?!"), + (0x2049, "3", "!?"), + (0x204A, "V"), + (0x2057, "M", "′′′′"), + (0x2058, "V"), + (0x205F, "3", " "), + (0x2060, "I"), + (0x2061, "X"), + (0x2064, "I"), + (0x2065, "X"), + (0x2070, "M", "0"), + (0x2071, "M", "i"), + (0x2072, "X"), + (0x2074, "M", "4"), + (0x2075, "M", "5"), + (0x2076, "M", "6"), + (0x2077, "M", "7"), + (0x2078, "M", "8"), + (0x2079, "M", "9"), + (0x207A, "3", "+"), + (0x207B, "M", "−"), + (0x207C, "3", "="), + (0x207D, "3", "("), + (0x207E, "3", ")"), + (0x207F, "M", "n"), + (0x2080, "M", "0"), + (0x2081, "M", "1"), + (0x2082, "M", "2"), + (0x2083, "M", "3"), + (0x2084, "M", "4"), + (0x2085, "M", "5"), + (0x2086, "M", "6"), + (0x2087, "M", "7"), + (0x2088, "M", "8"), + (0x2089, "M", "9"), + (0x208A, "3", "+"), + (0x208B, "M", "−"), + (0x208C, "3", "="), + (0x208D, "3", "("), + (0x208E, "3", ")"), + (0x208F, "X"), + (0x2090, "M", "a"), + (0x2091, "M", "e"), + (0x2092, "M", "o"), + (0x2093, "M", "x"), + (0x2094, "M", "ə"), + (0x2095, "M", "h"), + (0x2096, "M", "k"), + (0x2097, "M", "l"), + (0x2098, "M", "m"), + (0x2099, "M", "n"), + (0x209A, "M", "p"), + (0x209B, "M", "s"), + (0x209C, "M", "t"), + (0x209D, "X"), + (0x20A0, "V"), + (0x20A8, "M", "rs"), + (0x20A9, "V"), + (0x20C1, "X"), + (0x20D0, "V"), + (0x20F1, "X"), + (0x2100, "3", "a/c"), + (0x2101, "3", "a/s"), + (0x2102, "M", "c"), + (0x2103, "M", "°c"), + (0x2104, "V"), + ] + + +def _seg_22() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x2105, "3", "c/o"), + (0x2106, "3", "c/u"), + (0x2107, "M", "ɛ"), + (0x2108, "V"), + (0x2109, "M", "°f"), + (0x210A, "M", "g"), + (0x210B, "M", "h"), + (0x210F, "M", "ħ"), + (0x2110, "M", "i"), + (0x2112, "M", "l"), + (0x2114, "V"), + (0x2115, "M", 
"n"), + (0x2116, "M", "no"), + (0x2117, "V"), + (0x2119, "M", "p"), + (0x211A, "M", "q"), + (0x211B, "M", "r"), + (0x211E, "V"), + (0x2120, "M", "sm"), + (0x2121, "M", "tel"), + (0x2122, "M", "tm"), + (0x2123, "V"), + (0x2124, "M", "z"), + (0x2125, "V"), + (0x2126, "M", "ω"), + (0x2127, "V"), + (0x2128, "M", "z"), + (0x2129, "V"), + (0x212A, "M", "k"), + (0x212B, "M", "å"), + (0x212C, "M", "b"), + (0x212D, "M", "c"), + (0x212E, "V"), + (0x212F, "M", "e"), + (0x2131, "M", "f"), + (0x2132, "X"), + (0x2133, "M", "m"), + (0x2134, "M", "o"), + (0x2135, "M", "א"), + (0x2136, "M", "ב"), + (0x2137, "M", "ג"), + (0x2138, "M", "ד"), + (0x2139, "M", "i"), + (0x213A, "V"), + (0x213B, "M", "fax"), + (0x213C, "M", "π"), + (0x213D, "M", "γ"), + (0x213F, "M", "π"), + (0x2140, "M", "∑"), + (0x2141, "V"), + (0x2145, "M", "d"), + (0x2147, "M", "e"), + (0x2148, "M", "i"), + (0x2149, "M", "j"), + (0x214A, "V"), + (0x2150, "M", "1⁄7"), + (0x2151, "M", "1⁄9"), + (0x2152, "M", "1⁄10"), + (0x2153, "M", "1⁄3"), + (0x2154, "M", "2⁄3"), + (0x2155, "M", "1⁄5"), + (0x2156, "M", "2⁄5"), + (0x2157, "M", "3⁄5"), + (0x2158, "M", "4⁄5"), + (0x2159, "M", "1⁄6"), + (0x215A, "M", "5⁄6"), + (0x215B, "M", "1⁄8"), + (0x215C, "M", "3⁄8"), + (0x215D, "M", "5⁄8"), + (0x215E, "M", "7⁄8"), + (0x215F, "M", "1⁄"), + (0x2160, "M", "i"), + (0x2161, "M", "ii"), + (0x2162, "M", "iii"), + (0x2163, "M", "iv"), + (0x2164, "M", "v"), + (0x2165, "M", "vi"), + (0x2166, "M", "vii"), + (0x2167, "M", "viii"), + (0x2168, "M", "ix"), + (0x2169, "M", "x"), + (0x216A, "M", "xi"), + (0x216B, "M", "xii"), + (0x216C, "M", "l"), + (0x216D, "M", "c"), + (0x216E, "M", "d"), + (0x216F, "M", "m"), + (0x2170, "M", "i"), + (0x2171, "M", "ii"), + (0x2172, "M", "iii"), + (0x2173, "M", "iv"), + (0x2174, "M", "v"), + (0x2175, "M", "vi"), + (0x2176, "M", "vii"), + (0x2177, "M", "viii"), + (0x2178, "M", "ix"), + (0x2179, "M", "x"), + (0x217A, "M", "xi"), + (0x217B, "M", "xii"), + (0x217C, "M", "l"), + ] + + +def _seg_23() -> 
List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x217D, "M", "c"), + (0x217E, "M", "d"), + (0x217F, "M", "m"), + (0x2180, "V"), + (0x2183, "X"), + (0x2184, "V"), + (0x2189, "M", "0⁄3"), + (0x218A, "V"), + (0x218C, "X"), + (0x2190, "V"), + (0x222C, "M", "∫∫"), + (0x222D, "M", "∫∫∫"), + (0x222E, "V"), + (0x222F, "M", "∮∮"), + (0x2230, "M", "∮∮∮"), + (0x2231, "V"), + (0x2329, "M", "〈"), + (0x232A, "M", "〉"), + (0x232B, "V"), + (0x2427, "X"), + (0x2440, "V"), + (0x244B, "X"), + (0x2460, "M", "1"), + (0x2461, "M", "2"), + (0x2462, "M", "3"), + (0x2463, "M", "4"), + (0x2464, "M", "5"), + (0x2465, "M", "6"), + (0x2466, "M", "7"), + (0x2467, "M", "8"), + (0x2468, "M", "9"), + (0x2469, "M", "10"), + (0x246A, "M", "11"), + (0x246B, "M", "12"), + (0x246C, "M", "13"), + (0x246D, "M", "14"), + (0x246E, "M", "15"), + (0x246F, "M", "16"), + (0x2470, "M", "17"), + (0x2471, "M", "18"), + (0x2472, "M", "19"), + (0x2473, "M", "20"), + (0x2474, "3", "(1)"), + (0x2475, "3", "(2)"), + (0x2476, "3", "(3)"), + (0x2477, "3", "(4)"), + (0x2478, "3", "(5)"), + (0x2479, "3", "(6)"), + (0x247A, "3", "(7)"), + (0x247B, "3", "(8)"), + (0x247C, "3", "(9)"), + (0x247D, "3", "(10)"), + (0x247E, "3", "(11)"), + (0x247F, "3", "(12)"), + (0x2480, "3", "(13)"), + (0x2481, "3", "(14)"), + (0x2482, "3", "(15)"), + (0x2483, "3", "(16)"), + (0x2484, "3", "(17)"), + (0x2485, "3", "(18)"), + (0x2486, "3", "(19)"), + (0x2487, "3", "(20)"), + (0x2488, "X"), + (0x249C, "3", "(a)"), + (0x249D, "3", "(b)"), + (0x249E, "3", "(c)"), + (0x249F, "3", "(d)"), + (0x24A0, "3", "(e)"), + (0x24A1, "3", "(f)"), + (0x24A2, "3", "(g)"), + (0x24A3, "3", "(h)"), + (0x24A4, "3", "(i)"), + (0x24A5, "3", "(j)"), + (0x24A6, "3", "(k)"), + (0x24A7, "3", "(l)"), + (0x24A8, "3", "(m)"), + (0x24A9, "3", "(n)"), + (0x24AA, "3", "(o)"), + (0x24AB, "3", "(p)"), + (0x24AC, "3", "(q)"), + (0x24AD, "3", "(r)"), + (0x24AE, "3", "(s)"), + (0x24AF, "3", "(t)"), + (0x24B0, "3", "(u)"), + (0x24B1, "3", "(v)"), + (0x24B2, "3", 
"(w)"), + (0x24B3, "3", "(x)"), + (0x24B4, "3", "(y)"), + (0x24B5, "3", "(z)"), + (0x24B6, "M", "a"), + (0x24B7, "M", "b"), + (0x24B8, "M", "c"), + (0x24B9, "M", "d"), + (0x24BA, "M", "e"), + (0x24BB, "M", "f"), + (0x24BC, "M", "g"), + (0x24BD, "M", "h"), + (0x24BE, "M", "i"), + (0x24BF, "M", "j"), + (0x24C0, "M", "k"), + ] + + +def _seg_24() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x24C1, "M", "l"), + (0x24C2, "M", "m"), + (0x24C3, "M", "n"), + (0x24C4, "M", "o"), + (0x24C5, "M", "p"), + (0x24C6, "M", "q"), + (0x24C7, "M", "r"), + (0x24C8, "M", "s"), + (0x24C9, "M", "t"), + (0x24CA, "M", "u"), + (0x24CB, "M", "v"), + (0x24CC, "M", "w"), + (0x24CD, "M", "x"), + (0x24CE, "M", "y"), + (0x24CF, "M", "z"), + (0x24D0, "M", "a"), + (0x24D1, "M", "b"), + (0x24D2, "M", "c"), + (0x24D3, "M", "d"), + (0x24D4, "M", "e"), + (0x24D5, "M", "f"), + (0x24D6, "M", "g"), + (0x24D7, "M", "h"), + (0x24D8, "M", "i"), + (0x24D9, "M", "j"), + (0x24DA, "M", "k"), + (0x24DB, "M", "l"), + (0x24DC, "M", "m"), + (0x24DD, "M", "n"), + (0x24DE, "M", "o"), + (0x24DF, "M", "p"), + (0x24E0, "M", "q"), + (0x24E1, "M", "r"), + (0x24E2, "M", "s"), + (0x24E3, "M", "t"), + (0x24E4, "M", "u"), + (0x24E5, "M", "v"), + (0x24E6, "M", "w"), + (0x24E7, "M", "x"), + (0x24E8, "M", "y"), + (0x24E9, "M", "z"), + (0x24EA, "M", "0"), + (0x24EB, "V"), + (0x2A0C, "M", "∫∫∫∫"), + (0x2A0D, "V"), + (0x2A74, "3", "::="), + (0x2A75, "3", "=="), + (0x2A76, "3", "==="), + (0x2A77, "V"), + (0x2ADC, "M", "⫝̸"), + (0x2ADD, "V"), + (0x2B74, "X"), + (0x2B76, "V"), + (0x2B96, "X"), + (0x2B97, "V"), + (0x2C00, "M", "ⰰ"), + (0x2C01, "M", "ⰱ"), + (0x2C02, "M", "ⰲ"), + (0x2C03, "M", "ⰳ"), + (0x2C04, "M", "ⰴ"), + (0x2C05, "M", "ⰵ"), + (0x2C06, "M", "ⰶ"), + (0x2C07, "M", "ⰷ"), + (0x2C08, "M", "ⰸ"), + (0x2C09, "M", "ⰹ"), + (0x2C0A, "M", "ⰺ"), + (0x2C0B, "M", "ⰻ"), + (0x2C0C, "M", "ⰼ"), + (0x2C0D, "M", "ⰽ"), + (0x2C0E, "M", "ⰾ"), + (0x2C0F, "M", "ⰿ"), + (0x2C10, "M", "ⱀ"), + (0x2C11, "M", "ⱁ"), + (0x2C12, 
"M", "ⱂ"), + (0x2C13, "M", "ⱃ"), + (0x2C14, "M", "ⱄ"), + (0x2C15, "M", "ⱅ"), + (0x2C16, "M", "ⱆ"), + (0x2C17, "M", "ⱇ"), + (0x2C18, "M", "ⱈ"), + (0x2C19, "M", "ⱉ"), + (0x2C1A, "M", "ⱊ"), + (0x2C1B, "M", "ⱋ"), + (0x2C1C, "M", "ⱌ"), + (0x2C1D, "M", "ⱍ"), + (0x2C1E, "M", "ⱎ"), + (0x2C1F, "M", "ⱏ"), + (0x2C20, "M", "ⱐ"), + (0x2C21, "M", "ⱑ"), + (0x2C22, "M", "ⱒ"), + (0x2C23, "M", "ⱓ"), + (0x2C24, "M", "ⱔ"), + (0x2C25, "M", "ⱕ"), + (0x2C26, "M", "ⱖ"), + (0x2C27, "M", "ⱗ"), + (0x2C28, "M", "ⱘ"), + (0x2C29, "M", "ⱙ"), + (0x2C2A, "M", "ⱚ"), + (0x2C2B, "M", "ⱛ"), + (0x2C2C, "M", "ⱜ"), + ] + + +def _seg_25() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x2C2D, "M", "ⱝ"), + (0x2C2E, "M", "ⱞ"), + (0x2C2F, "M", "ⱟ"), + (0x2C30, "V"), + (0x2C60, "M", "ⱡ"), + (0x2C61, "V"), + (0x2C62, "M", "ɫ"), + (0x2C63, "M", "ᵽ"), + (0x2C64, "M", "ɽ"), + (0x2C65, "V"), + (0x2C67, "M", "ⱨ"), + (0x2C68, "V"), + (0x2C69, "M", "ⱪ"), + (0x2C6A, "V"), + (0x2C6B, "M", "ⱬ"), + (0x2C6C, "V"), + (0x2C6D, "M", "ɑ"), + (0x2C6E, "M", "ɱ"), + (0x2C6F, "M", "ɐ"), + (0x2C70, "M", "ɒ"), + (0x2C71, "V"), + (0x2C72, "M", "ⱳ"), + (0x2C73, "V"), + (0x2C75, "M", "ⱶ"), + (0x2C76, "V"), + (0x2C7C, "M", "j"), + (0x2C7D, "M", "v"), + (0x2C7E, "M", "ȿ"), + (0x2C7F, "M", "ɀ"), + (0x2C80, "M", "ⲁ"), + (0x2C81, "V"), + (0x2C82, "M", "ⲃ"), + (0x2C83, "V"), + (0x2C84, "M", "ⲅ"), + (0x2C85, "V"), + (0x2C86, "M", "ⲇ"), + (0x2C87, "V"), + (0x2C88, "M", "ⲉ"), + (0x2C89, "V"), + (0x2C8A, "M", "ⲋ"), + (0x2C8B, "V"), + (0x2C8C, "M", "ⲍ"), + (0x2C8D, "V"), + (0x2C8E, "M", "ⲏ"), + (0x2C8F, "V"), + (0x2C90, "M", "ⲑ"), + (0x2C91, "V"), + (0x2C92, "M", "ⲓ"), + (0x2C93, "V"), + (0x2C94, "M", "ⲕ"), + (0x2C95, "V"), + (0x2C96, "M", "ⲗ"), + (0x2C97, "V"), + (0x2C98, "M", "ⲙ"), + (0x2C99, "V"), + (0x2C9A, "M", "ⲛ"), + (0x2C9B, "V"), + (0x2C9C, "M", "ⲝ"), + (0x2C9D, "V"), + (0x2C9E, "M", "ⲟ"), + (0x2C9F, "V"), + (0x2CA0, "M", "ⲡ"), + (0x2CA1, "V"), + (0x2CA2, "M", "ⲣ"), + (0x2CA3, "V"), + (0x2CA4, "M", "ⲥ"), + (0x2CA5, 
"V"), + (0x2CA6, "M", "ⲧ"), + (0x2CA7, "V"), + (0x2CA8, "M", "ⲩ"), + (0x2CA9, "V"), + (0x2CAA, "M", "ⲫ"), + (0x2CAB, "V"), + (0x2CAC, "M", "ⲭ"), + (0x2CAD, "V"), + (0x2CAE, "M", "ⲯ"), + (0x2CAF, "V"), + (0x2CB0, "M", "ⲱ"), + (0x2CB1, "V"), + (0x2CB2, "M", "ⲳ"), + (0x2CB3, "V"), + (0x2CB4, "M", "ⲵ"), + (0x2CB5, "V"), + (0x2CB6, "M", "ⲷ"), + (0x2CB7, "V"), + (0x2CB8, "M", "ⲹ"), + (0x2CB9, "V"), + (0x2CBA, "M", "ⲻ"), + (0x2CBB, "V"), + (0x2CBC, "M", "ⲽ"), + (0x2CBD, "V"), + (0x2CBE, "M", "ⲿ"), + (0x2CBF, "V"), + (0x2CC0, "M", "ⳁ"), + (0x2CC1, "V"), + (0x2CC2, "M", "ⳃ"), + (0x2CC3, "V"), + (0x2CC4, "M", "ⳅ"), + (0x2CC5, "V"), + (0x2CC6, "M", "ⳇ"), + ] + + +def _seg_26() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x2CC7, "V"), + (0x2CC8, "M", "ⳉ"), + (0x2CC9, "V"), + (0x2CCA, "M", "ⳋ"), + (0x2CCB, "V"), + (0x2CCC, "M", "ⳍ"), + (0x2CCD, "V"), + (0x2CCE, "M", "ⳏ"), + (0x2CCF, "V"), + (0x2CD0, "M", "ⳑ"), + (0x2CD1, "V"), + (0x2CD2, "M", "ⳓ"), + (0x2CD3, "V"), + (0x2CD4, "M", "ⳕ"), + (0x2CD5, "V"), + (0x2CD6, "M", "ⳗ"), + (0x2CD7, "V"), + (0x2CD8, "M", "ⳙ"), + (0x2CD9, "V"), + (0x2CDA, "M", "ⳛ"), + (0x2CDB, "V"), + (0x2CDC, "M", "ⳝ"), + (0x2CDD, "V"), + (0x2CDE, "M", "ⳟ"), + (0x2CDF, "V"), + (0x2CE0, "M", "ⳡ"), + (0x2CE1, "V"), + (0x2CE2, "M", "ⳣ"), + (0x2CE3, "V"), + (0x2CEB, "M", "ⳬ"), + (0x2CEC, "V"), + (0x2CED, "M", "ⳮ"), + (0x2CEE, "V"), + (0x2CF2, "M", "ⳳ"), + (0x2CF3, "V"), + (0x2CF4, "X"), + (0x2CF9, "V"), + (0x2D26, "X"), + (0x2D27, "V"), + (0x2D28, "X"), + (0x2D2D, "V"), + (0x2D2E, "X"), + (0x2D30, "V"), + (0x2D68, "X"), + (0x2D6F, "M", "ⵡ"), + (0x2D70, "V"), + (0x2D71, "X"), + (0x2D7F, "V"), + (0x2D97, "X"), + (0x2DA0, "V"), + (0x2DA7, "X"), + (0x2DA8, "V"), + (0x2DAF, "X"), + (0x2DB0, "V"), + (0x2DB7, "X"), + (0x2DB8, "V"), + (0x2DBF, "X"), + (0x2DC0, "V"), + (0x2DC7, "X"), + (0x2DC8, "V"), + (0x2DCF, "X"), + (0x2DD0, "V"), + (0x2DD7, "X"), + (0x2DD8, "V"), + (0x2DDF, "X"), + (0x2DE0, "V"), + (0x2E5E, "X"), + (0x2E80, "V"), + (0x2E9A, 
"X"), + (0x2E9B, "V"), + (0x2E9F, "M", "母"), + (0x2EA0, "V"), + (0x2EF3, "M", "龟"), + (0x2EF4, "X"), + (0x2F00, "M", "一"), + (0x2F01, "M", "丨"), + (0x2F02, "M", "丶"), + (0x2F03, "M", "丿"), + (0x2F04, "M", "乙"), + (0x2F05, "M", "亅"), + (0x2F06, "M", "二"), + (0x2F07, "M", "亠"), + (0x2F08, "M", "人"), + (0x2F09, "M", "儿"), + (0x2F0A, "M", "入"), + (0x2F0B, "M", "八"), + (0x2F0C, "M", "冂"), + (0x2F0D, "M", "冖"), + (0x2F0E, "M", "冫"), + (0x2F0F, "M", "几"), + (0x2F10, "M", "凵"), + (0x2F11, "M", "刀"), + (0x2F12, "M", "力"), + (0x2F13, "M", "勹"), + (0x2F14, "M", "匕"), + (0x2F15, "M", "匚"), + (0x2F16, "M", "匸"), + (0x2F17, "M", "十"), + (0x2F18, "M", "卜"), + (0x2F19, "M", "卩"), + ] + + +def _seg_27() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x2F1A, "M", "厂"), + (0x2F1B, "M", "厶"), + (0x2F1C, "M", "又"), + (0x2F1D, "M", "口"), + (0x2F1E, "M", "囗"), + (0x2F1F, "M", "土"), + (0x2F20, "M", "士"), + (0x2F21, "M", "夂"), + (0x2F22, "M", "夊"), + (0x2F23, "M", "夕"), + (0x2F24, "M", "大"), + (0x2F25, "M", "女"), + (0x2F26, "M", "子"), + (0x2F27, "M", "宀"), + (0x2F28, "M", "寸"), + (0x2F29, "M", "小"), + (0x2F2A, "M", "尢"), + (0x2F2B, "M", "尸"), + (0x2F2C, "M", "屮"), + (0x2F2D, "M", "山"), + (0x2F2E, "M", "巛"), + (0x2F2F, "M", "工"), + (0x2F30, "M", "己"), + (0x2F31, "M", "巾"), + (0x2F32, "M", "干"), + (0x2F33, "M", "幺"), + (0x2F34, "M", "广"), + (0x2F35, "M", "廴"), + (0x2F36, "M", "廾"), + (0x2F37, "M", "弋"), + (0x2F38, "M", "弓"), + (0x2F39, "M", "彐"), + (0x2F3A, "M", "彡"), + (0x2F3B, "M", "彳"), + (0x2F3C, "M", "心"), + (0x2F3D, "M", "戈"), + (0x2F3E, "M", "戶"), + (0x2F3F, "M", "手"), + (0x2F40, "M", "支"), + (0x2F41, "M", "攴"), + (0x2F42, "M", "文"), + (0x2F43, "M", "斗"), + (0x2F44, "M", "斤"), + (0x2F45, "M", "方"), + (0x2F46, "M", "无"), + (0x2F47, "M", "日"), + (0x2F48, "M", "曰"), + (0x2F49, "M", "月"), + (0x2F4A, "M", "木"), + (0x2F4B, "M", "欠"), + (0x2F4C, "M", "止"), + (0x2F4D, "M", "歹"), + (0x2F4E, "M", "殳"), + (0x2F4F, "M", "毋"), + (0x2F50, "M", "比"), + (0x2F51, "M", "毛"), + 
(0x2F52, "M", "氏"), + (0x2F53, "M", "气"), + (0x2F54, "M", "水"), + (0x2F55, "M", "火"), + (0x2F56, "M", "爪"), + (0x2F57, "M", "父"), + (0x2F58, "M", "爻"), + (0x2F59, "M", "爿"), + (0x2F5A, "M", "片"), + (0x2F5B, "M", "牙"), + (0x2F5C, "M", "牛"), + (0x2F5D, "M", "犬"), + (0x2F5E, "M", "玄"), + (0x2F5F, "M", "玉"), + (0x2F60, "M", "瓜"), + (0x2F61, "M", "瓦"), + (0x2F62, "M", "甘"), + (0x2F63, "M", "生"), + (0x2F64, "M", "用"), + (0x2F65, "M", "田"), + (0x2F66, "M", "疋"), + (0x2F67, "M", "疒"), + (0x2F68, "M", "癶"), + (0x2F69, "M", "白"), + (0x2F6A, "M", "皮"), + (0x2F6B, "M", "皿"), + (0x2F6C, "M", "目"), + (0x2F6D, "M", "矛"), + (0x2F6E, "M", "矢"), + (0x2F6F, "M", "石"), + (0x2F70, "M", "示"), + (0x2F71, "M", "禸"), + (0x2F72, "M", "禾"), + (0x2F73, "M", "穴"), + (0x2F74, "M", "立"), + (0x2F75, "M", "竹"), + (0x2F76, "M", "米"), + (0x2F77, "M", "糸"), + (0x2F78, "M", "缶"), + (0x2F79, "M", "网"), + (0x2F7A, "M", "羊"), + (0x2F7B, "M", "羽"), + (0x2F7C, "M", "老"), + (0x2F7D, "M", "而"), + ] + + +def _seg_28() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x2F7E, "M", "耒"), + (0x2F7F, "M", "耳"), + (0x2F80, "M", "聿"), + (0x2F81, "M", "肉"), + (0x2F82, "M", "臣"), + (0x2F83, "M", "自"), + (0x2F84, "M", "至"), + (0x2F85, "M", "臼"), + (0x2F86, "M", "舌"), + (0x2F87, "M", "舛"), + (0x2F88, "M", "舟"), + (0x2F89, "M", "艮"), + (0x2F8A, "M", "色"), + (0x2F8B, "M", "艸"), + (0x2F8C, "M", "虍"), + (0x2F8D, "M", "虫"), + (0x2F8E, "M", "血"), + (0x2F8F, "M", "行"), + (0x2F90, "M", "衣"), + (0x2F91, "M", "襾"), + (0x2F92, "M", "見"), + (0x2F93, "M", "角"), + (0x2F94, "M", "言"), + (0x2F95, "M", "谷"), + (0x2F96, "M", "豆"), + (0x2F97, "M", "豕"), + (0x2F98, "M", "豸"), + (0x2F99, "M", "貝"), + (0x2F9A, "M", "赤"), + (0x2F9B, "M", "走"), + (0x2F9C, "M", "足"), + (0x2F9D, "M", "身"), + (0x2F9E, "M", "車"), + (0x2F9F, "M", "辛"), + (0x2FA0, "M", "辰"), + (0x2FA1, "M", "辵"), + (0x2FA2, "M", "邑"), + (0x2FA3, "M", "酉"), + (0x2FA4, "M", "釆"), + (0x2FA5, "M", "里"), + (0x2FA6, "M", "金"), + (0x2FA7, "M", "長"), + (0x2FA8, "M", 
"門"), + (0x2FA9, "M", "阜"), + (0x2FAA, "M", "隶"), + (0x2FAB, "M", "隹"), + (0x2FAC, "M", "雨"), + (0x2FAD, "M", "靑"), + (0x2FAE, "M", "非"), + (0x2FAF, "M", "面"), + (0x2FB0, "M", "革"), + (0x2FB1, "M", "韋"), + (0x2FB2, "M", "韭"), + (0x2FB3, "M", "音"), + (0x2FB4, "M", "頁"), + (0x2FB5, "M", "風"), + (0x2FB6, "M", "飛"), + (0x2FB7, "M", "食"), + (0x2FB8, "M", "首"), + (0x2FB9, "M", "香"), + (0x2FBA, "M", "馬"), + (0x2FBB, "M", "骨"), + (0x2FBC, "M", "高"), + (0x2FBD, "M", "髟"), + (0x2FBE, "M", "鬥"), + (0x2FBF, "M", "鬯"), + (0x2FC0, "M", "鬲"), + (0x2FC1, "M", "鬼"), + (0x2FC2, "M", "魚"), + (0x2FC3, "M", "鳥"), + (0x2FC4, "M", "鹵"), + (0x2FC5, "M", "鹿"), + (0x2FC6, "M", "麥"), + (0x2FC7, "M", "麻"), + (0x2FC8, "M", "黃"), + (0x2FC9, "M", "黍"), + (0x2FCA, "M", "黑"), + (0x2FCB, "M", "黹"), + (0x2FCC, "M", "黽"), + (0x2FCD, "M", "鼎"), + (0x2FCE, "M", "鼓"), + (0x2FCF, "M", "鼠"), + (0x2FD0, "M", "鼻"), + (0x2FD1, "M", "齊"), + (0x2FD2, "M", "齒"), + (0x2FD3, "M", "龍"), + (0x2FD4, "M", "龜"), + (0x2FD5, "M", "龠"), + (0x2FD6, "X"), + (0x3000, "3", " "), + (0x3001, "V"), + (0x3002, "M", "."), + (0x3003, "V"), + (0x3036, "M", "〒"), + (0x3037, "V"), + (0x3038, "M", "十"), + (0x3039, "M", "卄"), + (0x303A, "M", "卅"), + (0x303B, "V"), + (0x3040, "X"), + ] + + +def _seg_29() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x3041, "V"), + (0x3097, "X"), + (0x3099, "V"), + (0x309B, "3", " ゙"), + (0x309C, "3", " ゚"), + (0x309D, "V"), + (0x309F, "M", "より"), + (0x30A0, "V"), + (0x30FF, "M", "コト"), + (0x3100, "X"), + (0x3105, "V"), + (0x3130, "X"), + (0x3131, "M", "ᄀ"), + (0x3132, "M", "ᄁ"), + (0x3133, "M", "ᆪ"), + (0x3134, "M", "ᄂ"), + (0x3135, "M", "ᆬ"), + (0x3136, "M", "ᆭ"), + (0x3137, "M", "ᄃ"), + (0x3138, "M", "ᄄ"), + (0x3139, "M", "ᄅ"), + (0x313A, "M", "ᆰ"), + (0x313B, "M", "ᆱ"), + (0x313C, "M", "ᆲ"), + (0x313D, "M", "ᆳ"), + (0x313E, "M", "ᆴ"), + (0x313F, "M", "ᆵ"), + (0x3140, "M", "ᄚ"), + (0x3141, "M", "ᄆ"), + (0x3142, "M", "ᄇ"), + (0x3143, "M", "ᄈ"), + (0x3144, "M", "ᄡ"), + (0x3145, 
"M", "ᄉ"), + (0x3146, "M", "ᄊ"), + (0x3147, "M", "ᄋ"), + (0x3148, "M", "ᄌ"), + (0x3149, "M", "ᄍ"), + (0x314A, "M", "ᄎ"), + (0x314B, "M", "ᄏ"), + (0x314C, "M", "ᄐ"), + (0x314D, "M", "ᄑ"), + (0x314E, "M", "ᄒ"), + (0x314F, "M", "ᅡ"), + (0x3150, "M", "ᅢ"), + (0x3151, "M", "ᅣ"), + (0x3152, "M", "ᅤ"), + (0x3153, "M", "ᅥ"), + (0x3154, "M", "ᅦ"), + (0x3155, "M", "ᅧ"), + (0x3156, "M", "ᅨ"), + (0x3157, "M", "ᅩ"), + (0x3158, "M", "ᅪ"), + (0x3159, "M", "ᅫ"), + (0x315A, "M", "ᅬ"), + (0x315B, "M", "ᅭ"), + (0x315C, "M", "ᅮ"), + (0x315D, "M", "ᅯ"), + (0x315E, "M", "ᅰ"), + (0x315F, "M", "ᅱ"), + (0x3160, "M", "ᅲ"), + (0x3161, "M", "ᅳ"), + (0x3162, "M", "ᅴ"), + (0x3163, "M", "ᅵ"), + (0x3164, "X"), + (0x3165, "M", "ᄔ"), + (0x3166, "M", "ᄕ"), + (0x3167, "M", "ᇇ"), + (0x3168, "M", "ᇈ"), + (0x3169, "M", "ᇌ"), + (0x316A, "M", "ᇎ"), + (0x316B, "M", "ᇓ"), + (0x316C, "M", "ᇗ"), + (0x316D, "M", "ᇙ"), + (0x316E, "M", "ᄜ"), + (0x316F, "M", "ᇝ"), + (0x3170, "M", "ᇟ"), + (0x3171, "M", "ᄝ"), + (0x3172, "M", "ᄞ"), + (0x3173, "M", "ᄠ"), + (0x3174, "M", "ᄢ"), + (0x3175, "M", "ᄣ"), + (0x3176, "M", "ᄧ"), + (0x3177, "M", "ᄩ"), + (0x3178, "M", "ᄫ"), + (0x3179, "M", "ᄬ"), + (0x317A, "M", "ᄭ"), + (0x317B, "M", "ᄮ"), + (0x317C, "M", "ᄯ"), + (0x317D, "M", "ᄲ"), + (0x317E, "M", "ᄶ"), + (0x317F, "M", "ᅀ"), + (0x3180, "M", "ᅇ"), + (0x3181, "M", "ᅌ"), + (0x3182, "M", "ᇱ"), + (0x3183, "M", "ᇲ"), + (0x3184, "M", "ᅗ"), + (0x3185, "M", "ᅘ"), + (0x3186, "M", "ᅙ"), + (0x3187, "M", "ᆄ"), + (0x3188, "M", "ᆅ"), + ] + + +def _seg_30() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x3189, "M", "ᆈ"), + (0x318A, "M", "ᆑ"), + (0x318B, "M", "ᆒ"), + (0x318C, "M", "ᆔ"), + (0x318D, "M", "ᆞ"), + (0x318E, "M", "ᆡ"), + (0x318F, "X"), + (0x3190, "V"), + (0x3192, "M", "一"), + (0x3193, "M", "二"), + (0x3194, "M", "三"), + (0x3195, "M", "四"), + (0x3196, "M", "上"), + (0x3197, "M", "中"), + (0x3198, "M", "下"), + (0x3199, "M", "甲"), + (0x319A, "M", "乙"), + (0x319B, "M", "丙"), + (0x319C, "M", "丁"), + (0x319D, "M", "天"), 
+ (0x319E, "M", "地"), + (0x319F, "M", "人"), + (0x31A0, "V"), + (0x31E4, "X"), + (0x31F0, "V"), + (0x3200, "3", "(ᄀ)"), + (0x3201, "3", "(ᄂ)"), + (0x3202, "3", "(ᄃ)"), + (0x3203, "3", "(ᄅ)"), + (0x3204, "3", "(ᄆ)"), + (0x3205, "3", "(ᄇ)"), + (0x3206, "3", "(ᄉ)"), + (0x3207, "3", "(ᄋ)"), + (0x3208, "3", "(ᄌ)"), + (0x3209, "3", "(ᄎ)"), + (0x320A, "3", "(ᄏ)"), + (0x320B, "3", "(ᄐ)"), + (0x320C, "3", "(ᄑ)"), + (0x320D, "3", "(ᄒ)"), + (0x320E, "3", "(가)"), + (0x320F, "3", "(나)"), + (0x3210, "3", "(다)"), + (0x3211, "3", "(라)"), + (0x3212, "3", "(마)"), + (0x3213, "3", "(바)"), + (0x3214, "3", "(사)"), + (0x3215, "3", "(아)"), + (0x3216, "3", "(자)"), + (0x3217, "3", "(차)"), + (0x3218, "3", "(카)"), + (0x3219, "3", "(타)"), + (0x321A, "3", "(파)"), + (0x321B, "3", "(하)"), + (0x321C, "3", "(주)"), + (0x321D, "3", "(오전)"), + (0x321E, "3", "(오후)"), + (0x321F, "X"), + (0x3220, "3", "(一)"), + (0x3221, "3", "(二)"), + (0x3222, "3", "(三)"), + (0x3223, "3", "(四)"), + (0x3224, "3", "(五)"), + (0x3225, "3", "(六)"), + (0x3226, "3", "(七)"), + (0x3227, "3", "(八)"), + (0x3228, "3", "(九)"), + (0x3229, "3", "(十)"), + (0x322A, "3", "(月)"), + (0x322B, "3", "(火)"), + (0x322C, "3", "(水)"), + (0x322D, "3", "(木)"), + (0x322E, "3", "(金)"), + (0x322F, "3", "(土)"), + (0x3230, "3", "(日)"), + (0x3231, "3", "(株)"), + (0x3232, "3", "(有)"), + (0x3233, "3", "(社)"), + (0x3234, "3", "(名)"), + (0x3235, "3", "(特)"), + (0x3236, "3", "(財)"), + (0x3237, "3", "(祝)"), + (0x3238, "3", "(労)"), + (0x3239, "3", "(代)"), + (0x323A, "3", "(呼)"), + (0x323B, "3", "(学)"), + (0x323C, "3", "(監)"), + (0x323D, "3", "(企)"), + (0x323E, "3", "(資)"), + (0x323F, "3", "(協)"), + (0x3240, "3", "(祭)"), + (0x3241, "3", "(休)"), + (0x3242, "3", "(自)"), + (0x3243, "3", "(至)"), + (0x3244, "M", "問"), + (0x3245, "M", "幼"), + (0x3246, "M", "文"), + (0x3247, "M", "箏"), + (0x3248, "V"), + (0x3250, "M", "pte"), + (0x3251, "M", "21"), + ] + + +def _seg_31() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x3252, "M", "22"), + (0x3253, 
"M", "23"), + (0x3254, "M", "24"), + (0x3255, "M", "25"), + (0x3256, "M", "26"), + (0x3257, "M", "27"), + (0x3258, "M", "28"), + (0x3259, "M", "29"), + (0x325A, "M", "30"), + (0x325B, "M", "31"), + (0x325C, "M", "32"), + (0x325D, "M", "33"), + (0x325E, "M", "34"), + (0x325F, "M", "35"), + (0x3260, "M", "ᄀ"), + (0x3261, "M", "ᄂ"), + (0x3262, "M", "ᄃ"), + (0x3263, "M", "ᄅ"), + (0x3264, "M", "ᄆ"), + (0x3265, "M", "ᄇ"), + (0x3266, "M", "ᄉ"), + (0x3267, "M", "ᄋ"), + (0x3268, "M", "ᄌ"), + (0x3269, "M", "ᄎ"), + (0x326A, "M", "ᄏ"), + (0x326B, "M", "ᄐ"), + (0x326C, "M", "ᄑ"), + (0x326D, "M", "ᄒ"), + (0x326E, "M", "가"), + (0x326F, "M", "나"), + (0x3270, "M", "다"), + (0x3271, "M", "라"), + (0x3272, "M", "마"), + (0x3273, "M", "바"), + (0x3274, "M", "사"), + (0x3275, "M", "아"), + (0x3276, "M", "자"), + (0x3277, "M", "차"), + (0x3278, "M", "카"), + (0x3279, "M", "타"), + (0x327A, "M", "파"), + (0x327B, "M", "하"), + (0x327C, "M", "참고"), + (0x327D, "M", "주의"), + (0x327E, "M", "우"), + (0x327F, "V"), + (0x3280, "M", "一"), + (0x3281, "M", "二"), + (0x3282, "M", "三"), + (0x3283, "M", "四"), + (0x3284, "M", "五"), + (0x3285, "M", "六"), + (0x3286, "M", "七"), + (0x3287, "M", "八"), + (0x3288, "M", "九"), + (0x3289, "M", "十"), + (0x328A, "M", "月"), + (0x328B, "M", "火"), + (0x328C, "M", "水"), + (0x328D, "M", "木"), + (0x328E, "M", "金"), + (0x328F, "M", "土"), + (0x3290, "M", "日"), + (0x3291, "M", "株"), + (0x3292, "M", "有"), + (0x3293, "M", "社"), + (0x3294, "M", "名"), + (0x3295, "M", "特"), + (0x3296, "M", "財"), + (0x3297, "M", "祝"), + (0x3298, "M", "労"), + (0x3299, "M", "秘"), + (0x329A, "M", "男"), + (0x329B, "M", "女"), + (0x329C, "M", "適"), + (0x329D, "M", "優"), + (0x329E, "M", "印"), + (0x329F, "M", "注"), + (0x32A0, "M", "項"), + (0x32A1, "M", "休"), + (0x32A2, "M", "写"), + (0x32A3, "M", "正"), + (0x32A4, "M", "上"), + (0x32A5, "M", "中"), + (0x32A6, "M", "下"), + (0x32A7, "M", "左"), + (0x32A8, "M", "右"), + (0x32A9, "M", "医"), + (0x32AA, "M", "宗"), + (0x32AB, "M", "学"), + (0x32AC, "M", "監"), + (0x32AD, "M", 
"企"), + (0x32AE, "M", "資"), + (0x32AF, "M", "協"), + (0x32B0, "M", "夜"), + (0x32B1, "M", "36"), + (0x32B2, "M", "37"), + (0x32B3, "M", "38"), + (0x32B4, "M", "39"), + (0x32B5, "M", "40"), + ] + + +def _seg_32() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x32B6, "M", "41"), + (0x32B7, "M", "42"), + (0x32B8, "M", "43"), + (0x32B9, "M", "44"), + (0x32BA, "M", "45"), + (0x32BB, "M", "46"), + (0x32BC, "M", "47"), + (0x32BD, "M", "48"), + (0x32BE, "M", "49"), + (0x32BF, "M", "50"), + (0x32C0, "M", "1月"), + (0x32C1, "M", "2月"), + (0x32C2, "M", "3月"), + (0x32C3, "M", "4月"), + (0x32C4, "M", "5月"), + (0x32C5, "M", "6月"), + (0x32C6, "M", "7月"), + (0x32C7, "M", "8月"), + (0x32C8, "M", "9月"), + (0x32C9, "M", "10月"), + (0x32CA, "M", "11月"), + (0x32CB, "M", "12月"), + (0x32CC, "M", "hg"), + (0x32CD, "M", "erg"), + (0x32CE, "M", "ev"), + (0x32CF, "M", "ltd"), + (0x32D0, "M", "ア"), + (0x32D1, "M", "イ"), + (0x32D2, "M", "ウ"), + (0x32D3, "M", "エ"), + (0x32D4, "M", "オ"), + (0x32D5, "M", "カ"), + (0x32D6, "M", "キ"), + (0x32D7, "M", "ク"), + (0x32D8, "M", "ケ"), + (0x32D9, "M", "コ"), + (0x32DA, "M", "サ"), + (0x32DB, "M", "シ"), + (0x32DC, "M", "ス"), + (0x32DD, "M", "セ"), + (0x32DE, "M", "ソ"), + (0x32DF, "M", "タ"), + (0x32E0, "M", "チ"), + (0x32E1, "M", "ツ"), + (0x32E2, "M", "テ"), + (0x32E3, "M", "ト"), + (0x32E4, "M", "ナ"), + (0x32E5, "M", "ニ"), + (0x32E6, "M", "ヌ"), + (0x32E7, "M", "ネ"), + (0x32E8, "M", "ノ"), + (0x32E9, "M", "ハ"), + (0x32EA, "M", "ヒ"), + (0x32EB, "M", "フ"), + (0x32EC, "M", "ヘ"), + (0x32ED, "M", "ホ"), + (0x32EE, "M", "マ"), + (0x32EF, "M", "ミ"), + (0x32F0, "M", "ム"), + (0x32F1, "M", "メ"), + (0x32F2, "M", "モ"), + (0x32F3, "M", "ヤ"), + (0x32F4, "M", "ユ"), + (0x32F5, "M", "ヨ"), + (0x32F6, "M", "ラ"), + (0x32F7, "M", "リ"), + (0x32F8, "M", "ル"), + (0x32F9, "M", "レ"), + (0x32FA, "M", "ロ"), + (0x32FB, "M", "ワ"), + (0x32FC, "M", "ヰ"), + (0x32FD, "M", "ヱ"), + (0x32FE, "M", "ヲ"), + (0x32FF, "M", "令和"), + (0x3300, "M", "アパート"), + (0x3301, "M", "アルファ"), + (0x3302, 
"M", "アンペア"), + (0x3303, "M", "アール"), + (0x3304, "M", "イニング"), + (0x3305, "M", "インチ"), + (0x3306, "M", "ウォン"), + (0x3307, "M", "エスクード"), + (0x3308, "M", "エーカー"), + (0x3309, "M", "オンス"), + (0x330A, "M", "オーム"), + (0x330B, "M", "カイリ"), + (0x330C, "M", "カラット"), + (0x330D, "M", "カロリー"), + (0x330E, "M", "ガロン"), + (0x330F, "M", "ガンマ"), + (0x3310, "M", "ギガ"), + (0x3311, "M", "ギニー"), + (0x3312, "M", "キュリー"), + (0x3313, "M", "ギルダー"), + (0x3314, "M", "キロ"), + (0x3315, "M", "キログラム"), + (0x3316, "M", "キロメートル"), + (0x3317, "M", "キロワット"), + (0x3318, "M", "グラム"), + (0x3319, "M", "グラムトン"), + ] + + +def _seg_33() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x331A, "M", "クルゼイロ"), + (0x331B, "M", "クローネ"), + (0x331C, "M", "ケース"), + (0x331D, "M", "コルナ"), + (0x331E, "M", "コーポ"), + (0x331F, "M", "サイクル"), + (0x3320, "M", "サンチーム"), + (0x3321, "M", "シリング"), + (0x3322, "M", "センチ"), + (0x3323, "M", "セント"), + (0x3324, "M", "ダース"), + (0x3325, "M", "デシ"), + (0x3326, "M", "ドル"), + (0x3327, "M", "トン"), + (0x3328, "M", "ナノ"), + (0x3329, "M", "ノット"), + (0x332A, "M", "ハイツ"), + (0x332B, "M", "パーセント"), + (0x332C, "M", "パーツ"), + (0x332D, "M", "バーレル"), + (0x332E, "M", "ピアストル"), + (0x332F, "M", "ピクル"), + (0x3330, "M", "ピコ"), + (0x3331, "M", "ビル"), + (0x3332, "M", "ファラッド"), + (0x3333, "M", "フィート"), + (0x3334, "M", "ブッシェル"), + (0x3335, "M", "フラン"), + (0x3336, "M", "ヘクタール"), + (0x3337, "M", "ペソ"), + (0x3338, "M", "ペニヒ"), + (0x3339, "M", "ヘルツ"), + (0x333A, "M", "ペンス"), + (0x333B, "M", "ページ"), + (0x333C, "M", "ベータ"), + (0x333D, "M", "ポイント"), + (0x333E, "M", "ボルト"), + (0x333F, "M", "ホン"), + (0x3340, "M", "ポンド"), + (0x3341, "M", "ホール"), + (0x3342, "M", "ホーン"), + (0x3343, "M", "マイクロ"), + (0x3344, "M", "マイル"), + (0x3345, "M", "マッハ"), + (0x3346, "M", "マルク"), + (0x3347, "M", "マンション"), + (0x3348, "M", "ミクロン"), + (0x3349, "M", "ミリ"), + (0x334A, "M", "ミリバール"), + (0x334B, "M", "メガ"), + (0x334C, "M", "メガトン"), + (0x334D, "M", "メートル"), + (0x334E, "M", "ヤード"), + (0x334F, "M", "ヤール"), + (0x3350, "M", 
"ユアン"), + (0x3351, "M", "リットル"), + (0x3352, "M", "リラ"), + (0x3353, "M", "ルピー"), + (0x3354, "M", "ルーブル"), + (0x3355, "M", "レム"), + (0x3356, "M", "レントゲン"), + (0x3357, "M", "ワット"), + (0x3358, "M", "0点"), + (0x3359, "M", "1点"), + (0x335A, "M", "2点"), + (0x335B, "M", "3点"), + (0x335C, "M", "4点"), + (0x335D, "M", "5点"), + (0x335E, "M", "6点"), + (0x335F, "M", "7点"), + (0x3360, "M", "8点"), + (0x3361, "M", "9点"), + (0x3362, "M", "10点"), + (0x3363, "M", "11点"), + (0x3364, "M", "12点"), + (0x3365, "M", "13点"), + (0x3366, "M", "14点"), + (0x3367, "M", "15点"), + (0x3368, "M", "16点"), + (0x3369, "M", "17点"), + (0x336A, "M", "18点"), + (0x336B, "M", "19点"), + (0x336C, "M", "20点"), + (0x336D, "M", "21点"), + (0x336E, "M", "22点"), + (0x336F, "M", "23点"), + (0x3370, "M", "24点"), + (0x3371, "M", "hpa"), + (0x3372, "M", "da"), + (0x3373, "M", "au"), + (0x3374, "M", "bar"), + (0x3375, "M", "ov"), + (0x3376, "M", "pc"), + (0x3377, "M", "dm"), + (0x3378, "M", "dm2"), + (0x3379, "M", "dm3"), + (0x337A, "M", "iu"), + (0x337B, "M", "平成"), + (0x337C, "M", "昭和"), + (0x337D, "M", "大正"), + ] + + +def _seg_34() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x337E, "M", "明治"), + (0x337F, "M", "株式会社"), + (0x3380, "M", "pa"), + (0x3381, "M", "na"), + (0x3382, "M", "μa"), + (0x3383, "M", "ma"), + (0x3384, "M", "ka"), + (0x3385, "M", "kb"), + (0x3386, "M", "mb"), + (0x3387, "M", "gb"), + (0x3388, "M", "cal"), + (0x3389, "M", "kcal"), + (0x338A, "M", "pf"), + (0x338B, "M", "nf"), + (0x338C, "M", "μf"), + (0x338D, "M", "μg"), + (0x338E, "M", "mg"), + (0x338F, "M", "kg"), + (0x3390, "M", "hz"), + (0x3391, "M", "khz"), + (0x3392, "M", "mhz"), + (0x3393, "M", "ghz"), + (0x3394, "M", "thz"), + (0x3395, "M", "μl"), + (0x3396, "M", "ml"), + (0x3397, "M", "dl"), + (0x3398, "M", "kl"), + (0x3399, "M", "fm"), + (0x339A, "M", "nm"), + (0x339B, "M", "μm"), + (0x339C, "M", "mm"), + (0x339D, "M", "cm"), + (0x339E, "M", "km"), + (0x339F, "M", "mm2"), + (0x33A0, "M", "cm2"), + (0x33A1, "M", "m2"), 
+ (0x33A2, "M", "km2"), + (0x33A3, "M", "mm3"), + (0x33A4, "M", "cm3"), + (0x33A5, "M", "m3"), + (0x33A6, "M", "km3"), + (0x33A7, "M", "m∕s"), + (0x33A8, "M", "m∕s2"), + (0x33A9, "M", "pa"), + (0x33AA, "M", "kpa"), + (0x33AB, "M", "mpa"), + (0x33AC, "M", "gpa"), + (0x33AD, "M", "rad"), + (0x33AE, "M", "rad∕s"), + (0x33AF, "M", "rad∕s2"), + (0x33B0, "M", "ps"), + (0x33B1, "M", "ns"), + (0x33B2, "M", "μs"), + (0x33B3, "M", "ms"), + (0x33B4, "M", "pv"), + (0x33B5, "M", "nv"), + (0x33B6, "M", "μv"), + (0x33B7, "M", "mv"), + (0x33B8, "M", "kv"), + (0x33B9, "M", "mv"), + (0x33BA, "M", "pw"), + (0x33BB, "M", "nw"), + (0x33BC, "M", "μw"), + (0x33BD, "M", "mw"), + (0x33BE, "M", "kw"), + (0x33BF, "M", "mw"), + (0x33C0, "M", "kω"), + (0x33C1, "M", "mω"), + (0x33C2, "X"), + (0x33C3, "M", "bq"), + (0x33C4, "M", "cc"), + (0x33C5, "M", "cd"), + (0x33C6, "M", "c∕kg"), + (0x33C7, "X"), + (0x33C8, "M", "db"), + (0x33C9, "M", "gy"), + (0x33CA, "M", "ha"), + (0x33CB, "M", "hp"), + (0x33CC, "M", "in"), + (0x33CD, "M", "kk"), + (0x33CE, "M", "km"), + (0x33CF, "M", "kt"), + (0x33D0, "M", "lm"), + (0x33D1, "M", "ln"), + (0x33D2, "M", "log"), + (0x33D3, "M", "lx"), + (0x33D4, "M", "mb"), + (0x33D5, "M", "mil"), + (0x33D6, "M", "mol"), + (0x33D7, "M", "ph"), + (0x33D8, "X"), + (0x33D9, "M", "ppm"), + (0x33DA, "M", "pr"), + (0x33DB, "M", "sr"), + (0x33DC, "M", "sv"), + (0x33DD, "M", "wb"), + (0x33DE, "M", "v∕m"), + (0x33DF, "M", "a∕m"), + (0x33E0, "M", "1日"), + (0x33E1, "M", "2日"), + ] + + +def _seg_35() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x33E2, "M", "3日"), + (0x33E3, "M", "4日"), + (0x33E4, "M", "5日"), + (0x33E5, "M", "6日"), + (0x33E6, "M", "7日"), + (0x33E7, "M", "8日"), + (0x33E8, "M", "9日"), + (0x33E9, "M", "10日"), + (0x33EA, "M", "11日"), + (0x33EB, "M", "12日"), + (0x33EC, "M", "13日"), + (0x33ED, "M", "14日"), + (0x33EE, "M", "15日"), + (0x33EF, "M", "16日"), + (0x33F0, "M", "17日"), + (0x33F1, "M", "18日"), + (0x33F2, "M", "19日"), + (0x33F3, "M", "20日"), + 
(0x33F4, "M", "21日"), + (0x33F5, "M", "22日"), + (0x33F6, "M", "23日"), + (0x33F7, "M", "24日"), + (0x33F8, "M", "25日"), + (0x33F9, "M", "26日"), + (0x33FA, "M", "27日"), + (0x33FB, "M", "28日"), + (0x33FC, "M", "29日"), + (0x33FD, "M", "30日"), + (0x33FE, "M", "31日"), + (0x33FF, "M", "gal"), + (0x3400, "V"), + (0xA48D, "X"), + (0xA490, "V"), + (0xA4C7, "X"), + (0xA4D0, "V"), + (0xA62C, "X"), + (0xA640, "M", "ꙁ"), + (0xA641, "V"), + (0xA642, "M", "ꙃ"), + (0xA643, "V"), + (0xA644, "M", "ꙅ"), + (0xA645, "V"), + (0xA646, "M", "ꙇ"), + (0xA647, "V"), + (0xA648, "M", "ꙉ"), + (0xA649, "V"), + (0xA64A, "M", "ꙋ"), + (0xA64B, "V"), + (0xA64C, "M", "ꙍ"), + (0xA64D, "V"), + (0xA64E, "M", "ꙏ"), + (0xA64F, "V"), + (0xA650, "M", "ꙑ"), + (0xA651, "V"), + (0xA652, "M", "ꙓ"), + (0xA653, "V"), + (0xA654, "M", "ꙕ"), + (0xA655, "V"), + (0xA656, "M", "ꙗ"), + (0xA657, "V"), + (0xA658, "M", "ꙙ"), + (0xA659, "V"), + (0xA65A, "M", "ꙛ"), + (0xA65B, "V"), + (0xA65C, "M", "ꙝ"), + (0xA65D, "V"), + (0xA65E, "M", "ꙟ"), + (0xA65F, "V"), + (0xA660, "M", "ꙡ"), + (0xA661, "V"), + (0xA662, "M", "ꙣ"), + (0xA663, "V"), + (0xA664, "M", "ꙥ"), + (0xA665, "V"), + (0xA666, "M", "ꙧ"), + (0xA667, "V"), + (0xA668, "M", "ꙩ"), + (0xA669, "V"), + (0xA66A, "M", "ꙫ"), + (0xA66B, "V"), + (0xA66C, "M", "ꙭ"), + (0xA66D, "V"), + (0xA680, "M", "ꚁ"), + (0xA681, "V"), + (0xA682, "M", "ꚃ"), + (0xA683, "V"), + (0xA684, "M", "ꚅ"), + (0xA685, "V"), + (0xA686, "M", "ꚇ"), + (0xA687, "V"), + (0xA688, "M", "ꚉ"), + (0xA689, "V"), + (0xA68A, "M", "ꚋ"), + (0xA68B, "V"), + (0xA68C, "M", "ꚍ"), + (0xA68D, "V"), + (0xA68E, "M", "ꚏ"), + (0xA68F, "V"), + (0xA690, "M", "ꚑ"), + (0xA691, "V"), + ] + + +def _seg_36() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xA692, "M", "ꚓ"), + (0xA693, "V"), + (0xA694, "M", "ꚕ"), + (0xA695, "V"), + (0xA696, "M", "ꚗ"), + (0xA697, "V"), + (0xA698, "M", "ꚙ"), + (0xA699, "V"), + (0xA69A, "M", "ꚛ"), + (0xA69B, "V"), + (0xA69C, "M", "ъ"), + (0xA69D, "M", "ь"), + (0xA69E, "V"), + (0xA6F8, "X"), + 
(0xA700, "V"), + (0xA722, "M", "ꜣ"), + (0xA723, "V"), + (0xA724, "M", "ꜥ"), + (0xA725, "V"), + (0xA726, "M", "ꜧ"), + (0xA727, "V"), + (0xA728, "M", "ꜩ"), + (0xA729, "V"), + (0xA72A, "M", "ꜫ"), + (0xA72B, "V"), + (0xA72C, "M", "ꜭ"), + (0xA72D, "V"), + (0xA72E, "M", "ꜯ"), + (0xA72F, "V"), + (0xA732, "M", "ꜳ"), + (0xA733, "V"), + (0xA734, "M", "ꜵ"), + (0xA735, "V"), + (0xA736, "M", "ꜷ"), + (0xA737, "V"), + (0xA738, "M", "ꜹ"), + (0xA739, "V"), + (0xA73A, "M", "ꜻ"), + (0xA73B, "V"), + (0xA73C, "M", "ꜽ"), + (0xA73D, "V"), + (0xA73E, "M", "ꜿ"), + (0xA73F, "V"), + (0xA740, "M", "ꝁ"), + (0xA741, "V"), + (0xA742, "M", "ꝃ"), + (0xA743, "V"), + (0xA744, "M", "ꝅ"), + (0xA745, "V"), + (0xA746, "M", "ꝇ"), + (0xA747, "V"), + (0xA748, "M", "ꝉ"), + (0xA749, "V"), + (0xA74A, "M", "ꝋ"), + (0xA74B, "V"), + (0xA74C, "M", "ꝍ"), + (0xA74D, "V"), + (0xA74E, "M", "ꝏ"), + (0xA74F, "V"), + (0xA750, "M", "ꝑ"), + (0xA751, "V"), + (0xA752, "M", "ꝓ"), + (0xA753, "V"), + (0xA754, "M", "ꝕ"), + (0xA755, "V"), + (0xA756, "M", "ꝗ"), + (0xA757, "V"), + (0xA758, "M", "ꝙ"), + (0xA759, "V"), + (0xA75A, "M", "ꝛ"), + (0xA75B, "V"), + (0xA75C, "M", "ꝝ"), + (0xA75D, "V"), + (0xA75E, "M", "ꝟ"), + (0xA75F, "V"), + (0xA760, "M", "ꝡ"), + (0xA761, "V"), + (0xA762, "M", "ꝣ"), + (0xA763, "V"), + (0xA764, "M", "ꝥ"), + (0xA765, "V"), + (0xA766, "M", "ꝧ"), + (0xA767, "V"), + (0xA768, "M", "ꝩ"), + (0xA769, "V"), + (0xA76A, "M", "ꝫ"), + (0xA76B, "V"), + (0xA76C, "M", "ꝭ"), + (0xA76D, "V"), + (0xA76E, "M", "ꝯ"), + (0xA76F, "V"), + (0xA770, "M", "ꝯ"), + (0xA771, "V"), + (0xA779, "M", "ꝺ"), + (0xA77A, "V"), + (0xA77B, "M", "ꝼ"), + (0xA77C, "V"), + (0xA77D, "M", "ᵹ"), + (0xA77E, "M", "ꝿ"), + (0xA77F, "V"), + ] + + +def _seg_37() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xA780, "M", "ꞁ"), + (0xA781, "V"), + (0xA782, "M", "ꞃ"), + (0xA783, "V"), + (0xA784, "M", "ꞅ"), + (0xA785, "V"), + (0xA786, "M", "ꞇ"), + (0xA787, "V"), + (0xA78B, "M", "ꞌ"), + (0xA78C, "V"), + (0xA78D, "M", "ɥ"), + (0xA78E, "V"), + 
(0xA790, "M", "ꞑ"), + (0xA791, "V"), + (0xA792, "M", "ꞓ"), + (0xA793, "V"), + (0xA796, "M", "ꞗ"), + (0xA797, "V"), + (0xA798, "M", "ꞙ"), + (0xA799, "V"), + (0xA79A, "M", "ꞛ"), + (0xA79B, "V"), + (0xA79C, "M", "ꞝ"), + (0xA79D, "V"), + (0xA79E, "M", "ꞟ"), + (0xA79F, "V"), + (0xA7A0, "M", "ꞡ"), + (0xA7A1, "V"), + (0xA7A2, "M", "ꞣ"), + (0xA7A3, "V"), + (0xA7A4, "M", "ꞥ"), + (0xA7A5, "V"), + (0xA7A6, "M", "ꞧ"), + (0xA7A7, "V"), + (0xA7A8, "M", "ꞩ"), + (0xA7A9, "V"), + (0xA7AA, "M", "ɦ"), + (0xA7AB, "M", "ɜ"), + (0xA7AC, "M", "ɡ"), + (0xA7AD, "M", "ɬ"), + (0xA7AE, "M", "ɪ"), + (0xA7AF, "V"), + (0xA7B0, "M", "ʞ"), + (0xA7B1, "M", "ʇ"), + (0xA7B2, "M", "ʝ"), + (0xA7B3, "M", "ꭓ"), + (0xA7B4, "M", "ꞵ"), + (0xA7B5, "V"), + (0xA7B6, "M", "ꞷ"), + (0xA7B7, "V"), + (0xA7B8, "M", "ꞹ"), + (0xA7B9, "V"), + (0xA7BA, "M", "ꞻ"), + (0xA7BB, "V"), + (0xA7BC, "M", "ꞽ"), + (0xA7BD, "V"), + (0xA7BE, "M", "ꞿ"), + (0xA7BF, "V"), + (0xA7C0, "M", "ꟁ"), + (0xA7C1, "V"), + (0xA7C2, "M", "ꟃ"), + (0xA7C3, "V"), + (0xA7C4, "M", "ꞔ"), + (0xA7C5, "M", "ʂ"), + (0xA7C6, "M", "ᶎ"), + (0xA7C7, "M", "ꟈ"), + (0xA7C8, "V"), + (0xA7C9, "M", "ꟊ"), + (0xA7CA, "V"), + (0xA7CB, "X"), + (0xA7D0, "M", "ꟑ"), + (0xA7D1, "V"), + (0xA7D2, "X"), + (0xA7D3, "V"), + (0xA7D4, "X"), + (0xA7D5, "V"), + (0xA7D6, "M", "ꟗ"), + (0xA7D7, "V"), + (0xA7D8, "M", "ꟙ"), + (0xA7D9, "V"), + (0xA7DA, "X"), + (0xA7F2, "M", "c"), + (0xA7F3, "M", "f"), + (0xA7F4, "M", "q"), + (0xA7F5, "M", "ꟶ"), + (0xA7F6, "V"), + (0xA7F8, "M", "ħ"), + (0xA7F9, "M", "œ"), + (0xA7FA, "V"), + (0xA82D, "X"), + (0xA830, "V"), + (0xA83A, "X"), + (0xA840, "V"), + (0xA878, "X"), + (0xA880, "V"), + (0xA8C6, "X"), + (0xA8CE, "V"), + (0xA8DA, "X"), + (0xA8E0, "V"), + (0xA954, "X"), + ] + + +def _seg_38() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xA95F, "V"), + (0xA97D, "X"), + (0xA980, "V"), + (0xA9CE, "X"), + (0xA9CF, "V"), + (0xA9DA, "X"), + (0xA9DE, "V"), + (0xA9FF, "X"), + (0xAA00, "V"), + (0xAA37, "X"), + (0xAA40, "V"), + (0xAA4E, 
"X"), + (0xAA50, "V"), + (0xAA5A, "X"), + (0xAA5C, "V"), + (0xAAC3, "X"), + (0xAADB, "V"), + (0xAAF7, "X"), + (0xAB01, "V"), + (0xAB07, "X"), + (0xAB09, "V"), + (0xAB0F, "X"), + (0xAB11, "V"), + (0xAB17, "X"), + (0xAB20, "V"), + (0xAB27, "X"), + (0xAB28, "V"), + (0xAB2F, "X"), + (0xAB30, "V"), + (0xAB5C, "M", "ꜧ"), + (0xAB5D, "M", "ꬷ"), + (0xAB5E, "M", "ɫ"), + (0xAB5F, "M", "ꭒ"), + (0xAB60, "V"), + (0xAB69, "M", "ʍ"), + (0xAB6A, "V"), + (0xAB6C, "X"), + (0xAB70, "M", "Ꭰ"), + (0xAB71, "M", "Ꭱ"), + (0xAB72, "M", "Ꭲ"), + (0xAB73, "M", "Ꭳ"), + (0xAB74, "M", "Ꭴ"), + (0xAB75, "M", "Ꭵ"), + (0xAB76, "M", "Ꭶ"), + (0xAB77, "M", "Ꭷ"), + (0xAB78, "M", "Ꭸ"), + (0xAB79, "M", "Ꭹ"), + (0xAB7A, "M", "Ꭺ"), + (0xAB7B, "M", "Ꭻ"), + (0xAB7C, "M", "Ꭼ"), + (0xAB7D, "M", "Ꭽ"), + (0xAB7E, "M", "Ꭾ"), + (0xAB7F, "M", "Ꭿ"), + (0xAB80, "M", "Ꮀ"), + (0xAB81, "M", "Ꮁ"), + (0xAB82, "M", "Ꮂ"), + (0xAB83, "M", "Ꮃ"), + (0xAB84, "M", "Ꮄ"), + (0xAB85, "M", "Ꮅ"), + (0xAB86, "M", "Ꮆ"), + (0xAB87, "M", "Ꮇ"), + (0xAB88, "M", "Ꮈ"), + (0xAB89, "M", "Ꮉ"), + (0xAB8A, "M", "Ꮊ"), + (0xAB8B, "M", "Ꮋ"), + (0xAB8C, "M", "Ꮌ"), + (0xAB8D, "M", "Ꮍ"), + (0xAB8E, "M", "Ꮎ"), + (0xAB8F, "M", "Ꮏ"), + (0xAB90, "M", "Ꮐ"), + (0xAB91, "M", "Ꮑ"), + (0xAB92, "M", "Ꮒ"), + (0xAB93, "M", "Ꮓ"), + (0xAB94, "M", "Ꮔ"), + (0xAB95, "M", "Ꮕ"), + (0xAB96, "M", "Ꮖ"), + (0xAB97, "M", "Ꮗ"), + (0xAB98, "M", "Ꮘ"), + (0xAB99, "M", "Ꮙ"), + (0xAB9A, "M", "Ꮚ"), + (0xAB9B, "M", "Ꮛ"), + (0xAB9C, "M", "Ꮜ"), + (0xAB9D, "M", "Ꮝ"), + (0xAB9E, "M", "Ꮞ"), + (0xAB9F, "M", "Ꮟ"), + (0xABA0, "M", "Ꮠ"), + (0xABA1, "M", "Ꮡ"), + (0xABA2, "M", "Ꮢ"), + (0xABA3, "M", "Ꮣ"), + (0xABA4, "M", "Ꮤ"), + (0xABA5, "M", "Ꮥ"), + (0xABA6, "M", "Ꮦ"), + (0xABA7, "M", "Ꮧ"), + (0xABA8, "M", "Ꮨ"), + (0xABA9, "M", "Ꮩ"), + (0xABAA, "M", "Ꮪ"), + (0xABAB, "M", "Ꮫ"), + (0xABAC, "M", "Ꮬ"), + (0xABAD, "M", "Ꮭ"), + (0xABAE, "M", "Ꮮ"), + ] + + +def _seg_39() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xABAF, "M", "Ꮯ"), + (0xABB0, "M", "Ꮰ"), + (0xABB1, "M", "Ꮱ"), + 
(0xABB2, "M", "Ꮲ"), + (0xABB3, "M", "Ꮳ"), + (0xABB4, "M", "Ꮴ"), + (0xABB5, "M", "Ꮵ"), + (0xABB6, "M", "Ꮶ"), + (0xABB7, "M", "Ꮷ"), + (0xABB8, "M", "Ꮸ"), + (0xABB9, "M", "Ꮹ"), + (0xABBA, "M", "Ꮺ"), + (0xABBB, "M", "Ꮻ"), + (0xABBC, "M", "Ꮼ"), + (0xABBD, "M", "Ꮽ"), + (0xABBE, "M", "Ꮾ"), + (0xABBF, "M", "Ꮿ"), + (0xABC0, "V"), + (0xABEE, "X"), + (0xABF0, "V"), + (0xABFA, "X"), + (0xAC00, "V"), + (0xD7A4, "X"), + (0xD7B0, "V"), + (0xD7C7, "X"), + (0xD7CB, "V"), + (0xD7FC, "X"), + (0xF900, "M", "豈"), + (0xF901, "M", "更"), + (0xF902, "M", "車"), + (0xF903, "M", "賈"), + (0xF904, "M", "滑"), + (0xF905, "M", "串"), + (0xF906, "M", "句"), + (0xF907, "M", "龜"), + (0xF909, "M", "契"), + (0xF90A, "M", "金"), + (0xF90B, "M", "喇"), + (0xF90C, "M", "奈"), + (0xF90D, "M", "懶"), + (0xF90E, "M", "癩"), + (0xF90F, "M", "羅"), + (0xF910, "M", "蘿"), + (0xF911, "M", "螺"), + (0xF912, "M", "裸"), + (0xF913, "M", "邏"), + (0xF914, "M", "樂"), + (0xF915, "M", "洛"), + (0xF916, "M", "烙"), + (0xF917, "M", "珞"), + (0xF918, "M", "落"), + (0xF919, "M", "酪"), + (0xF91A, "M", "駱"), + (0xF91B, "M", "亂"), + (0xF91C, "M", "卵"), + (0xF91D, "M", "欄"), + (0xF91E, "M", "爛"), + (0xF91F, "M", "蘭"), + (0xF920, "M", "鸞"), + (0xF921, "M", "嵐"), + (0xF922, "M", "濫"), + (0xF923, "M", "藍"), + (0xF924, "M", "襤"), + (0xF925, "M", "拉"), + (0xF926, "M", "臘"), + (0xF927, "M", "蠟"), + (0xF928, "M", "廊"), + (0xF929, "M", "朗"), + (0xF92A, "M", "浪"), + (0xF92B, "M", "狼"), + (0xF92C, "M", "郎"), + (0xF92D, "M", "來"), + (0xF92E, "M", "冷"), + (0xF92F, "M", "勞"), + (0xF930, "M", "擄"), + (0xF931, "M", "櫓"), + (0xF932, "M", "爐"), + (0xF933, "M", "盧"), + (0xF934, "M", "老"), + (0xF935, "M", "蘆"), + (0xF936, "M", "虜"), + (0xF937, "M", "路"), + (0xF938, "M", "露"), + (0xF939, "M", "魯"), + (0xF93A, "M", "鷺"), + (0xF93B, "M", "碌"), + (0xF93C, "M", "祿"), + (0xF93D, "M", "綠"), + (0xF93E, "M", "菉"), + (0xF93F, "M", "錄"), + (0xF940, "M", "鹿"), + (0xF941, "M", "論"), + (0xF942, "M", "壟"), + (0xF943, "M", "弄"), + (0xF944, "M", "籠"), + (0xF945, "M", "聾"), + 
(0xF946, "M", "牢"), + (0xF947, "M", "磊"), + (0xF948, "M", "賂"), + (0xF949, "M", "雷"), + ] + + +def _seg_40() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xF94A, "M", "壘"), + (0xF94B, "M", "屢"), + (0xF94C, "M", "樓"), + (0xF94D, "M", "淚"), + (0xF94E, "M", "漏"), + (0xF94F, "M", "累"), + (0xF950, "M", "縷"), + (0xF951, "M", "陋"), + (0xF952, "M", "勒"), + (0xF953, "M", "肋"), + (0xF954, "M", "凜"), + (0xF955, "M", "凌"), + (0xF956, "M", "稜"), + (0xF957, "M", "綾"), + (0xF958, "M", "菱"), + (0xF959, "M", "陵"), + (0xF95A, "M", "讀"), + (0xF95B, "M", "拏"), + (0xF95C, "M", "樂"), + (0xF95D, "M", "諾"), + (0xF95E, "M", "丹"), + (0xF95F, "M", "寧"), + (0xF960, "M", "怒"), + (0xF961, "M", "率"), + (0xF962, "M", "異"), + (0xF963, "M", "北"), + (0xF964, "M", "磻"), + (0xF965, "M", "便"), + (0xF966, "M", "復"), + (0xF967, "M", "不"), + (0xF968, "M", "泌"), + (0xF969, "M", "數"), + (0xF96A, "M", "索"), + (0xF96B, "M", "參"), + (0xF96C, "M", "塞"), + (0xF96D, "M", "省"), + (0xF96E, "M", "葉"), + (0xF96F, "M", "說"), + (0xF970, "M", "殺"), + (0xF971, "M", "辰"), + (0xF972, "M", "沈"), + (0xF973, "M", "拾"), + (0xF974, "M", "若"), + (0xF975, "M", "掠"), + (0xF976, "M", "略"), + (0xF977, "M", "亮"), + (0xF978, "M", "兩"), + (0xF979, "M", "凉"), + (0xF97A, "M", "梁"), + (0xF97B, "M", "糧"), + (0xF97C, "M", "良"), + (0xF97D, "M", "諒"), + (0xF97E, "M", "量"), + (0xF97F, "M", "勵"), + (0xF980, "M", "呂"), + (0xF981, "M", "女"), + (0xF982, "M", "廬"), + (0xF983, "M", "旅"), + (0xF984, "M", "濾"), + (0xF985, "M", "礪"), + (0xF986, "M", "閭"), + (0xF987, "M", "驪"), + (0xF988, "M", "麗"), + (0xF989, "M", "黎"), + (0xF98A, "M", "力"), + (0xF98B, "M", "曆"), + (0xF98C, "M", "歷"), + (0xF98D, "M", "轢"), + (0xF98E, "M", "年"), + (0xF98F, "M", "憐"), + (0xF990, "M", "戀"), + (0xF991, "M", "撚"), + (0xF992, "M", "漣"), + (0xF993, "M", "煉"), + (0xF994, "M", "璉"), + (0xF995, "M", "秊"), + (0xF996, "M", "練"), + (0xF997, "M", "聯"), + (0xF998, "M", "輦"), + (0xF999, "M", "蓮"), + (0xF99A, "M", "連"), + (0xF99B, "M", "鍊"), + (0xF99C, "M", 
"列"), + (0xF99D, "M", "劣"), + (0xF99E, "M", "咽"), + (0xF99F, "M", "烈"), + (0xF9A0, "M", "裂"), + (0xF9A1, "M", "說"), + (0xF9A2, "M", "廉"), + (0xF9A3, "M", "念"), + (0xF9A4, "M", "捻"), + (0xF9A5, "M", "殮"), + (0xF9A6, "M", "簾"), + (0xF9A7, "M", "獵"), + (0xF9A8, "M", "令"), + (0xF9A9, "M", "囹"), + (0xF9AA, "M", "寧"), + (0xF9AB, "M", "嶺"), + (0xF9AC, "M", "怜"), + (0xF9AD, "M", "玲"), + ] + + +def _seg_41() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xF9AE, "M", "瑩"), + (0xF9AF, "M", "羚"), + (0xF9B0, "M", "聆"), + (0xF9B1, "M", "鈴"), + (0xF9B2, "M", "零"), + (0xF9B3, "M", "靈"), + (0xF9B4, "M", "領"), + (0xF9B5, "M", "例"), + (0xF9B6, "M", "禮"), + (0xF9B7, "M", "醴"), + (0xF9B8, "M", "隸"), + (0xF9B9, "M", "惡"), + (0xF9BA, "M", "了"), + (0xF9BB, "M", "僚"), + (0xF9BC, "M", "寮"), + (0xF9BD, "M", "尿"), + (0xF9BE, "M", "料"), + (0xF9BF, "M", "樂"), + (0xF9C0, "M", "燎"), + (0xF9C1, "M", "療"), + (0xF9C2, "M", "蓼"), + (0xF9C3, "M", "遼"), + (0xF9C4, "M", "龍"), + (0xF9C5, "M", "暈"), + (0xF9C6, "M", "阮"), + (0xF9C7, "M", "劉"), + (0xF9C8, "M", "杻"), + (0xF9C9, "M", "柳"), + (0xF9CA, "M", "流"), + (0xF9CB, "M", "溜"), + (0xF9CC, "M", "琉"), + (0xF9CD, "M", "留"), + (0xF9CE, "M", "硫"), + (0xF9CF, "M", "紐"), + (0xF9D0, "M", "類"), + (0xF9D1, "M", "六"), + (0xF9D2, "M", "戮"), + (0xF9D3, "M", "陸"), + (0xF9D4, "M", "倫"), + (0xF9D5, "M", "崙"), + (0xF9D6, "M", "淪"), + (0xF9D7, "M", "輪"), + (0xF9D8, "M", "律"), + (0xF9D9, "M", "慄"), + (0xF9DA, "M", "栗"), + (0xF9DB, "M", "率"), + (0xF9DC, "M", "隆"), + (0xF9DD, "M", "利"), + (0xF9DE, "M", "吏"), + (0xF9DF, "M", "履"), + (0xF9E0, "M", "易"), + (0xF9E1, "M", "李"), + (0xF9E2, "M", "梨"), + (0xF9E3, "M", "泥"), + (0xF9E4, "M", "理"), + (0xF9E5, "M", "痢"), + (0xF9E6, "M", "罹"), + (0xF9E7, "M", "裏"), + (0xF9E8, "M", "裡"), + (0xF9E9, "M", "里"), + (0xF9EA, "M", "離"), + (0xF9EB, "M", "匿"), + (0xF9EC, "M", "溺"), + (0xF9ED, "M", "吝"), + (0xF9EE, "M", "燐"), + (0xF9EF, "M", "璘"), + (0xF9F0, "M", "藺"), + (0xF9F1, "M", "隣"), + (0xF9F2, "M", "鱗"), + (0xF9F3, 
"M", "麟"), + (0xF9F4, "M", "林"), + (0xF9F5, "M", "淋"), + (0xF9F6, "M", "臨"), + (0xF9F7, "M", "立"), + (0xF9F8, "M", "笠"), + (0xF9F9, "M", "粒"), + (0xF9FA, "M", "狀"), + (0xF9FB, "M", "炙"), + (0xF9FC, "M", "識"), + (0xF9FD, "M", "什"), + (0xF9FE, "M", "茶"), + (0xF9FF, "M", "刺"), + (0xFA00, "M", "切"), + (0xFA01, "M", "度"), + (0xFA02, "M", "拓"), + (0xFA03, "M", "糖"), + (0xFA04, "M", "宅"), + (0xFA05, "M", "洞"), + (0xFA06, "M", "暴"), + (0xFA07, "M", "輻"), + (0xFA08, "M", "行"), + (0xFA09, "M", "降"), + (0xFA0A, "M", "見"), + (0xFA0B, "M", "廓"), + (0xFA0C, "M", "兀"), + (0xFA0D, "M", "嗀"), + (0xFA0E, "V"), + (0xFA10, "M", "塚"), + (0xFA11, "V"), + (0xFA12, "M", "晴"), + ] + + +def _seg_42() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xFA13, "V"), + (0xFA15, "M", "凞"), + (0xFA16, "M", "猪"), + (0xFA17, "M", "益"), + (0xFA18, "M", "礼"), + (0xFA19, "M", "神"), + (0xFA1A, "M", "祥"), + (0xFA1B, "M", "福"), + (0xFA1C, "M", "靖"), + (0xFA1D, "M", "精"), + (0xFA1E, "M", "羽"), + (0xFA1F, "V"), + (0xFA20, "M", "蘒"), + (0xFA21, "V"), + (0xFA22, "M", "諸"), + (0xFA23, "V"), + (0xFA25, "M", "逸"), + (0xFA26, "M", "都"), + (0xFA27, "V"), + (0xFA2A, "M", "飯"), + (0xFA2B, "M", "飼"), + (0xFA2C, "M", "館"), + (0xFA2D, "M", "鶴"), + (0xFA2E, "M", "郞"), + (0xFA2F, "M", "隷"), + (0xFA30, "M", "侮"), + (0xFA31, "M", "僧"), + (0xFA32, "M", "免"), + (0xFA33, "M", "勉"), + (0xFA34, "M", "勤"), + (0xFA35, "M", "卑"), + (0xFA36, "M", "喝"), + (0xFA37, "M", "嘆"), + (0xFA38, "M", "器"), + (0xFA39, "M", "塀"), + (0xFA3A, "M", "墨"), + (0xFA3B, "M", "層"), + (0xFA3C, "M", "屮"), + (0xFA3D, "M", "悔"), + (0xFA3E, "M", "慨"), + (0xFA3F, "M", "憎"), + (0xFA40, "M", "懲"), + (0xFA41, "M", "敏"), + (0xFA42, "M", "既"), + (0xFA43, "M", "暑"), + (0xFA44, "M", "梅"), + (0xFA45, "M", "海"), + (0xFA46, "M", "渚"), + (0xFA47, "M", "漢"), + (0xFA48, "M", "煮"), + (0xFA49, "M", "爫"), + (0xFA4A, "M", "琢"), + (0xFA4B, "M", "碑"), + (0xFA4C, "M", "社"), + (0xFA4D, "M", "祉"), + (0xFA4E, "M", "祈"), + (0xFA4F, "M", "祐"), + (0xFA50, "M", 
"祖"), + (0xFA51, "M", "祝"), + (0xFA52, "M", "禍"), + (0xFA53, "M", "禎"), + (0xFA54, "M", "穀"), + (0xFA55, "M", "突"), + (0xFA56, "M", "節"), + (0xFA57, "M", "練"), + (0xFA58, "M", "縉"), + (0xFA59, "M", "繁"), + (0xFA5A, "M", "署"), + (0xFA5B, "M", "者"), + (0xFA5C, "M", "臭"), + (0xFA5D, "M", "艹"), + (0xFA5F, "M", "著"), + (0xFA60, "M", "褐"), + (0xFA61, "M", "視"), + (0xFA62, "M", "謁"), + (0xFA63, "M", "謹"), + (0xFA64, "M", "賓"), + (0xFA65, "M", "贈"), + (0xFA66, "M", "辶"), + (0xFA67, "M", "逸"), + (0xFA68, "M", "難"), + (0xFA69, "M", "響"), + (0xFA6A, "M", "頻"), + (0xFA6B, "M", "恵"), + (0xFA6C, "M", "𤋮"), + (0xFA6D, "M", "舘"), + (0xFA6E, "X"), + (0xFA70, "M", "並"), + (0xFA71, "M", "况"), + (0xFA72, "M", "全"), + (0xFA73, "M", "侀"), + (0xFA74, "M", "充"), + (0xFA75, "M", "冀"), + (0xFA76, "M", "勇"), + (0xFA77, "M", "勺"), + (0xFA78, "M", "喝"), + (0xFA79, "M", "啕"), + (0xFA7A, "M", "喙"), + (0xFA7B, "M", "嗢"), + (0xFA7C, "M", "塚"), + ] + + +def _seg_43() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xFA7D, "M", "墳"), + (0xFA7E, "M", "奄"), + (0xFA7F, "M", "奔"), + (0xFA80, "M", "婢"), + (0xFA81, "M", "嬨"), + (0xFA82, "M", "廒"), + (0xFA83, "M", "廙"), + (0xFA84, "M", "彩"), + (0xFA85, "M", "徭"), + (0xFA86, "M", "惘"), + (0xFA87, "M", "慎"), + (0xFA88, "M", "愈"), + (0xFA89, "M", "憎"), + (0xFA8A, "M", "慠"), + (0xFA8B, "M", "懲"), + (0xFA8C, "M", "戴"), + (0xFA8D, "M", "揄"), + (0xFA8E, "M", "搜"), + (0xFA8F, "M", "摒"), + (0xFA90, "M", "敖"), + (0xFA91, "M", "晴"), + (0xFA92, "M", "朗"), + (0xFA93, "M", "望"), + (0xFA94, "M", "杖"), + (0xFA95, "M", "歹"), + (0xFA96, "M", "殺"), + (0xFA97, "M", "流"), + (0xFA98, "M", "滛"), + (0xFA99, "M", "滋"), + (0xFA9A, "M", "漢"), + (0xFA9B, "M", "瀞"), + (0xFA9C, "M", "煮"), + (0xFA9D, "M", "瞧"), + (0xFA9E, "M", "爵"), + (0xFA9F, "M", "犯"), + (0xFAA0, "M", "猪"), + (0xFAA1, "M", "瑱"), + (0xFAA2, "M", "甆"), + (0xFAA3, "M", "画"), + (0xFAA4, "M", "瘝"), + (0xFAA5, "M", "瘟"), + (0xFAA6, "M", "益"), + (0xFAA7, "M", "盛"), + (0xFAA8, "M", "直"), + (0xFAA9, "M", 
"睊"), + (0xFAAA, "M", "着"), + (0xFAAB, "M", "磌"), + (0xFAAC, "M", "窱"), + (0xFAAD, "M", "節"), + (0xFAAE, "M", "类"), + (0xFAAF, "M", "絛"), + (0xFAB0, "M", "練"), + (0xFAB1, "M", "缾"), + (0xFAB2, "M", "者"), + (0xFAB3, "M", "荒"), + (0xFAB4, "M", "華"), + (0xFAB5, "M", "蝹"), + (0xFAB6, "M", "襁"), + (0xFAB7, "M", "覆"), + (0xFAB8, "M", "視"), + (0xFAB9, "M", "調"), + (0xFABA, "M", "諸"), + (0xFABB, "M", "請"), + (0xFABC, "M", "謁"), + (0xFABD, "M", "諾"), + (0xFABE, "M", "諭"), + (0xFABF, "M", "謹"), + (0xFAC0, "M", "變"), + (0xFAC1, "M", "贈"), + (0xFAC2, "M", "輸"), + (0xFAC3, "M", "遲"), + (0xFAC4, "M", "醙"), + (0xFAC5, "M", "鉶"), + (0xFAC6, "M", "陼"), + (0xFAC7, "M", "難"), + (0xFAC8, "M", "靖"), + (0xFAC9, "M", "韛"), + (0xFACA, "M", "響"), + (0xFACB, "M", "頋"), + (0xFACC, "M", "頻"), + (0xFACD, "M", "鬒"), + (0xFACE, "M", "龜"), + (0xFACF, "M", "𢡊"), + (0xFAD0, "M", "𢡄"), + (0xFAD1, "M", "𣏕"), + (0xFAD2, "M", "㮝"), + (0xFAD3, "M", "䀘"), + (0xFAD4, "M", "䀹"), + (0xFAD5, "M", "𥉉"), + (0xFAD6, "M", "𥳐"), + (0xFAD7, "M", "𧻓"), + (0xFAD8, "M", "齃"), + (0xFAD9, "M", "龎"), + (0xFADA, "X"), + (0xFB00, "M", "ff"), + (0xFB01, "M", "fi"), + (0xFB02, "M", "fl"), + (0xFB03, "M", "ffi"), + (0xFB04, "M", "ffl"), + (0xFB05, "M", "st"), + ] + + +def _seg_44() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xFB07, "X"), + (0xFB13, "M", "մն"), + (0xFB14, "M", "մե"), + (0xFB15, "M", "մի"), + (0xFB16, "M", "վն"), + (0xFB17, "M", "մխ"), + (0xFB18, "X"), + (0xFB1D, "M", "יִ"), + (0xFB1E, "V"), + (0xFB1F, "M", "ײַ"), + (0xFB20, "M", "ע"), + (0xFB21, "M", "א"), + (0xFB22, "M", "ד"), + (0xFB23, "M", "ה"), + (0xFB24, "M", "כ"), + (0xFB25, "M", "ל"), + (0xFB26, "M", "ם"), + (0xFB27, "M", "ר"), + (0xFB28, "M", "ת"), + (0xFB29, "3", "+"), + (0xFB2A, "M", "שׁ"), + (0xFB2B, "M", "שׂ"), + (0xFB2C, "M", "שּׁ"), + (0xFB2D, "M", "שּׂ"), + (0xFB2E, "M", "אַ"), + (0xFB2F, "M", "אָ"), + (0xFB30, "M", "אּ"), + (0xFB31, "M", "בּ"), + (0xFB32, "M", "גּ"), + (0xFB33, "M", "דּ"), + (0xFB34, "M", "הּ"), + 
(0xFB35, "M", "וּ"), + (0xFB36, "M", "זּ"), + (0xFB37, "X"), + (0xFB38, "M", "טּ"), + (0xFB39, "M", "יּ"), + (0xFB3A, "M", "ךּ"), + (0xFB3B, "M", "כּ"), + (0xFB3C, "M", "לּ"), + (0xFB3D, "X"), + (0xFB3E, "M", "מּ"), + (0xFB3F, "X"), + (0xFB40, "M", "נּ"), + (0xFB41, "M", "סּ"), + (0xFB42, "X"), + (0xFB43, "M", "ףּ"), + (0xFB44, "M", "פּ"), + (0xFB45, "X"), + (0xFB46, "M", "צּ"), + (0xFB47, "M", "קּ"), + (0xFB48, "M", "רּ"), + (0xFB49, "M", "שּ"), + (0xFB4A, "M", "תּ"), + (0xFB4B, "M", "וֹ"), + (0xFB4C, "M", "בֿ"), + (0xFB4D, "M", "כֿ"), + (0xFB4E, "M", "פֿ"), + (0xFB4F, "M", "אל"), + (0xFB50, "M", "ٱ"), + (0xFB52, "M", "ٻ"), + (0xFB56, "M", "پ"), + (0xFB5A, "M", "ڀ"), + (0xFB5E, "M", "ٺ"), + (0xFB62, "M", "ٿ"), + (0xFB66, "M", "ٹ"), + (0xFB6A, "M", "ڤ"), + (0xFB6E, "M", "ڦ"), + (0xFB72, "M", "ڄ"), + (0xFB76, "M", "ڃ"), + (0xFB7A, "M", "چ"), + (0xFB7E, "M", "ڇ"), + (0xFB82, "M", "ڍ"), + (0xFB84, "M", "ڌ"), + (0xFB86, "M", "ڎ"), + (0xFB88, "M", "ڈ"), + (0xFB8A, "M", "ژ"), + (0xFB8C, "M", "ڑ"), + (0xFB8E, "M", "ک"), + (0xFB92, "M", "گ"), + (0xFB96, "M", "ڳ"), + (0xFB9A, "M", "ڱ"), + (0xFB9E, "M", "ں"), + (0xFBA0, "M", "ڻ"), + (0xFBA4, "M", "ۀ"), + (0xFBA6, "M", "ہ"), + (0xFBAA, "M", "ھ"), + (0xFBAE, "M", "ے"), + (0xFBB0, "M", "ۓ"), + (0xFBB2, "V"), + (0xFBC3, "X"), + (0xFBD3, "M", "ڭ"), + (0xFBD7, "M", "ۇ"), + (0xFBD9, "M", "ۆ"), + (0xFBDB, "M", "ۈ"), + (0xFBDD, "M", "ۇٴ"), + (0xFBDE, "M", "ۋ"), + (0xFBE0, "M", "ۅ"), + (0xFBE2, "M", "ۉ"), + (0xFBE4, "M", "ې"), + (0xFBE8, "M", "ى"), + ] + + +def _seg_45() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xFBEA, "M", "ئا"), + (0xFBEC, "M", "ئە"), + (0xFBEE, "M", "ئو"), + (0xFBF0, "M", "ئۇ"), + (0xFBF2, "M", "ئۆ"), + (0xFBF4, "M", "ئۈ"), + (0xFBF6, "M", "ئې"), + (0xFBF9, "M", "ئى"), + (0xFBFC, "M", "ی"), + (0xFC00, "M", "ئج"), + (0xFC01, "M", "ئح"), + (0xFC02, "M", "ئم"), + (0xFC03, "M", "ئى"), + (0xFC04, "M", "ئي"), + (0xFC05, "M", "بج"), + (0xFC06, "M", "بح"), + (0xFC07, "M", "بخ"), + (0xFC08, "M", 
"بم"), + (0xFC09, "M", "بى"), + (0xFC0A, "M", "بي"), + (0xFC0B, "M", "تج"), + (0xFC0C, "M", "تح"), + (0xFC0D, "M", "تخ"), + (0xFC0E, "M", "تم"), + (0xFC0F, "M", "تى"), + (0xFC10, "M", "تي"), + (0xFC11, "M", "ثج"), + (0xFC12, "M", "ثم"), + (0xFC13, "M", "ثى"), + (0xFC14, "M", "ثي"), + (0xFC15, "M", "جح"), + (0xFC16, "M", "جم"), + (0xFC17, "M", "حج"), + (0xFC18, "M", "حم"), + (0xFC19, "M", "خج"), + (0xFC1A, "M", "خح"), + (0xFC1B, "M", "خم"), + (0xFC1C, "M", "سج"), + (0xFC1D, "M", "سح"), + (0xFC1E, "M", "سخ"), + (0xFC1F, "M", "سم"), + (0xFC20, "M", "صح"), + (0xFC21, "M", "صم"), + (0xFC22, "M", "ضج"), + (0xFC23, "M", "ضح"), + (0xFC24, "M", "ضخ"), + (0xFC25, "M", "ضم"), + (0xFC26, "M", "طح"), + (0xFC27, "M", "طم"), + (0xFC28, "M", "ظم"), + (0xFC29, "M", "عج"), + (0xFC2A, "M", "عم"), + (0xFC2B, "M", "غج"), + (0xFC2C, "M", "غم"), + (0xFC2D, "M", "فج"), + (0xFC2E, "M", "فح"), + (0xFC2F, "M", "فخ"), + (0xFC30, "M", "فم"), + (0xFC31, "M", "فى"), + (0xFC32, "M", "في"), + (0xFC33, "M", "قح"), + (0xFC34, "M", "قم"), + (0xFC35, "M", "قى"), + (0xFC36, "M", "قي"), + (0xFC37, "M", "كا"), + (0xFC38, "M", "كج"), + (0xFC39, "M", "كح"), + (0xFC3A, "M", "كخ"), + (0xFC3B, "M", "كل"), + (0xFC3C, "M", "كم"), + (0xFC3D, "M", "كى"), + (0xFC3E, "M", "كي"), + (0xFC3F, "M", "لج"), + (0xFC40, "M", "لح"), + (0xFC41, "M", "لخ"), + (0xFC42, "M", "لم"), + (0xFC43, "M", "لى"), + (0xFC44, "M", "لي"), + (0xFC45, "M", "مج"), + (0xFC46, "M", "مح"), + (0xFC47, "M", "مخ"), + (0xFC48, "M", "مم"), + (0xFC49, "M", "مى"), + (0xFC4A, "M", "مي"), + (0xFC4B, "M", "نج"), + (0xFC4C, "M", "نح"), + (0xFC4D, "M", "نخ"), + (0xFC4E, "M", "نم"), + (0xFC4F, "M", "نى"), + (0xFC50, "M", "ني"), + (0xFC51, "M", "هج"), + (0xFC52, "M", "هم"), + (0xFC53, "M", "هى"), + (0xFC54, "M", "هي"), + (0xFC55, "M", "يج"), + (0xFC56, "M", "يح"), + (0xFC57, "M", "يخ"), + (0xFC58, "M", "يم"), + (0xFC59, "M", "يى"), + (0xFC5A, "M", "يي"), + ] + + +def _seg_46() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xFC5B, "M", 
"ذٰ"), + (0xFC5C, "M", "رٰ"), + (0xFC5D, "M", "ىٰ"), + (0xFC5E, "3", " ٌّ"), + (0xFC5F, "3", " ٍّ"), + (0xFC60, "3", " َّ"), + (0xFC61, "3", " ُّ"), + (0xFC62, "3", " ِّ"), + (0xFC63, "3", " ّٰ"), + (0xFC64, "M", "ئر"), + (0xFC65, "M", "ئز"), + (0xFC66, "M", "ئم"), + (0xFC67, "M", "ئن"), + (0xFC68, "M", "ئى"), + (0xFC69, "M", "ئي"), + (0xFC6A, "M", "بر"), + (0xFC6B, "M", "بز"), + (0xFC6C, "M", "بم"), + (0xFC6D, "M", "بن"), + (0xFC6E, "M", "بى"), + (0xFC6F, "M", "بي"), + (0xFC70, "M", "تر"), + (0xFC71, "M", "تز"), + (0xFC72, "M", "تم"), + (0xFC73, "M", "تن"), + (0xFC74, "M", "تى"), + (0xFC75, "M", "تي"), + (0xFC76, "M", "ثر"), + (0xFC77, "M", "ثز"), + (0xFC78, "M", "ثم"), + (0xFC79, "M", "ثن"), + (0xFC7A, "M", "ثى"), + (0xFC7B, "M", "ثي"), + (0xFC7C, "M", "فى"), + (0xFC7D, "M", "في"), + (0xFC7E, "M", "قى"), + (0xFC7F, "M", "قي"), + (0xFC80, "M", "كا"), + (0xFC81, "M", "كل"), + (0xFC82, "M", "كم"), + (0xFC83, "M", "كى"), + (0xFC84, "M", "كي"), + (0xFC85, "M", "لم"), + (0xFC86, "M", "لى"), + (0xFC87, "M", "لي"), + (0xFC88, "M", "ما"), + (0xFC89, "M", "مم"), + (0xFC8A, "M", "نر"), + (0xFC8B, "M", "نز"), + (0xFC8C, "M", "نم"), + (0xFC8D, "M", "نن"), + (0xFC8E, "M", "نى"), + (0xFC8F, "M", "ني"), + (0xFC90, "M", "ىٰ"), + (0xFC91, "M", "ير"), + (0xFC92, "M", "يز"), + (0xFC93, "M", "يم"), + (0xFC94, "M", "ين"), + (0xFC95, "M", "يى"), + (0xFC96, "M", "يي"), + (0xFC97, "M", "ئج"), + (0xFC98, "M", "ئح"), + (0xFC99, "M", "ئخ"), + (0xFC9A, "M", "ئم"), + (0xFC9B, "M", "ئه"), + (0xFC9C, "M", "بج"), + (0xFC9D, "M", "بح"), + (0xFC9E, "M", "بخ"), + (0xFC9F, "M", "بم"), + (0xFCA0, "M", "به"), + (0xFCA1, "M", "تج"), + (0xFCA2, "M", "تح"), + (0xFCA3, "M", "تخ"), + (0xFCA4, "M", "تم"), + (0xFCA5, "M", "ته"), + (0xFCA6, "M", "ثم"), + (0xFCA7, "M", "جح"), + (0xFCA8, "M", "جم"), + (0xFCA9, "M", "حج"), + (0xFCAA, "M", "حم"), + (0xFCAB, "M", "خج"), + (0xFCAC, "M", "خم"), + (0xFCAD, "M", "سج"), + (0xFCAE, "M", "سح"), + (0xFCAF, "M", "سخ"), + (0xFCB0, "M", "سم"), + (0xFCB1, "M", "صح"), + 
(0xFCB2, "M", "صخ"), + (0xFCB3, "M", "صم"), + (0xFCB4, "M", "ضج"), + (0xFCB5, "M", "ضح"), + (0xFCB6, "M", "ضخ"), + (0xFCB7, "M", "ضم"), + (0xFCB8, "M", "طح"), + (0xFCB9, "M", "ظم"), + (0xFCBA, "M", "عج"), + (0xFCBB, "M", "عم"), + (0xFCBC, "M", "غج"), + (0xFCBD, "M", "غم"), + (0xFCBE, "M", "فج"), + ] + + +def _seg_47() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xFCBF, "M", "فح"), + (0xFCC0, "M", "فخ"), + (0xFCC1, "M", "فم"), + (0xFCC2, "M", "قح"), + (0xFCC3, "M", "قم"), + (0xFCC4, "M", "كج"), + (0xFCC5, "M", "كح"), + (0xFCC6, "M", "كخ"), + (0xFCC7, "M", "كل"), + (0xFCC8, "M", "كم"), + (0xFCC9, "M", "لج"), + (0xFCCA, "M", "لح"), + (0xFCCB, "M", "لخ"), + (0xFCCC, "M", "لم"), + (0xFCCD, "M", "له"), + (0xFCCE, "M", "مج"), + (0xFCCF, "M", "مح"), + (0xFCD0, "M", "مخ"), + (0xFCD1, "M", "مم"), + (0xFCD2, "M", "نج"), + (0xFCD3, "M", "نح"), + (0xFCD4, "M", "نخ"), + (0xFCD5, "M", "نم"), + (0xFCD6, "M", "نه"), + (0xFCD7, "M", "هج"), + (0xFCD8, "M", "هم"), + (0xFCD9, "M", "هٰ"), + (0xFCDA, "M", "يج"), + (0xFCDB, "M", "يح"), + (0xFCDC, "M", "يخ"), + (0xFCDD, "M", "يم"), + (0xFCDE, "M", "يه"), + (0xFCDF, "M", "ئم"), + (0xFCE0, "M", "ئه"), + (0xFCE1, "M", "بم"), + (0xFCE2, "M", "به"), + (0xFCE3, "M", "تم"), + (0xFCE4, "M", "ته"), + (0xFCE5, "M", "ثم"), + (0xFCE6, "M", "ثه"), + (0xFCE7, "M", "سم"), + (0xFCE8, "M", "سه"), + (0xFCE9, "M", "شم"), + (0xFCEA, "M", "شه"), + (0xFCEB, "M", "كل"), + (0xFCEC, "M", "كم"), + (0xFCED, "M", "لم"), + (0xFCEE, "M", "نم"), + (0xFCEF, "M", "نه"), + (0xFCF0, "M", "يم"), + (0xFCF1, "M", "يه"), + (0xFCF2, "M", "ـَّ"), + (0xFCF3, "M", "ـُّ"), + (0xFCF4, "M", "ـِّ"), + (0xFCF5, "M", "طى"), + (0xFCF6, "M", "طي"), + (0xFCF7, "M", "عى"), + (0xFCF8, "M", "عي"), + (0xFCF9, "M", "غى"), + (0xFCFA, "M", "غي"), + (0xFCFB, "M", "سى"), + (0xFCFC, "M", "سي"), + (0xFCFD, "M", "شى"), + (0xFCFE, "M", "شي"), + (0xFCFF, "M", "حى"), + (0xFD00, "M", "حي"), + (0xFD01, "M", "جى"), + (0xFD02, "M", "جي"), + (0xFD03, "M", "خى"), + (0xFD04, "M", "خي"), 
+ (0xFD05, "M", "صى"), + (0xFD06, "M", "صي"), + (0xFD07, "M", "ضى"), + (0xFD08, "M", "ضي"), + (0xFD09, "M", "شج"), + (0xFD0A, "M", "شح"), + (0xFD0B, "M", "شخ"), + (0xFD0C, "M", "شم"), + (0xFD0D, "M", "شر"), + (0xFD0E, "M", "سر"), + (0xFD0F, "M", "صر"), + (0xFD10, "M", "ضر"), + (0xFD11, "M", "طى"), + (0xFD12, "M", "طي"), + (0xFD13, "M", "عى"), + (0xFD14, "M", "عي"), + (0xFD15, "M", "غى"), + (0xFD16, "M", "غي"), + (0xFD17, "M", "سى"), + (0xFD18, "M", "سي"), + (0xFD19, "M", "شى"), + (0xFD1A, "M", "شي"), + (0xFD1B, "M", "حى"), + (0xFD1C, "M", "حي"), + (0xFD1D, "M", "جى"), + (0xFD1E, "M", "جي"), + (0xFD1F, "M", "خى"), + (0xFD20, "M", "خي"), + (0xFD21, "M", "صى"), + (0xFD22, "M", "صي"), + ] + + +def _seg_48() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xFD23, "M", "ضى"), + (0xFD24, "M", "ضي"), + (0xFD25, "M", "شج"), + (0xFD26, "M", "شح"), + (0xFD27, "M", "شخ"), + (0xFD28, "M", "شم"), + (0xFD29, "M", "شر"), + (0xFD2A, "M", "سر"), + (0xFD2B, "M", "صر"), + (0xFD2C, "M", "ضر"), + (0xFD2D, "M", "شج"), + (0xFD2E, "M", "شح"), + (0xFD2F, "M", "شخ"), + (0xFD30, "M", "شم"), + (0xFD31, "M", "سه"), + (0xFD32, "M", "شه"), + (0xFD33, "M", "طم"), + (0xFD34, "M", "سج"), + (0xFD35, "M", "سح"), + (0xFD36, "M", "سخ"), + (0xFD37, "M", "شج"), + (0xFD38, "M", "شح"), + (0xFD39, "M", "شخ"), + (0xFD3A, "M", "طم"), + (0xFD3B, "M", "ظم"), + (0xFD3C, "M", "اً"), + (0xFD3E, "V"), + (0xFD50, "M", "تجم"), + (0xFD51, "M", "تحج"), + (0xFD53, "M", "تحم"), + (0xFD54, "M", "تخم"), + (0xFD55, "M", "تمج"), + (0xFD56, "M", "تمح"), + (0xFD57, "M", "تمخ"), + (0xFD58, "M", "جمح"), + (0xFD5A, "M", "حمي"), + (0xFD5B, "M", "حمى"), + (0xFD5C, "M", "سحج"), + (0xFD5D, "M", "سجح"), + (0xFD5E, "M", "سجى"), + (0xFD5F, "M", "سمح"), + (0xFD61, "M", "سمج"), + (0xFD62, "M", "سمم"), + (0xFD64, "M", "صحح"), + (0xFD66, "M", "صمم"), + (0xFD67, "M", "شحم"), + (0xFD69, "M", "شجي"), + (0xFD6A, "M", "شمخ"), + (0xFD6C, "M", "شمم"), + (0xFD6E, "M", "ضحى"), + (0xFD6F, "M", "ضخم"), + (0xFD71, "M", "طمح"), + 
(0xFD73, "M", "طمم"), + (0xFD74, "M", "طمي"), + (0xFD75, "M", "عجم"), + (0xFD76, "M", "عمم"), + (0xFD78, "M", "عمى"), + (0xFD79, "M", "غمم"), + (0xFD7A, "M", "غمي"), + (0xFD7B, "M", "غمى"), + (0xFD7C, "M", "فخم"), + (0xFD7E, "M", "قمح"), + (0xFD7F, "M", "قمم"), + (0xFD80, "M", "لحم"), + (0xFD81, "M", "لحي"), + (0xFD82, "M", "لحى"), + (0xFD83, "M", "لجج"), + (0xFD85, "M", "لخم"), + (0xFD87, "M", "لمح"), + (0xFD89, "M", "محج"), + (0xFD8A, "M", "محم"), + (0xFD8B, "M", "محي"), + (0xFD8C, "M", "مجح"), + (0xFD8D, "M", "مجم"), + (0xFD8E, "M", "مخج"), + (0xFD8F, "M", "مخم"), + (0xFD90, "X"), + (0xFD92, "M", "مجخ"), + (0xFD93, "M", "همج"), + (0xFD94, "M", "همم"), + (0xFD95, "M", "نحم"), + (0xFD96, "M", "نحى"), + (0xFD97, "M", "نجم"), + (0xFD99, "M", "نجى"), + (0xFD9A, "M", "نمي"), + (0xFD9B, "M", "نمى"), + (0xFD9C, "M", "يمم"), + (0xFD9E, "M", "بخي"), + (0xFD9F, "M", "تجي"), + (0xFDA0, "M", "تجى"), + (0xFDA1, "M", "تخي"), + (0xFDA2, "M", "تخى"), + (0xFDA3, "M", "تمي"), + (0xFDA4, "M", "تمى"), + (0xFDA5, "M", "جمي"), + (0xFDA6, "M", "جحى"), + (0xFDA7, "M", "جمى"), + (0xFDA8, "M", "سخى"), + (0xFDA9, "M", "صحي"), + (0xFDAA, "M", "شحي"), + ] + + +def _seg_49() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xFDAB, "M", "ضحي"), + (0xFDAC, "M", "لجي"), + (0xFDAD, "M", "لمي"), + (0xFDAE, "M", "يحي"), + (0xFDAF, "M", "يجي"), + (0xFDB0, "M", "يمي"), + (0xFDB1, "M", "ممي"), + (0xFDB2, "M", "قمي"), + (0xFDB3, "M", "نحي"), + (0xFDB4, "M", "قمح"), + (0xFDB5, "M", "لحم"), + (0xFDB6, "M", "عمي"), + (0xFDB7, "M", "كمي"), + (0xFDB8, "M", "نجح"), + (0xFDB9, "M", "مخي"), + (0xFDBA, "M", "لجم"), + (0xFDBB, "M", "كمم"), + (0xFDBC, "M", "لجم"), + (0xFDBD, "M", "نجح"), + (0xFDBE, "M", "جحي"), + (0xFDBF, "M", "حجي"), + (0xFDC0, "M", "مجي"), + (0xFDC1, "M", "فمي"), + (0xFDC2, "M", "بحي"), + (0xFDC3, "M", "كمم"), + (0xFDC4, "M", "عجم"), + (0xFDC5, "M", "صمم"), + (0xFDC6, "M", "سخي"), + (0xFDC7, "M", "نجي"), + (0xFDC8, "X"), + (0xFDCF, "V"), + (0xFDD0, "X"), + (0xFDF0, "M", 
"صلے"), + (0xFDF1, "M", "قلے"), + (0xFDF2, "M", "الله"), + (0xFDF3, "M", "اكبر"), + (0xFDF4, "M", "محمد"), + (0xFDF5, "M", "صلعم"), + (0xFDF6, "M", "رسول"), + (0xFDF7, "M", "عليه"), + (0xFDF8, "M", "وسلم"), + (0xFDF9, "M", "صلى"), + (0xFDFA, "3", "صلى الله عليه وسلم"), + (0xFDFB, "3", "جل جلاله"), + (0xFDFC, "M", "ریال"), + (0xFDFD, "V"), + (0xFE00, "I"), + (0xFE10, "3", ","), + (0xFE11, "M", "、"), + (0xFE12, "X"), + (0xFE13, "3", ":"), + (0xFE14, "3", ";"), + (0xFE15, "3", "!"), + (0xFE16, "3", "?"), + (0xFE17, "M", "〖"), + (0xFE18, "M", "〗"), + (0xFE19, "X"), + (0xFE20, "V"), + (0xFE30, "X"), + (0xFE31, "M", "—"), + (0xFE32, "M", "–"), + (0xFE33, "3", "_"), + (0xFE35, "3", "("), + (0xFE36, "3", ")"), + (0xFE37, "3", "{"), + (0xFE38, "3", "}"), + (0xFE39, "M", "〔"), + (0xFE3A, "M", "〕"), + (0xFE3B, "M", "【"), + (0xFE3C, "M", "】"), + (0xFE3D, "M", "《"), + (0xFE3E, "M", "》"), + (0xFE3F, "M", "〈"), + (0xFE40, "M", "〉"), + (0xFE41, "M", "「"), + (0xFE42, "M", "」"), + (0xFE43, "M", "『"), + (0xFE44, "M", "』"), + (0xFE45, "V"), + (0xFE47, "3", "["), + (0xFE48, "3", "]"), + (0xFE49, "3", " ̅"), + (0xFE4D, "3", "_"), + (0xFE50, "3", ","), + (0xFE51, "M", "、"), + (0xFE52, "X"), + (0xFE54, "3", ";"), + (0xFE55, "3", ":"), + (0xFE56, "3", "?"), + (0xFE57, "3", "!"), + (0xFE58, "M", "—"), + (0xFE59, "3", "("), + (0xFE5A, "3", ")"), + (0xFE5B, "3", "{"), + (0xFE5C, "3", "}"), + (0xFE5D, "M", "〔"), + (0xFE5E, "M", "〕"), + (0xFE5F, "3", "#"), + (0xFE60, "3", "&"), + (0xFE61, "3", "*"), + ] + + +def _seg_50() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xFE62, "3", "+"), + (0xFE63, "M", "-"), + (0xFE64, "3", "<"), + (0xFE65, "3", ">"), + (0xFE66, "3", "="), + (0xFE67, "X"), + (0xFE68, "3", "\\"), + (0xFE69, "3", "$"), + (0xFE6A, "3", "%"), + (0xFE6B, "3", "@"), + (0xFE6C, "X"), + (0xFE70, "3", " ً"), + (0xFE71, "M", "ـً"), + (0xFE72, "3", " ٌ"), + (0xFE73, "V"), + (0xFE74, "3", " ٍ"), + (0xFE75, "X"), + (0xFE76, "3", " َ"), + (0xFE77, "M", "ـَ"), + (0xFE78, 
"3", " ُ"), + (0xFE79, "M", "ـُ"), + (0xFE7A, "3", " ِ"), + (0xFE7B, "M", "ـِ"), + (0xFE7C, "3", " ّ"), + (0xFE7D, "M", "ـّ"), + (0xFE7E, "3", " ْ"), + (0xFE7F, "M", "ـْ"), + (0xFE80, "M", "ء"), + (0xFE81, "M", "آ"), + (0xFE83, "M", "أ"), + (0xFE85, "M", "ؤ"), + (0xFE87, "M", "إ"), + (0xFE89, "M", "ئ"), + (0xFE8D, "M", "ا"), + (0xFE8F, "M", "ب"), + (0xFE93, "M", "ة"), + (0xFE95, "M", "ت"), + (0xFE99, "M", "ث"), + (0xFE9D, "M", "ج"), + (0xFEA1, "M", "ح"), + (0xFEA5, "M", "خ"), + (0xFEA9, "M", "د"), + (0xFEAB, "M", "ذ"), + (0xFEAD, "M", "ر"), + (0xFEAF, "M", "ز"), + (0xFEB1, "M", "س"), + (0xFEB5, "M", "ش"), + (0xFEB9, "M", "ص"), + (0xFEBD, "M", "ض"), + (0xFEC1, "M", "ط"), + (0xFEC5, "M", "ظ"), + (0xFEC9, "M", "ع"), + (0xFECD, "M", "غ"), + (0xFED1, "M", "ف"), + (0xFED5, "M", "ق"), + (0xFED9, "M", "ك"), + (0xFEDD, "M", "ل"), + (0xFEE1, "M", "م"), + (0xFEE5, "M", "ن"), + (0xFEE9, "M", "ه"), + (0xFEED, "M", "و"), + (0xFEEF, "M", "ى"), + (0xFEF1, "M", "ي"), + (0xFEF5, "M", "لآ"), + (0xFEF7, "M", "لأ"), + (0xFEF9, "M", "لإ"), + (0xFEFB, "M", "لا"), + (0xFEFD, "X"), + (0xFEFF, "I"), + (0xFF00, "X"), + (0xFF01, "3", "!"), + (0xFF02, "3", '"'), + (0xFF03, "3", "#"), + (0xFF04, "3", "$"), + (0xFF05, "3", "%"), + (0xFF06, "3", "&"), + (0xFF07, "3", "'"), + (0xFF08, "3", "("), + (0xFF09, "3", ")"), + (0xFF0A, "3", "*"), + (0xFF0B, "3", "+"), + (0xFF0C, "3", ","), + (0xFF0D, "M", "-"), + (0xFF0E, "M", "."), + (0xFF0F, "3", "/"), + (0xFF10, "M", "0"), + (0xFF11, "M", "1"), + (0xFF12, "M", "2"), + (0xFF13, "M", "3"), + (0xFF14, "M", "4"), + (0xFF15, "M", "5"), + (0xFF16, "M", "6"), + (0xFF17, "M", "7"), + (0xFF18, "M", "8"), + (0xFF19, "M", "9"), + (0xFF1A, "3", ":"), + (0xFF1B, "3", ";"), + (0xFF1C, "3", "<"), + (0xFF1D, "3", "="), + (0xFF1E, "3", ">"), + ] + + +def _seg_51() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xFF1F, "3", "?"), + (0xFF20, "3", "@"), + (0xFF21, "M", "a"), + (0xFF22, "M", "b"), + (0xFF23, "M", "c"), + (0xFF24, "M", "d"), + (0xFF25, 
"M", "e"), + (0xFF26, "M", "f"), + (0xFF27, "M", "g"), + (0xFF28, "M", "h"), + (0xFF29, "M", "i"), + (0xFF2A, "M", "j"), + (0xFF2B, "M", "k"), + (0xFF2C, "M", "l"), + (0xFF2D, "M", "m"), + (0xFF2E, "M", "n"), + (0xFF2F, "M", "o"), + (0xFF30, "M", "p"), + (0xFF31, "M", "q"), + (0xFF32, "M", "r"), + (0xFF33, "M", "s"), + (0xFF34, "M", "t"), + (0xFF35, "M", "u"), + (0xFF36, "M", "v"), + (0xFF37, "M", "w"), + (0xFF38, "M", "x"), + (0xFF39, "M", "y"), + (0xFF3A, "M", "z"), + (0xFF3B, "3", "["), + (0xFF3C, "3", "\\"), + (0xFF3D, "3", "]"), + (0xFF3E, "3", "^"), + (0xFF3F, "3", "_"), + (0xFF40, "3", "`"), + (0xFF41, "M", "a"), + (0xFF42, "M", "b"), + (0xFF43, "M", "c"), + (0xFF44, "M", "d"), + (0xFF45, "M", "e"), + (0xFF46, "M", "f"), + (0xFF47, "M", "g"), + (0xFF48, "M", "h"), + (0xFF49, "M", "i"), + (0xFF4A, "M", "j"), + (0xFF4B, "M", "k"), + (0xFF4C, "M", "l"), + (0xFF4D, "M", "m"), + (0xFF4E, "M", "n"), + (0xFF4F, "M", "o"), + (0xFF50, "M", "p"), + (0xFF51, "M", "q"), + (0xFF52, "M", "r"), + (0xFF53, "M", "s"), + (0xFF54, "M", "t"), + (0xFF55, "M", "u"), + (0xFF56, "M", "v"), + (0xFF57, "M", "w"), + (0xFF58, "M", "x"), + (0xFF59, "M", "y"), + (0xFF5A, "M", "z"), + (0xFF5B, "3", "{"), + (0xFF5C, "3", "|"), + (0xFF5D, "3", "}"), + (0xFF5E, "3", "~"), + (0xFF5F, "M", "⦅"), + (0xFF60, "M", "⦆"), + (0xFF61, "M", "."), + (0xFF62, "M", "「"), + (0xFF63, "M", "」"), + (0xFF64, "M", "、"), + (0xFF65, "M", "・"), + (0xFF66, "M", "ヲ"), + (0xFF67, "M", "ァ"), + (0xFF68, "M", "ィ"), + (0xFF69, "M", "ゥ"), + (0xFF6A, "M", "ェ"), + (0xFF6B, "M", "ォ"), + (0xFF6C, "M", "ャ"), + (0xFF6D, "M", "ュ"), + (0xFF6E, "M", "ョ"), + (0xFF6F, "M", "ッ"), + (0xFF70, "M", "ー"), + (0xFF71, "M", "ア"), + (0xFF72, "M", "イ"), + (0xFF73, "M", "ウ"), + (0xFF74, "M", "エ"), + (0xFF75, "M", "オ"), + (0xFF76, "M", "カ"), + (0xFF77, "M", "キ"), + (0xFF78, "M", "ク"), + (0xFF79, "M", "ケ"), + (0xFF7A, "M", "コ"), + (0xFF7B, "M", "サ"), + (0xFF7C, "M", "シ"), + (0xFF7D, "M", "ス"), + (0xFF7E, "M", "セ"), + (0xFF7F, "M", "ソ"), + 
(0xFF80, "M", "タ"), + (0xFF81, "M", "チ"), + (0xFF82, "M", "ツ"), + ] + + +def _seg_52() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xFF83, "M", "テ"), + (0xFF84, "M", "ト"), + (0xFF85, "M", "ナ"), + (0xFF86, "M", "ニ"), + (0xFF87, "M", "ヌ"), + (0xFF88, "M", "ネ"), + (0xFF89, "M", "ノ"), + (0xFF8A, "M", "ハ"), + (0xFF8B, "M", "ヒ"), + (0xFF8C, "M", "フ"), + (0xFF8D, "M", "ヘ"), + (0xFF8E, "M", "ホ"), + (0xFF8F, "M", "マ"), + (0xFF90, "M", "ミ"), + (0xFF91, "M", "ム"), + (0xFF92, "M", "メ"), + (0xFF93, "M", "モ"), + (0xFF94, "M", "ヤ"), + (0xFF95, "M", "ユ"), + (0xFF96, "M", "ヨ"), + (0xFF97, "M", "ラ"), + (0xFF98, "M", "リ"), + (0xFF99, "M", "ル"), + (0xFF9A, "M", "レ"), + (0xFF9B, "M", "ロ"), + (0xFF9C, "M", "ワ"), + (0xFF9D, "M", "ン"), + (0xFF9E, "M", "゙"), + (0xFF9F, "M", "゚"), + (0xFFA0, "X"), + (0xFFA1, "M", "ᄀ"), + (0xFFA2, "M", "ᄁ"), + (0xFFA3, "M", "ᆪ"), + (0xFFA4, "M", "ᄂ"), + (0xFFA5, "M", "ᆬ"), + (0xFFA6, "M", "ᆭ"), + (0xFFA7, "M", "ᄃ"), + (0xFFA8, "M", "ᄄ"), + (0xFFA9, "M", "ᄅ"), + (0xFFAA, "M", "ᆰ"), + (0xFFAB, "M", "ᆱ"), + (0xFFAC, "M", "ᆲ"), + (0xFFAD, "M", "ᆳ"), + (0xFFAE, "M", "ᆴ"), + (0xFFAF, "M", "ᆵ"), + (0xFFB0, "M", "ᄚ"), + (0xFFB1, "M", "ᄆ"), + (0xFFB2, "M", "ᄇ"), + (0xFFB3, "M", "ᄈ"), + (0xFFB4, "M", "ᄡ"), + (0xFFB5, "M", "ᄉ"), + (0xFFB6, "M", "ᄊ"), + (0xFFB7, "M", "ᄋ"), + (0xFFB8, "M", "ᄌ"), + (0xFFB9, "M", "ᄍ"), + (0xFFBA, "M", "ᄎ"), + (0xFFBB, "M", "ᄏ"), + (0xFFBC, "M", "ᄐ"), + (0xFFBD, "M", "ᄑ"), + (0xFFBE, "M", "ᄒ"), + (0xFFBF, "X"), + (0xFFC2, "M", "ᅡ"), + (0xFFC3, "M", "ᅢ"), + (0xFFC4, "M", "ᅣ"), + (0xFFC5, "M", "ᅤ"), + (0xFFC6, "M", "ᅥ"), + (0xFFC7, "M", "ᅦ"), + (0xFFC8, "X"), + (0xFFCA, "M", "ᅧ"), + (0xFFCB, "M", "ᅨ"), + (0xFFCC, "M", "ᅩ"), + (0xFFCD, "M", "ᅪ"), + (0xFFCE, "M", "ᅫ"), + (0xFFCF, "M", "ᅬ"), + (0xFFD0, "X"), + (0xFFD2, "M", "ᅭ"), + (0xFFD3, "M", "ᅮ"), + (0xFFD4, "M", "ᅯ"), + (0xFFD5, "M", "ᅰ"), + (0xFFD6, "M", "ᅱ"), + (0xFFD7, "M", "ᅲ"), + (0xFFD8, "X"), + (0xFFDA, "M", "ᅳ"), + (0xFFDB, "M", "ᅴ"), + (0xFFDC, "M", "ᅵ"), + 
(0xFFDD, "X"), + (0xFFE0, "M", "¢"), + (0xFFE1, "M", "£"), + (0xFFE2, "M", "¬"), + (0xFFE3, "3", " ̄"), + (0xFFE4, "M", "¦"), + (0xFFE5, "M", "¥"), + (0xFFE6, "M", "₩"), + (0xFFE7, "X"), + (0xFFE8, "M", "│"), + (0xFFE9, "M", "←"), + (0xFFEA, "M", "↑"), + (0xFFEB, "M", "→"), + (0xFFEC, "M", "↓"), + (0xFFED, "M", "■"), + ] + + +def _seg_53() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xFFEE, "M", "○"), + (0xFFEF, "X"), + (0x10000, "V"), + (0x1000C, "X"), + (0x1000D, "V"), + (0x10027, "X"), + (0x10028, "V"), + (0x1003B, "X"), + (0x1003C, "V"), + (0x1003E, "X"), + (0x1003F, "V"), + (0x1004E, "X"), + (0x10050, "V"), + (0x1005E, "X"), + (0x10080, "V"), + (0x100FB, "X"), + (0x10100, "V"), + (0x10103, "X"), + (0x10107, "V"), + (0x10134, "X"), + (0x10137, "V"), + (0x1018F, "X"), + (0x10190, "V"), + (0x1019D, "X"), + (0x101A0, "V"), + (0x101A1, "X"), + (0x101D0, "V"), + (0x101FE, "X"), + (0x10280, "V"), + (0x1029D, "X"), + (0x102A0, "V"), + (0x102D1, "X"), + (0x102E0, "V"), + (0x102FC, "X"), + (0x10300, "V"), + (0x10324, "X"), + (0x1032D, "V"), + (0x1034B, "X"), + (0x10350, "V"), + (0x1037B, "X"), + (0x10380, "V"), + (0x1039E, "X"), + (0x1039F, "V"), + (0x103C4, "X"), + (0x103C8, "V"), + (0x103D6, "X"), + (0x10400, "M", "𐐨"), + (0x10401, "M", "𐐩"), + (0x10402, "M", "𐐪"), + (0x10403, "M", "𐐫"), + (0x10404, "M", "𐐬"), + (0x10405, "M", "𐐭"), + (0x10406, "M", "𐐮"), + (0x10407, "M", "𐐯"), + (0x10408, "M", "𐐰"), + (0x10409, "M", "𐐱"), + (0x1040A, "M", "𐐲"), + (0x1040B, "M", "𐐳"), + (0x1040C, "M", "𐐴"), + (0x1040D, "M", "𐐵"), + (0x1040E, "M", "𐐶"), + (0x1040F, "M", "𐐷"), + (0x10410, "M", "𐐸"), + (0x10411, "M", "𐐹"), + (0x10412, "M", "𐐺"), + (0x10413, "M", "𐐻"), + (0x10414, "M", "𐐼"), + (0x10415, "M", "𐐽"), + (0x10416, "M", "𐐾"), + (0x10417, "M", "𐐿"), + (0x10418, "M", "𐑀"), + (0x10419, "M", "𐑁"), + (0x1041A, "M", "𐑂"), + (0x1041B, "M", "𐑃"), + (0x1041C, "M", "𐑄"), + (0x1041D, "M", "𐑅"), + (0x1041E, "M", "𐑆"), + (0x1041F, "M", "𐑇"), + (0x10420, "M", "𐑈"), + 
(0x10421, "M", "𐑉"), + (0x10422, "M", "𐑊"), + (0x10423, "M", "𐑋"), + (0x10424, "M", "𐑌"), + (0x10425, "M", "𐑍"), + (0x10426, "M", "𐑎"), + (0x10427, "M", "𐑏"), + (0x10428, "V"), + (0x1049E, "X"), + (0x104A0, "V"), + (0x104AA, "X"), + (0x104B0, "M", "𐓘"), + (0x104B1, "M", "𐓙"), + (0x104B2, "M", "𐓚"), + (0x104B3, "M", "𐓛"), + (0x104B4, "M", "𐓜"), + (0x104B5, "M", "𐓝"), + (0x104B6, "M", "𐓞"), + (0x104B7, "M", "𐓟"), + (0x104B8, "M", "𐓠"), + (0x104B9, "M", "𐓡"), + ] + + +def _seg_54() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x104BA, "M", "𐓢"), + (0x104BB, "M", "𐓣"), + (0x104BC, "M", "𐓤"), + (0x104BD, "M", "𐓥"), + (0x104BE, "M", "𐓦"), + (0x104BF, "M", "𐓧"), + (0x104C0, "M", "𐓨"), + (0x104C1, "M", "𐓩"), + (0x104C2, "M", "𐓪"), + (0x104C3, "M", "𐓫"), + (0x104C4, "M", "𐓬"), + (0x104C5, "M", "𐓭"), + (0x104C6, "M", "𐓮"), + (0x104C7, "M", "𐓯"), + (0x104C8, "M", "𐓰"), + (0x104C9, "M", "𐓱"), + (0x104CA, "M", "𐓲"), + (0x104CB, "M", "𐓳"), + (0x104CC, "M", "𐓴"), + (0x104CD, "M", "𐓵"), + (0x104CE, "M", "𐓶"), + (0x104CF, "M", "𐓷"), + (0x104D0, "M", "𐓸"), + (0x104D1, "M", "𐓹"), + (0x104D2, "M", "𐓺"), + (0x104D3, "M", "𐓻"), + (0x104D4, "X"), + (0x104D8, "V"), + (0x104FC, "X"), + (0x10500, "V"), + (0x10528, "X"), + (0x10530, "V"), + (0x10564, "X"), + (0x1056F, "V"), + (0x10570, "M", "𐖗"), + (0x10571, "M", "𐖘"), + (0x10572, "M", "𐖙"), + (0x10573, "M", "𐖚"), + (0x10574, "M", "𐖛"), + (0x10575, "M", "𐖜"), + (0x10576, "M", "𐖝"), + (0x10577, "M", "𐖞"), + (0x10578, "M", "𐖟"), + (0x10579, "M", "𐖠"), + (0x1057A, "M", "𐖡"), + (0x1057B, "X"), + (0x1057C, "M", "𐖣"), + (0x1057D, "M", "𐖤"), + (0x1057E, "M", "𐖥"), + (0x1057F, "M", "𐖦"), + (0x10580, "M", "𐖧"), + (0x10581, "M", "𐖨"), + (0x10582, "M", "𐖩"), + (0x10583, "M", "𐖪"), + (0x10584, "M", "𐖫"), + (0x10585, "M", "𐖬"), + (0x10586, "M", "𐖭"), + (0x10587, "M", "𐖮"), + (0x10588, "M", "𐖯"), + (0x10589, "M", "𐖰"), + (0x1058A, "M", "𐖱"), + (0x1058B, "X"), + (0x1058C, "M", "𐖳"), + (0x1058D, "M", "𐖴"), + (0x1058E, "M", "𐖵"), + 
(0x1058F, "M", "𐖶"), + (0x10590, "M", "𐖷"), + (0x10591, "M", "𐖸"), + (0x10592, "M", "𐖹"), + (0x10593, "X"), + (0x10594, "M", "𐖻"), + (0x10595, "M", "𐖼"), + (0x10596, "X"), + (0x10597, "V"), + (0x105A2, "X"), + (0x105A3, "V"), + (0x105B2, "X"), + (0x105B3, "V"), + (0x105BA, "X"), + (0x105BB, "V"), + (0x105BD, "X"), + (0x10600, "V"), + (0x10737, "X"), + (0x10740, "V"), + (0x10756, "X"), + (0x10760, "V"), + (0x10768, "X"), + (0x10780, "V"), + (0x10781, "M", "ː"), + (0x10782, "M", "ˑ"), + (0x10783, "M", "æ"), + (0x10784, "M", "ʙ"), + (0x10785, "M", "ɓ"), + (0x10786, "X"), + (0x10787, "M", "ʣ"), + (0x10788, "M", "ꭦ"), + (0x10789, "M", "ʥ"), + (0x1078A, "M", "ʤ"), + (0x1078B, "M", "ɖ"), + (0x1078C, "M", "ɗ"), + ] + + +def _seg_55() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1078D, "M", "ᶑ"), + (0x1078E, "M", "ɘ"), + (0x1078F, "M", "ɞ"), + (0x10790, "M", "ʩ"), + (0x10791, "M", "ɤ"), + (0x10792, "M", "ɢ"), + (0x10793, "M", "ɠ"), + (0x10794, "M", "ʛ"), + (0x10795, "M", "ħ"), + (0x10796, "M", "ʜ"), + (0x10797, "M", "ɧ"), + (0x10798, "M", "ʄ"), + (0x10799, "M", "ʪ"), + (0x1079A, "M", "ʫ"), + (0x1079B, "M", "ɬ"), + (0x1079C, "M", "𝼄"), + (0x1079D, "M", "ꞎ"), + (0x1079E, "M", "ɮ"), + (0x1079F, "M", "𝼅"), + (0x107A0, "M", "ʎ"), + (0x107A1, "M", "𝼆"), + (0x107A2, "M", "ø"), + (0x107A3, "M", "ɶ"), + (0x107A4, "M", "ɷ"), + (0x107A5, "M", "q"), + (0x107A6, "M", "ɺ"), + (0x107A7, "M", "𝼈"), + (0x107A8, "M", "ɽ"), + (0x107A9, "M", "ɾ"), + (0x107AA, "M", "ʀ"), + (0x107AB, "M", "ʨ"), + (0x107AC, "M", "ʦ"), + (0x107AD, "M", "ꭧ"), + (0x107AE, "M", "ʧ"), + (0x107AF, "M", "ʈ"), + (0x107B0, "M", "ⱱ"), + (0x107B1, "X"), + (0x107B2, "M", "ʏ"), + (0x107B3, "M", "ʡ"), + (0x107B4, "M", "ʢ"), + (0x107B5, "M", "ʘ"), + (0x107B6, "M", "ǀ"), + (0x107B7, "M", "ǁ"), + (0x107B8, "M", "ǂ"), + (0x107B9, "M", "𝼊"), + (0x107BA, "M", "𝼞"), + (0x107BB, "X"), + (0x10800, "V"), + (0x10806, "X"), + (0x10808, "V"), + (0x10809, "X"), + (0x1080A, "V"), + (0x10836, "X"), + (0x10837, "V"), 
+ (0x10839, "X"), + (0x1083C, "V"), + (0x1083D, "X"), + (0x1083F, "V"), + (0x10856, "X"), + (0x10857, "V"), + (0x1089F, "X"), + (0x108A7, "V"), + (0x108B0, "X"), + (0x108E0, "V"), + (0x108F3, "X"), + (0x108F4, "V"), + (0x108F6, "X"), + (0x108FB, "V"), + (0x1091C, "X"), + (0x1091F, "V"), + (0x1093A, "X"), + (0x1093F, "V"), + (0x10940, "X"), + (0x10980, "V"), + (0x109B8, "X"), + (0x109BC, "V"), + (0x109D0, "X"), + (0x109D2, "V"), + (0x10A04, "X"), + (0x10A05, "V"), + (0x10A07, "X"), + (0x10A0C, "V"), + (0x10A14, "X"), + (0x10A15, "V"), + (0x10A18, "X"), + (0x10A19, "V"), + (0x10A36, "X"), + (0x10A38, "V"), + (0x10A3B, "X"), + (0x10A3F, "V"), + (0x10A49, "X"), + (0x10A50, "V"), + (0x10A59, "X"), + (0x10A60, "V"), + (0x10AA0, "X"), + (0x10AC0, "V"), + (0x10AE7, "X"), + (0x10AEB, "V"), + (0x10AF7, "X"), + (0x10B00, "V"), + ] + + +def _seg_56() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x10B36, "X"), + (0x10B39, "V"), + (0x10B56, "X"), + (0x10B58, "V"), + (0x10B73, "X"), + (0x10B78, "V"), + (0x10B92, "X"), + (0x10B99, "V"), + (0x10B9D, "X"), + (0x10BA9, "V"), + (0x10BB0, "X"), + (0x10C00, "V"), + (0x10C49, "X"), + (0x10C80, "M", "𐳀"), + (0x10C81, "M", "𐳁"), + (0x10C82, "M", "𐳂"), + (0x10C83, "M", "𐳃"), + (0x10C84, "M", "𐳄"), + (0x10C85, "M", "𐳅"), + (0x10C86, "M", "𐳆"), + (0x10C87, "M", "𐳇"), + (0x10C88, "M", "𐳈"), + (0x10C89, "M", "𐳉"), + (0x10C8A, "M", "𐳊"), + (0x10C8B, "M", "𐳋"), + (0x10C8C, "M", "𐳌"), + (0x10C8D, "M", "𐳍"), + (0x10C8E, "M", "𐳎"), + (0x10C8F, "M", "𐳏"), + (0x10C90, "M", "𐳐"), + (0x10C91, "M", "𐳑"), + (0x10C92, "M", "𐳒"), + (0x10C93, "M", "𐳓"), + (0x10C94, "M", "𐳔"), + (0x10C95, "M", "𐳕"), + (0x10C96, "M", "𐳖"), + (0x10C97, "M", "𐳗"), + (0x10C98, "M", "𐳘"), + (0x10C99, "M", "𐳙"), + (0x10C9A, "M", "𐳚"), + (0x10C9B, "M", "𐳛"), + (0x10C9C, "M", "𐳜"), + (0x10C9D, "M", "𐳝"), + (0x10C9E, "M", "𐳞"), + (0x10C9F, "M", "𐳟"), + (0x10CA0, "M", "𐳠"), + (0x10CA1, "M", "𐳡"), + (0x10CA2, "M", "𐳢"), + (0x10CA3, "M", "𐳣"), + (0x10CA4, "M", 
"𐳤"), + (0x10CA5, "M", "𐳥"), + (0x10CA6, "M", "𐳦"), + (0x10CA7, "M", "𐳧"), + (0x10CA8, "M", "𐳨"), + (0x10CA9, "M", "𐳩"), + (0x10CAA, "M", "𐳪"), + (0x10CAB, "M", "𐳫"), + (0x10CAC, "M", "𐳬"), + (0x10CAD, "M", "𐳭"), + (0x10CAE, "M", "𐳮"), + (0x10CAF, "M", "𐳯"), + (0x10CB0, "M", "𐳰"), + (0x10CB1, "M", "𐳱"), + (0x10CB2, "M", "𐳲"), + (0x10CB3, "X"), + (0x10CC0, "V"), + (0x10CF3, "X"), + (0x10CFA, "V"), + (0x10D28, "X"), + (0x10D30, "V"), + (0x10D3A, "X"), + (0x10E60, "V"), + (0x10E7F, "X"), + (0x10E80, "V"), + (0x10EAA, "X"), + (0x10EAB, "V"), + (0x10EAE, "X"), + (0x10EB0, "V"), + (0x10EB2, "X"), + (0x10EFD, "V"), + (0x10F28, "X"), + (0x10F30, "V"), + (0x10F5A, "X"), + (0x10F70, "V"), + (0x10F8A, "X"), + (0x10FB0, "V"), + (0x10FCC, "X"), + (0x10FE0, "V"), + (0x10FF7, "X"), + (0x11000, "V"), + (0x1104E, "X"), + (0x11052, "V"), + (0x11076, "X"), + (0x1107F, "V"), + (0x110BD, "X"), + (0x110BE, "V"), + (0x110C3, "X"), + (0x110D0, "V"), + (0x110E9, "X"), + (0x110F0, "V"), + ] + + +def _seg_57() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x110FA, "X"), + (0x11100, "V"), + (0x11135, "X"), + (0x11136, "V"), + (0x11148, "X"), + (0x11150, "V"), + (0x11177, "X"), + (0x11180, "V"), + (0x111E0, "X"), + (0x111E1, "V"), + (0x111F5, "X"), + (0x11200, "V"), + (0x11212, "X"), + (0x11213, "V"), + (0x11242, "X"), + (0x11280, "V"), + (0x11287, "X"), + (0x11288, "V"), + (0x11289, "X"), + (0x1128A, "V"), + (0x1128E, "X"), + (0x1128F, "V"), + (0x1129E, "X"), + (0x1129F, "V"), + (0x112AA, "X"), + (0x112B0, "V"), + (0x112EB, "X"), + (0x112F0, "V"), + (0x112FA, "X"), + (0x11300, "V"), + (0x11304, "X"), + (0x11305, "V"), + (0x1130D, "X"), + (0x1130F, "V"), + (0x11311, "X"), + (0x11313, "V"), + (0x11329, "X"), + (0x1132A, "V"), + (0x11331, "X"), + (0x11332, "V"), + (0x11334, "X"), + (0x11335, "V"), + (0x1133A, "X"), + (0x1133B, "V"), + (0x11345, "X"), + (0x11347, "V"), + (0x11349, "X"), + (0x1134B, "V"), + (0x1134E, "X"), + (0x11350, "V"), + (0x11351, "X"), + (0x11357, 
"V"), + (0x11358, "X"), + (0x1135D, "V"), + (0x11364, "X"), + (0x11366, "V"), + (0x1136D, "X"), + (0x11370, "V"), + (0x11375, "X"), + (0x11400, "V"), + (0x1145C, "X"), + (0x1145D, "V"), + (0x11462, "X"), + (0x11480, "V"), + (0x114C8, "X"), + (0x114D0, "V"), + (0x114DA, "X"), + (0x11580, "V"), + (0x115B6, "X"), + (0x115B8, "V"), + (0x115DE, "X"), + (0x11600, "V"), + (0x11645, "X"), + (0x11650, "V"), + (0x1165A, "X"), + (0x11660, "V"), + (0x1166D, "X"), + (0x11680, "V"), + (0x116BA, "X"), + (0x116C0, "V"), + (0x116CA, "X"), + (0x11700, "V"), + (0x1171B, "X"), + (0x1171D, "V"), + (0x1172C, "X"), + (0x11730, "V"), + (0x11747, "X"), + (0x11800, "V"), + (0x1183C, "X"), + (0x118A0, "M", "𑣀"), + (0x118A1, "M", "𑣁"), + (0x118A2, "M", "𑣂"), + (0x118A3, "M", "𑣃"), + (0x118A4, "M", "𑣄"), + (0x118A5, "M", "𑣅"), + (0x118A6, "M", "𑣆"), + (0x118A7, "M", "𑣇"), + (0x118A8, "M", "𑣈"), + (0x118A9, "M", "𑣉"), + (0x118AA, "M", "𑣊"), + ] + + +def _seg_58() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x118AB, "M", "𑣋"), + (0x118AC, "M", "𑣌"), + (0x118AD, "M", "𑣍"), + (0x118AE, "M", "𑣎"), + (0x118AF, "M", "𑣏"), + (0x118B0, "M", "𑣐"), + (0x118B1, "M", "𑣑"), + (0x118B2, "M", "𑣒"), + (0x118B3, "M", "𑣓"), + (0x118B4, "M", "𑣔"), + (0x118B5, "M", "𑣕"), + (0x118B6, "M", "𑣖"), + (0x118B7, "M", "𑣗"), + (0x118B8, "M", "𑣘"), + (0x118B9, "M", "𑣙"), + (0x118BA, "M", "𑣚"), + (0x118BB, "M", "𑣛"), + (0x118BC, "M", "𑣜"), + (0x118BD, "M", "𑣝"), + (0x118BE, "M", "𑣞"), + (0x118BF, "M", "𑣟"), + (0x118C0, "V"), + (0x118F3, "X"), + (0x118FF, "V"), + (0x11907, "X"), + (0x11909, "V"), + (0x1190A, "X"), + (0x1190C, "V"), + (0x11914, "X"), + (0x11915, "V"), + (0x11917, "X"), + (0x11918, "V"), + (0x11936, "X"), + (0x11937, "V"), + (0x11939, "X"), + (0x1193B, "V"), + (0x11947, "X"), + (0x11950, "V"), + (0x1195A, "X"), + (0x119A0, "V"), + (0x119A8, "X"), + (0x119AA, "V"), + (0x119D8, "X"), + (0x119DA, "V"), + (0x119E5, "X"), + (0x11A00, "V"), + (0x11A48, "X"), + (0x11A50, "V"), + (0x11AA3, 
"X"), + (0x11AB0, "V"), + (0x11AF9, "X"), + (0x11B00, "V"), + (0x11B0A, "X"), + (0x11C00, "V"), + (0x11C09, "X"), + (0x11C0A, "V"), + (0x11C37, "X"), + (0x11C38, "V"), + (0x11C46, "X"), + (0x11C50, "V"), + (0x11C6D, "X"), + (0x11C70, "V"), + (0x11C90, "X"), + (0x11C92, "V"), + (0x11CA8, "X"), + (0x11CA9, "V"), + (0x11CB7, "X"), + (0x11D00, "V"), + (0x11D07, "X"), + (0x11D08, "V"), + (0x11D0A, "X"), + (0x11D0B, "V"), + (0x11D37, "X"), + (0x11D3A, "V"), + (0x11D3B, "X"), + (0x11D3C, "V"), + (0x11D3E, "X"), + (0x11D3F, "V"), + (0x11D48, "X"), + (0x11D50, "V"), + (0x11D5A, "X"), + (0x11D60, "V"), + (0x11D66, "X"), + (0x11D67, "V"), + (0x11D69, "X"), + (0x11D6A, "V"), + (0x11D8F, "X"), + (0x11D90, "V"), + (0x11D92, "X"), + (0x11D93, "V"), + (0x11D99, "X"), + (0x11DA0, "V"), + (0x11DAA, "X"), + (0x11EE0, "V"), + (0x11EF9, "X"), + (0x11F00, "V"), + (0x11F11, "X"), + (0x11F12, "V"), + (0x11F3B, "X"), + (0x11F3E, "V"), + ] + + +def _seg_59() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x11F5A, "X"), + (0x11FB0, "V"), + (0x11FB1, "X"), + (0x11FC0, "V"), + (0x11FF2, "X"), + (0x11FFF, "V"), + (0x1239A, "X"), + (0x12400, "V"), + (0x1246F, "X"), + (0x12470, "V"), + (0x12475, "X"), + (0x12480, "V"), + (0x12544, "X"), + (0x12F90, "V"), + (0x12FF3, "X"), + (0x13000, "V"), + (0x13430, "X"), + (0x13440, "V"), + (0x13456, "X"), + (0x14400, "V"), + (0x14647, "X"), + (0x16800, "V"), + (0x16A39, "X"), + (0x16A40, "V"), + (0x16A5F, "X"), + (0x16A60, "V"), + (0x16A6A, "X"), + (0x16A6E, "V"), + (0x16ABF, "X"), + (0x16AC0, "V"), + (0x16ACA, "X"), + (0x16AD0, "V"), + (0x16AEE, "X"), + (0x16AF0, "V"), + (0x16AF6, "X"), + (0x16B00, "V"), + (0x16B46, "X"), + (0x16B50, "V"), + (0x16B5A, "X"), + (0x16B5B, "V"), + (0x16B62, "X"), + (0x16B63, "V"), + (0x16B78, "X"), + (0x16B7D, "V"), + (0x16B90, "X"), + (0x16E40, "M", "𖹠"), + (0x16E41, "M", "𖹡"), + (0x16E42, "M", "𖹢"), + (0x16E43, "M", "𖹣"), + (0x16E44, "M", "𖹤"), + (0x16E45, "M", "𖹥"), + (0x16E46, "M", "𖹦"), + (0x16E47, 
"M", "𖹧"), + (0x16E48, "M", "𖹨"), + (0x16E49, "M", "𖹩"), + (0x16E4A, "M", "𖹪"), + (0x16E4B, "M", "𖹫"), + (0x16E4C, "M", "𖹬"), + (0x16E4D, "M", "𖹭"), + (0x16E4E, "M", "𖹮"), + (0x16E4F, "M", "𖹯"), + (0x16E50, "M", "𖹰"), + (0x16E51, "M", "𖹱"), + (0x16E52, "M", "𖹲"), + (0x16E53, "M", "𖹳"), + (0x16E54, "M", "𖹴"), + (0x16E55, "M", "𖹵"), + (0x16E56, "M", "𖹶"), + (0x16E57, "M", "𖹷"), + (0x16E58, "M", "𖹸"), + (0x16E59, "M", "𖹹"), + (0x16E5A, "M", "𖹺"), + (0x16E5B, "M", "𖹻"), + (0x16E5C, "M", "𖹼"), + (0x16E5D, "M", "𖹽"), + (0x16E5E, "M", "𖹾"), + (0x16E5F, "M", "𖹿"), + (0x16E60, "V"), + (0x16E9B, "X"), + (0x16F00, "V"), + (0x16F4B, "X"), + (0x16F4F, "V"), + (0x16F88, "X"), + (0x16F8F, "V"), + (0x16FA0, "X"), + (0x16FE0, "V"), + (0x16FE5, "X"), + (0x16FF0, "V"), + (0x16FF2, "X"), + (0x17000, "V"), + (0x187F8, "X"), + (0x18800, "V"), + (0x18CD6, "X"), + (0x18D00, "V"), + (0x18D09, "X"), + (0x1AFF0, "V"), + (0x1AFF4, "X"), + (0x1AFF5, "V"), + (0x1AFFC, "X"), + (0x1AFFD, "V"), + ] + + +def _seg_60() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1AFFF, "X"), + (0x1B000, "V"), + (0x1B123, "X"), + (0x1B132, "V"), + (0x1B133, "X"), + (0x1B150, "V"), + (0x1B153, "X"), + (0x1B155, "V"), + (0x1B156, "X"), + (0x1B164, "V"), + (0x1B168, "X"), + (0x1B170, "V"), + (0x1B2FC, "X"), + (0x1BC00, "V"), + (0x1BC6B, "X"), + (0x1BC70, "V"), + (0x1BC7D, "X"), + (0x1BC80, "V"), + (0x1BC89, "X"), + (0x1BC90, "V"), + (0x1BC9A, "X"), + (0x1BC9C, "V"), + (0x1BCA0, "I"), + (0x1BCA4, "X"), + (0x1CF00, "V"), + (0x1CF2E, "X"), + (0x1CF30, "V"), + (0x1CF47, "X"), + (0x1CF50, "V"), + (0x1CFC4, "X"), + (0x1D000, "V"), + (0x1D0F6, "X"), + (0x1D100, "V"), + (0x1D127, "X"), + (0x1D129, "V"), + (0x1D15E, "M", "𝅗𝅥"), + (0x1D15F, "M", "𝅘𝅥"), + (0x1D160, "M", "𝅘𝅥𝅮"), + (0x1D161, "M", "𝅘𝅥𝅯"), + (0x1D162, "M", "𝅘𝅥𝅰"), + (0x1D163, "M", "𝅘𝅥𝅱"), + (0x1D164, "M", "𝅘𝅥𝅲"), + (0x1D165, "V"), + (0x1D173, "X"), + (0x1D17B, "V"), + (0x1D1BB, "M", "𝆹𝅥"), + (0x1D1BC, "M", "𝆺𝅥"), + (0x1D1BD, "M", "𝆹𝅥𝅮"), + 
(0x1D1BE, "M", "𝆺𝅥𝅮"), + (0x1D1BF, "M", "𝆹𝅥𝅯"), + (0x1D1C0, "M", "𝆺𝅥𝅯"), + (0x1D1C1, "V"), + (0x1D1EB, "X"), + (0x1D200, "V"), + (0x1D246, "X"), + (0x1D2C0, "V"), + (0x1D2D4, "X"), + (0x1D2E0, "V"), + (0x1D2F4, "X"), + (0x1D300, "V"), + (0x1D357, "X"), + (0x1D360, "V"), + (0x1D379, "X"), + (0x1D400, "M", "a"), + (0x1D401, "M", "b"), + (0x1D402, "M", "c"), + (0x1D403, "M", "d"), + (0x1D404, "M", "e"), + (0x1D405, "M", "f"), + (0x1D406, "M", "g"), + (0x1D407, "M", "h"), + (0x1D408, "M", "i"), + (0x1D409, "M", "j"), + (0x1D40A, "M", "k"), + (0x1D40B, "M", "l"), + (0x1D40C, "M", "m"), + (0x1D40D, "M", "n"), + (0x1D40E, "M", "o"), + (0x1D40F, "M", "p"), + (0x1D410, "M", "q"), + (0x1D411, "M", "r"), + (0x1D412, "M", "s"), + (0x1D413, "M", "t"), + (0x1D414, "M", "u"), + (0x1D415, "M", "v"), + (0x1D416, "M", "w"), + (0x1D417, "M", "x"), + (0x1D418, "M", "y"), + (0x1D419, "M", "z"), + (0x1D41A, "M", "a"), + (0x1D41B, "M", "b"), + (0x1D41C, "M", "c"), + (0x1D41D, "M", "d"), + (0x1D41E, "M", "e"), + (0x1D41F, "M", "f"), + (0x1D420, "M", "g"), + (0x1D421, "M", "h"), + (0x1D422, "M", "i"), + (0x1D423, "M", "j"), + (0x1D424, "M", "k"), + ] + + +def _seg_61() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1D425, "M", "l"), + (0x1D426, "M", "m"), + (0x1D427, "M", "n"), + (0x1D428, "M", "o"), + (0x1D429, "M", "p"), + (0x1D42A, "M", "q"), + (0x1D42B, "M", "r"), + (0x1D42C, "M", "s"), + (0x1D42D, "M", "t"), + (0x1D42E, "M", "u"), + (0x1D42F, "M", "v"), + (0x1D430, "M", "w"), + (0x1D431, "M", "x"), + (0x1D432, "M", "y"), + (0x1D433, "M", "z"), + (0x1D434, "M", "a"), + (0x1D435, "M", "b"), + (0x1D436, "M", "c"), + (0x1D437, "M", "d"), + (0x1D438, "M", "e"), + (0x1D439, "M", "f"), + (0x1D43A, "M", "g"), + (0x1D43B, "M", "h"), + (0x1D43C, "M", "i"), + (0x1D43D, "M", "j"), + (0x1D43E, "M", "k"), + (0x1D43F, "M", "l"), + (0x1D440, "M", "m"), + (0x1D441, "M", "n"), + (0x1D442, "M", "o"), + (0x1D443, "M", "p"), + (0x1D444, "M", "q"), + (0x1D445, "M", "r"), + (0x1D446, 
"M", "s"), + (0x1D447, "M", "t"), + (0x1D448, "M", "u"), + (0x1D449, "M", "v"), + (0x1D44A, "M", "w"), + (0x1D44B, "M", "x"), + (0x1D44C, "M", "y"), + (0x1D44D, "M", "z"), + (0x1D44E, "M", "a"), + (0x1D44F, "M", "b"), + (0x1D450, "M", "c"), + (0x1D451, "M", "d"), + (0x1D452, "M", "e"), + (0x1D453, "M", "f"), + (0x1D454, "M", "g"), + (0x1D455, "X"), + (0x1D456, "M", "i"), + (0x1D457, "M", "j"), + (0x1D458, "M", "k"), + (0x1D459, "M", "l"), + (0x1D45A, "M", "m"), + (0x1D45B, "M", "n"), + (0x1D45C, "M", "o"), + (0x1D45D, "M", "p"), + (0x1D45E, "M", "q"), + (0x1D45F, "M", "r"), + (0x1D460, "M", "s"), + (0x1D461, "M", "t"), + (0x1D462, "M", "u"), + (0x1D463, "M", "v"), + (0x1D464, "M", "w"), + (0x1D465, "M", "x"), + (0x1D466, "M", "y"), + (0x1D467, "M", "z"), + (0x1D468, "M", "a"), + (0x1D469, "M", "b"), + (0x1D46A, "M", "c"), + (0x1D46B, "M", "d"), + (0x1D46C, "M", "e"), + (0x1D46D, "M", "f"), + (0x1D46E, "M", "g"), + (0x1D46F, "M", "h"), + (0x1D470, "M", "i"), + (0x1D471, "M", "j"), + (0x1D472, "M", "k"), + (0x1D473, "M", "l"), + (0x1D474, "M", "m"), + (0x1D475, "M", "n"), + (0x1D476, "M", "o"), + (0x1D477, "M", "p"), + (0x1D478, "M", "q"), + (0x1D479, "M", "r"), + (0x1D47A, "M", "s"), + (0x1D47B, "M", "t"), + (0x1D47C, "M", "u"), + (0x1D47D, "M", "v"), + (0x1D47E, "M", "w"), + (0x1D47F, "M", "x"), + (0x1D480, "M", "y"), + (0x1D481, "M", "z"), + (0x1D482, "M", "a"), + (0x1D483, "M", "b"), + (0x1D484, "M", "c"), + (0x1D485, "M", "d"), + (0x1D486, "M", "e"), + (0x1D487, "M", "f"), + (0x1D488, "M", "g"), + ] + + +def _seg_62() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1D489, "M", "h"), + (0x1D48A, "M", "i"), + (0x1D48B, "M", "j"), + (0x1D48C, "M", "k"), + (0x1D48D, "M", "l"), + (0x1D48E, "M", "m"), + (0x1D48F, "M", "n"), + (0x1D490, "M", "o"), + (0x1D491, "M", "p"), + (0x1D492, "M", "q"), + (0x1D493, "M", "r"), + (0x1D494, "M", "s"), + (0x1D495, "M", "t"), + (0x1D496, "M", "u"), + (0x1D497, "M", "v"), + (0x1D498, "M", "w"), + (0x1D499, "M", 
"x"), + (0x1D49A, "M", "y"), + (0x1D49B, "M", "z"), + (0x1D49C, "M", "a"), + (0x1D49D, "X"), + (0x1D49E, "M", "c"), + (0x1D49F, "M", "d"), + (0x1D4A0, "X"), + (0x1D4A2, "M", "g"), + (0x1D4A3, "X"), + (0x1D4A5, "M", "j"), + (0x1D4A6, "M", "k"), + (0x1D4A7, "X"), + (0x1D4A9, "M", "n"), + (0x1D4AA, "M", "o"), + (0x1D4AB, "M", "p"), + (0x1D4AC, "M", "q"), + (0x1D4AD, "X"), + (0x1D4AE, "M", "s"), + (0x1D4AF, "M", "t"), + (0x1D4B0, "M", "u"), + (0x1D4B1, "M", "v"), + (0x1D4B2, "M", "w"), + (0x1D4B3, "M", "x"), + (0x1D4B4, "M", "y"), + (0x1D4B5, "M", "z"), + (0x1D4B6, "M", "a"), + (0x1D4B7, "M", "b"), + (0x1D4B8, "M", "c"), + (0x1D4B9, "M", "d"), + (0x1D4BA, "X"), + (0x1D4BB, "M", "f"), + (0x1D4BC, "X"), + (0x1D4BD, "M", "h"), + (0x1D4BE, "M", "i"), + (0x1D4BF, "M", "j"), + (0x1D4C0, "M", "k"), + (0x1D4C1, "M", "l"), + (0x1D4C2, "M", "m"), + (0x1D4C3, "M", "n"), + (0x1D4C4, "X"), + (0x1D4C5, "M", "p"), + (0x1D4C6, "M", "q"), + (0x1D4C7, "M", "r"), + (0x1D4C8, "M", "s"), + (0x1D4C9, "M", "t"), + (0x1D4CA, "M", "u"), + (0x1D4CB, "M", "v"), + (0x1D4CC, "M", "w"), + (0x1D4CD, "M", "x"), + (0x1D4CE, "M", "y"), + (0x1D4CF, "M", "z"), + (0x1D4D0, "M", "a"), + (0x1D4D1, "M", "b"), + (0x1D4D2, "M", "c"), + (0x1D4D3, "M", "d"), + (0x1D4D4, "M", "e"), + (0x1D4D5, "M", "f"), + (0x1D4D6, "M", "g"), + (0x1D4D7, "M", "h"), + (0x1D4D8, "M", "i"), + (0x1D4D9, "M", "j"), + (0x1D4DA, "M", "k"), + (0x1D4DB, "M", "l"), + (0x1D4DC, "M", "m"), + (0x1D4DD, "M", "n"), + (0x1D4DE, "M", "o"), + (0x1D4DF, "M", "p"), + (0x1D4E0, "M", "q"), + (0x1D4E1, "M", "r"), + (0x1D4E2, "M", "s"), + (0x1D4E3, "M", "t"), + (0x1D4E4, "M", "u"), + (0x1D4E5, "M", "v"), + (0x1D4E6, "M", "w"), + (0x1D4E7, "M", "x"), + (0x1D4E8, "M", "y"), + (0x1D4E9, "M", "z"), + (0x1D4EA, "M", "a"), + (0x1D4EB, "M", "b"), + (0x1D4EC, "M", "c"), + (0x1D4ED, "M", "d"), + (0x1D4EE, "M", "e"), + (0x1D4EF, "M", "f"), + ] + + +def _seg_63() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1D4F0, "M", "g"), + (0x1D4F1, 
"M", "h"), + (0x1D4F2, "M", "i"), + (0x1D4F3, "M", "j"), + (0x1D4F4, "M", "k"), + (0x1D4F5, "M", "l"), + (0x1D4F6, "M", "m"), + (0x1D4F7, "M", "n"), + (0x1D4F8, "M", "o"), + (0x1D4F9, "M", "p"), + (0x1D4FA, "M", "q"), + (0x1D4FB, "M", "r"), + (0x1D4FC, "M", "s"), + (0x1D4FD, "M", "t"), + (0x1D4FE, "M", "u"), + (0x1D4FF, "M", "v"), + (0x1D500, "M", "w"), + (0x1D501, "M", "x"), + (0x1D502, "M", "y"), + (0x1D503, "M", "z"), + (0x1D504, "M", "a"), + (0x1D505, "M", "b"), + (0x1D506, "X"), + (0x1D507, "M", "d"), + (0x1D508, "M", "e"), + (0x1D509, "M", "f"), + (0x1D50A, "M", "g"), + (0x1D50B, "X"), + (0x1D50D, "M", "j"), + (0x1D50E, "M", "k"), + (0x1D50F, "M", "l"), + (0x1D510, "M", "m"), + (0x1D511, "M", "n"), + (0x1D512, "M", "o"), + (0x1D513, "M", "p"), + (0x1D514, "M", "q"), + (0x1D515, "X"), + (0x1D516, "M", "s"), + (0x1D517, "M", "t"), + (0x1D518, "M", "u"), + (0x1D519, "M", "v"), + (0x1D51A, "M", "w"), + (0x1D51B, "M", "x"), + (0x1D51C, "M", "y"), + (0x1D51D, "X"), + (0x1D51E, "M", "a"), + (0x1D51F, "M", "b"), + (0x1D520, "M", "c"), + (0x1D521, "M", "d"), + (0x1D522, "M", "e"), + (0x1D523, "M", "f"), + (0x1D524, "M", "g"), + (0x1D525, "M", "h"), + (0x1D526, "M", "i"), + (0x1D527, "M", "j"), + (0x1D528, "M", "k"), + (0x1D529, "M", "l"), + (0x1D52A, "M", "m"), + (0x1D52B, "M", "n"), + (0x1D52C, "M", "o"), + (0x1D52D, "M", "p"), + (0x1D52E, "M", "q"), + (0x1D52F, "M", "r"), + (0x1D530, "M", "s"), + (0x1D531, "M", "t"), + (0x1D532, "M", "u"), + (0x1D533, "M", "v"), + (0x1D534, "M", "w"), + (0x1D535, "M", "x"), + (0x1D536, "M", "y"), + (0x1D537, "M", "z"), + (0x1D538, "M", "a"), + (0x1D539, "M", "b"), + (0x1D53A, "X"), + (0x1D53B, "M", "d"), + (0x1D53C, "M", "e"), + (0x1D53D, "M", "f"), + (0x1D53E, "M", "g"), + (0x1D53F, "X"), + (0x1D540, "M", "i"), + (0x1D541, "M", "j"), + (0x1D542, "M", "k"), + (0x1D543, "M", "l"), + (0x1D544, "M", "m"), + (0x1D545, "X"), + (0x1D546, "M", "o"), + (0x1D547, "X"), + (0x1D54A, "M", "s"), + (0x1D54B, "M", "t"), + (0x1D54C, "M", "u"), + 
(0x1D54D, "M", "v"), + (0x1D54E, "M", "w"), + (0x1D54F, "M", "x"), + (0x1D550, "M", "y"), + (0x1D551, "X"), + (0x1D552, "M", "a"), + (0x1D553, "M", "b"), + (0x1D554, "M", "c"), + (0x1D555, "M", "d"), + (0x1D556, "M", "e"), + ] + + +def _seg_64() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1D557, "M", "f"), + (0x1D558, "M", "g"), + (0x1D559, "M", "h"), + (0x1D55A, "M", "i"), + (0x1D55B, "M", "j"), + (0x1D55C, "M", "k"), + (0x1D55D, "M", "l"), + (0x1D55E, "M", "m"), + (0x1D55F, "M", "n"), + (0x1D560, "M", "o"), + (0x1D561, "M", "p"), + (0x1D562, "M", "q"), + (0x1D563, "M", "r"), + (0x1D564, "M", "s"), + (0x1D565, "M", "t"), + (0x1D566, "M", "u"), + (0x1D567, "M", "v"), + (0x1D568, "M", "w"), + (0x1D569, "M", "x"), + (0x1D56A, "M", "y"), + (0x1D56B, "M", "z"), + (0x1D56C, "M", "a"), + (0x1D56D, "M", "b"), + (0x1D56E, "M", "c"), + (0x1D56F, "M", "d"), + (0x1D570, "M", "e"), + (0x1D571, "M", "f"), + (0x1D572, "M", "g"), + (0x1D573, "M", "h"), + (0x1D574, "M", "i"), + (0x1D575, "M", "j"), + (0x1D576, "M", "k"), + (0x1D577, "M", "l"), + (0x1D578, "M", "m"), + (0x1D579, "M", "n"), + (0x1D57A, "M", "o"), + (0x1D57B, "M", "p"), + (0x1D57C, "M", "q"), + (0x1D57D, "M", "r"), + (0x1D57E, "M", "s"), + (0x1D57F, "M", "t"), + (0x1D580, "M", "u"), + (0x1D581, "M", "v"), + (0x1D582, "M", "w"), + (0x1D583, "M", "x"), + (0x1D584, "M", "y"), + (0x1D585, "M", "z"), + (0x1D586, "M", "a"), + (0x1D587, "M", "b"), + (0x1D588, "M", "c"), + (0x1D589, "M", "d"), + (0x1D58A, "M", "e"), + (0x1D58B, "M", "f"), + (0x1D58C, "M", "g"), + (0x1D58D, "M", "h"), + (0x1D58E, "M", "i"), + (0x1D58F, "M", "j"), + (0x1D590, "M", "k"), + (0x1D591, "M", "l"), + (0x1D592, "M", "m"), + (0x1D593, "M", "n"), + (0x1D594, "M", "o"), + (0x1D595, "M", "p"), + (0x1D596, "M", "q"), + (0x1D597, "M", "r"), + (0x1D598, "M", "s"), + (0x1D599, "M", "t"), + (0x1D59A, "M", "u"), + (0x1D59B, "M", "v"), + (0x1D59C, "M", "w"), + (0x1D59D, "M", "x"), + (0x1D59E, "M", "y"), + (0x1D59F, "M", "z"), + 
(0x1D5A0, "M", "a"), + (0x1D5A1, "M", "b"), + (0x1D5A2, "M", "c"), + (0x1D5A3, "M", "d"), + (0x1D5A4, "M", "e"), + (0x1D5A5, "M", "f"), + (0x1D5A6, "M", "g"), + (0x1D5A7, "M", "h"), + (0x1D5A8, "M", "i"), + (0x1D5A9, "M", "j"), + (0x1D5AA, "M", "k"), + (0x1D5AB, "M", "l"), + (0x1D5AC, "M", "m"), + (0x1D5AD, "M", "n"), + (0x1D5AE, "M", "o"), + (0x1D5AF, "M", "p"), + (0x1D5B0, "M", "q"), + (0x1D5B1, "M", "r"), + (0x1D5B2, "M", "s"), + (0x1D5B3, "M", "t"), + (0x1D5B4, "M", "u"), + (0x1D5B5, "M", "v"), + (0x1D5B6, "M", "w"), + (0x1D5B7, "M", "x"), + (0x1D5B8, "M", "y"), + (0x1D5B9, "M", "z"), + (0x1D5BA, "M", "a"), + ] + + +def _seg_65() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1D5BB, "M", "b"), + (0x1D5BC, "M", "c"), + (0x1D5BD, "M", "d"), + (0x1D5BE, "M", "e"), + (0x1D5BF, "M", "f"), + (0x1D5C0, "M", "g"), + (0x1D5C1, "M", "h"), + (0x1D5C2, "M", "i"), + (0x1D5C3, "M", "j"), + (0x1D5C4, "M", "k"), + (0x1D5C5, "M", "l"), + (0x1D5C6, "M", "m"), + (0x1D5C7, "M", "n"), + (0x1D5C8, "M", "o"), + (0x1D5C9, "M", "p"), + (0x1D5CA, "M", "q"), + (0x1D5CB, "M", "r"), + (0x1D5CC, "M", "s"), + (0x1D5CD, "M", "t"), + (0x1D5CE, "M", "u"), + (0x1D5CF, "M", "v"), + (0x1D5D0, "M", "w"), + (0x1D5D1, "M", "x"), + (0x1D5D2, "M", "y"), + (0x1D5D3, "M", "z"), + (0x1D5D4, "M", "a"), + (0x1D5D5, "M", "b"), + (0x1D5D6, "M", "c"), + (0x1D5D7, "M", "d"), + (0x1D5D8, "M", "e"), + (0x1D5D9, "M", "f"), + (0x1D5DA, "M", "g"), + (0x1D5DB, "M", "h"), + (0x1D5DC, "M", "i"), + (0x1D5DD, "M", "j"), + (0x1D5DE, "M", "k"), + (0x1D5DF, "M", "l"), + (0x1D5E0, "M", "m"), + (0x1D5E1, "M", "n"), + (0x1D5E2, "M", "o"), + (0x1D5E3, "M", "p"), + (0x1D5E4, "M", "q"), + (0x1D5E5, "M", "r"), + (0x1D5E6, "M", "s"), + (0x1D5E7, "M", "t"), + (0x1D5E8, "M", "u"), + (0x1D5E9, "M", "v"), + (0x1D5EA, "M", "w"), + (0x1D5EB, "M", "x"), + (0x1D5EC, "M", "y"), + (0x1D5ED, "M", "z"), + (0x1D5EE, "M", "a"), + (0x1D5EF, "M", "b"), + (0x1D5F0, "M", "c"), + (0x1D5F1, "M", "d"), + (0x1D5F2, "M", "e"), + 
(0x1D5F3, "M", "f"), + (0x1D5F4, "M", "g"), + (0x1D5F5, "M", "h"), + (0x1D5F6, "M", "i"), + (0x1D5F7, "M", "j"), + (0x1D5F8, "M", "k"), + (0x1D5F9, "M", "l"), + (0x1D5FA, "M", "m"), + (0x1D5FB, "M", "n"), + (0x1D5FC, "M", "o"), + (0x1D5FD, "M", "p"), + (0x1D5FE, "M", "q"), + (0x1D5FF, "M", "r"), + (0x1D600, "M", "s"), + (0x1D601, "M", "t"), + (0x1D602, "M", "u"), + (0x1D603, "M", "v"), + (0x1D604, "M", "w"), + (0x1D605, "M", "x"), + (0x1D606, "M", "y"), + (0x1D607, "M", "z"), + (0x1D608, "M", "a"), + (0x1D609, "M", "b"), + (0x1D60A, "M", "c"), + (0x1D60B, "M", "d"), + (0x1D60C, "M", "e"), + (0x1D60D, "M", "f"), + (0x1D60E, "M", "g"), + (0x1D60F, "M", "h"), + (0x1D610, "M", "i"), + (0x1D611, "M", "j"), + (0x1D612, "M", "k"), + (0x1D613, "M", "l"), + (0x1D614, "M", "m"), + (0x1D615, "M", "n"), + (0x1D616, "M", "o"), + (0x1D617, "M", "p"), + (0x1D618, "M", "q"), + (0x1D619, "M", "r"), + (0x1D61A, "M", "s"), + (0x1D61B, "M", "t"), + (0x1D61C, "M", "u"), + (0x1D61D, "M", "v"), + (0x1D61E, "M", "w"), + ] + + +def _seg_66() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1D61F, "M", "x"), + (0x1D620, "M", "y"), + (0x1D621, "M", "z"), + (0x1D622, "M", "a"), + (0x1D623, "M", "b"), + (0x1D624, "M", "c"), + (0x1D625, "M", "d"), + (0x1D626, "M", "e"), + (0x1D627, "M", "f"), + (0x1D628, "M", "g"), + (0x1D629, "M", "h"), + (0x1D62A, "M", "i"), + (0x1D62B, "M", "j"), + (0x1D62C, "M", "k"), + (0x1D62D, "M", "l"), + (0x1D62E, "M", "m"), + (0x1D62F, "M", "n"), + (0x1D630, "M", "o"), + (0x1D631, "M", "p"), + (0x1D632, "M", "q"), + (0x1D633, "M", "r"), + (0x1D634, "M", "s"), + (0x1D635, "M", "t"), + (0x1D636, "M", "u"), + (0x1D637, "M", "v"), + (0x1D638, "M", "w"), + (0x1D639, "M", "x"), + (0x1D63A, "M", "y"), + (0x1D63B, "M", "z"), + (0x1D63C, "M", "a"), + (0x1D63D, "M", "b"), + (0x1D63E, "M", "c"), + (0x1D63F, "M", "d"), + (0x1D640, "M", "e"), + (0x1D641, "M", "f"), + (0x1D642, "M", "g"), + (0x1D643, "M", "h"), + (0x1D644, "M", "i"), + (0x1D645, "M", "j"), + 
(0x1D646, "M", "k"), + (0x1D647, "M", "l"), + (0x1D648, "M", "m"), + (0x1D649, "M", "n"), + (0x1D64A, "M", "o"), + (0x1D64B, "M", "p"), + (0x1D64C, "M", "q"), + (0x1D64D, "M", "r"), + (0x1D64E, "M", "s"), + (0x1D64F, "M", "t"), + (0x1D650, "M", "u"), + (0x1D651, "M", "v"), + (0x1D652, "M", "w"), + (0x1D653, "M", "x"), + (0x1D654, "M", "y"), + (0x1D655, "M", "z"), + (0x1D656, "M", "a"), + (0x1D657, "M", "b"), + (0x1D658, "M", "c"), + (0x1D659, "M", "d"), + (0x1D65A, "M", "e"), + (0x1D65B, "M", "f"), + (0x1D65C, "M", "g"), + (0x1D65D, "M", "h"), + (0x1D65E, "M", "i"), + (0x1D65F, "M", "j"), + (0x1D660, "M", "k"), + (0x1D661, "M", "l"), + (0x1D662, "M", "m"), + (0x1D663, "M", "n"), + (0x1D664, "M", "o"), + (0x1D665, "M", "p"), + (0x1D666, "M", "q"), + (0x1D667, "M", "r"), + (0x1D668, "M", "s"), + (0x1D669, "M", "t"), + (0x1D66A, "M", "u"), + (0x1D66B, "M", "v"), + (0x1D66C, "M", "w"), + (0x1D66D, "M", "x"), + (0x1D66E, "M", "y"), + (0x1D66F, "M", "z"), + (0x1D670, "M", "a"), + (0x1D671, "M", "b"), + (0x1D672, "M", "c"), + (0x1D673, "M", "d"), + (0x1D674, "M", "e"), + (0x1D675, "M", "f"), + (0x1D676, "M", "g"), + (0x1D677, "M", "h"), + (0x1D678, "M", "i"), + (0x1D679, "M", "j"), + (0x1D67A, "M", "k"), + (0x1D67B, "M", "l"), + (0x1D67C, "M", "m"), + (0x1D67D, "M", "n"), + (0x1D67E, "M", "o"), + (0x1D67F, "M", "p"), + (0x1D680, "M", "q"), + (0x1D681, "M", "r"), + (0x1D682, "M", "s"), + ] + + +def _seg_67() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1D683, "M", "t"), + (0x1D684, "M", "u"), + (0x1D685, "M", "v"), + (0x1D686, "M", "w"), + (0x1D687, "M", "x"), + (0x1D688, "M", "y"), + (0x1D689, "M", "z"), + (0x1D68A, "M", "a"), + (0x1D68B, "M", "b"), + (0x1D68C, "M", "c"), + (0x1D68D, "M", "d"), + (0x1D68E, "M", "e"), + (0x1D68F, "M", "f"), + (0x1D690, "M", "g"), + (0x1D691, "M", "h"), + (0x1D692, "M", "i"), + (0x1D693, "M", "j"), + (0x1D694, "M", "k"), + (0x1D695, "M", "l"), + (0x1D696, "M", "m"), + (0x1D697, "M", "n"), + (0x1D698, "M", "o"), + 
(0x1D699, "M", "p"), + (0x1D69A, "M", "q"), + (0x1D69B, "M", "r"), + (0x1D69C, "M", "s"), + (0x1D69D, "M", "t"), + (0x1D69E, "M", "u"), + (0x1D69F, "M", "v"), + (0x1D6A0, "M", "w"), + (0x1D6A1, "M", "x"), + (0x1D6A2, "M", "y"), + (0x1D6A3, "M", "z"), + (0x1D6A4, "M", "ı"), + (0x1D6A5, "M", "ȷ"), + (0x1D6A6, "X"), + (0x1D6A8, "M", "α"), + (0x1D6A9, "M", "β"), + (0x1D6AA, "M", "γ"), + (0x1D6AB, "M", "δ"), + (0x1D6AC, "M", "ε"), + (0x1D6AD, "M", "ζ"), + (0x1D6AE, "M", "η"), + (0x1D6AF, "M", "θ"), + (0x1D6B0, "M", "ι"), + (0x1D6B1, "M", "κ"), + (0x1D6B2, "M", "λ"), + (0x1D6B3, "M", "μ"), + (0x1D6B4, "M", "ν"), + (0x1D6B5, "M", "ξ"), + (0x1D6B6, "M", "ο"), + (0x1D6B7, "M", "π"), + (0x1D6B8, "M", "ρ"), + (0x1D6B9, "M", "θ"), + (0x1D6BA, "M", "σ"), + (0x1D6BB, "M", "τ"), + (0x1D6BC, "M", "υ"), + (0x1D6BD, "M", "φ"), + (0x1D6BE, "M", "χ"), + (0x1D6BF, "M", "ψ"), + (0x1D6C0, "M", "ω"), + (0x1D6C1, "M", "∇"), + (0x1D6C2, "M", "α"), + (0x1D6C3, "M", "β"), + (0x1D6C4, "M", "γ"), + (0x1D6C5, "M", "δ"), + (0x1D6C6, "M", "ε"), + (0x1D6C7, "M", "ζ"), + (0x1D6C8, "M", "η"), + (0x1D6C9, "M", "θ"), + (0x1D6CA, "M", "ι"), + (0x1D6CB, "M", "κ"), + (0x1D6CC, "M", "λ"), + (0x1D6CD, "M", "μ"), + (0x1D6CE, "M", "ν"), + (0x1D6CF, "M", "ξ"), + (0x1D6D0, "M", "ο"), + (0x1D6D1, "M", "π"), + (0x1D6D2, "M", "ρ"), + (0x1D6D3, "M", "σ"), + (0x1D6D5, "M", "τ"), + (0x1D6D6, "M", "υ"), + (0x1D6D7, "M", "φ"), + (0x1D6D8, "M", "χ"), + (0x1D6D9, "M", "ψ"), + (0x1D6DA, "M", "ω"), + (0x1D6DB, "M", "∂"), + (0x1D6DC, "M", "ε"), + (0x1D6DD, "M", "θ"), + (0x1D6DE, "M", "κ"), + (0x1D6DF, "M", "φ"), + (0x1D6E0, "M", "ρ"), + (0x1D6E1, "M", "π"), + (0x1D6E2, "M", "α"), + (0x1D6E3, "M", "β"), + (0x1D6E4, "M", "γ"), + (0x1D6E5, "M", "δ"), + (0x1D6E6, "M", "ε"), + (0x1D6E7, "M", "ζ"), + (0x1D6E8, "M", "η"), + ] + + +def _seg_68() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1D6E9, "M", "θ"), + (0x1D6EA, "M", "ι"), + (0x1D6EB, "M", "κ"), + (0x1D6EC, "M", "λ"), + (0x1D6ED, "M", "μ"), + 
(0x1D6EE, "M", "ν"), + (0x1D6EF, "M", "ξ"), + (0x1D6F0, "M", "ο"), + (0x1D6F1, "M", "π"), + (0x1D6F2, "M", "ρ"), + (0x1D6F3, "M", "θ"), + (0x1D6F4, "M", "σ"), + (0x1D6F5, "M", "τ"), + (0x1D6F6, "M", "υ"), + (0x1D6F7, "M", "φ"), + (0x1D6F8, "M", "χ"), + (0x1D6F9, "M", "ψ"), + (0x1D6FA, "M", "ω"), + (0x1D6FB, "M", "∇"), + (0x1D6FC, "M", "α"), + (0x1D6FD, "M", "β"), + (0x1D6FE, "M", "γ"), + (0x1D6FF, "M", "δ"), + (0x1D700, "M", "ε"), + (0x1D701, "M", "ζ"), + (0x1D702, "M", "η"), + (0x1D703, "M", "θ"), + (0x1D704, "M", "ι"), + (0x1D705, "M", "κ"), + (0x1D706, "M", "λ"), + (0x1D707, "M", "μ"), + (0x1D708, "M", "ν"), + (0x1D709, "M", "ξ"), + (0x1D70A, "M", "ο"), + (0x1D70B, "M", "π"), + (0x1D70C, "M", "ρ"), + (0x1D70D, "M", "σ"), + (0x1D70F, "M", "τ"), + (0x1D710, "M", "υ"), + (0x1D711, "M", "φ"), + (0x1D712, "M", "χ"), + (0x1D713, "M", "ψ"), + (0x1D714, "M", "ω"), + (0x1D715, "M", "∂"), + (0x1D716, "M", "ε"), + (0x1D717, "M", "θ"), + (0x1D718, "M", "κ"), + (0x1D719, "M", "φ"), + (0x1D71A, "M", "ρ"), + (0x1D71B, "M", "π"), + (0x1D71C, "M", "α"), + (0x1D71D, "M", "β"), + (0x1D71E, "M", "γ"), + (0x1D71F, "M", "δ"), + (0x1D720, "M", "ε"), + (0x1D721, "M", "ζ"), + (0x1D722, "M", "η"), + (0x1D723, "M", "θ"), + (0x1D724, "M", "ι"), + (0x1D725, "M", "κ"), + (0x1D726, "M", "λ"), + (0x1D727, "M", "μ"), + (0x1D728, "M", "ν"), + (0x1D729, "M", "ξ"), + (0x1D72A, "M", "ο"), + (0x1D72B, "M", "π"), + (0x1D72C, "M", "ρ"), + (0x1D72D, "M", "θ"), + (0x1D72E, "M", "σ"), + (0x1D72F, "M", "τ"), + (0x1D730, "M", "υ"), + (0x1D731, "M", "φ"), + (0x1D732, "M", "χ"), + (0x1D733, "M", "ψ"), + (0x1D734, "M", "ω"), + (0x1D735, "M", "∇"), + (0x1D736, "M", "α"), + (0x1D737, "M", "β"), + (0x1D738, "M", "γ"), + (0x1D739, "M", "δ"), + (0x1D73A, "M", "ε"), + (0x1D73B, "M", "ζ"), + (0x1D73C, "M", "η"), + (0x1D73D, "M", "θ"), + (0x1D73E, "M", "ι"), + (0x1D73F, "M", "κ"), + (0x1D740, "M", "λ"), + (0x1D741, "M", "μ"), + (0x1D742, "M", "ν"), + (0x1D743, "M", "ξ"), + (0x1D744, "M", "ο"), + (0x1D745, "M", "π"), 
+ (0x1D746, "M", "ρ"), + (0x1D747, "M", "σ"), + (0x1D749, "M", "τ"), + (0x1D74A, "M", "υ"), + (0x1D74B, "M", "φ"), + (0x1D74C, "M", "χ"), + (0x1D74D, "M", "ψ"), + (0x1D74E, "M", "ω"), + ] + + +def _seg_69() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1D74F, "M", "∂"), + (0x1D750, "M", "ε"), + (0x1D751, "M", "θ"), + (0x1D752, "M", "κ"), + (0x1D753, "M", "φ"), + (0x1D754, "M", "ρ"), + (0x1D755, "M", "π"), + (0x1D756, "M", "α"), + (0x1D757, "M", "β"), + (0x1D758, "M", "γ"), + (0x1D759, "M", "δ"), + (0x1D75A, "M", "ε"), + (0x1D75B, "M", "ζ"), + (0x1D75C, "M", "η"), + (0x1D75D, "M", "θ"), + (0x1D75E, "M", "ι"), + (0x1D75F, "M", "κ"), + (0x1D760, "M", "λ"), + (0x1D761, "M", "μ"), + (0x1D762, "M", "ν"), + (0x1D763, "M", "ξ"), + (0x1D764, "M", "ο"), + (0x1D765, "M", "π"), + (0x1D766, "M", "ρ"), + (0x1D767, "M", "θ"), + (0x1D768, "M", "σ"), + (0x1D769, "M", "τ"), + (0x1D76A, "M", "υ"), + (0x1D76B, "M", "φ"), + (0x1D76C, "M", "χ"), + (0x1D76D, "M", "ψ"), + (0x1D76E, "M", "ω"), + (0x1D76F, "M", "∇"), + (0x1D770, "M", "α"), + (0x1D771, "M", "β"), + (0x1D772, "M", "γ"), + (0x1D773, "M", "δ"), + (0x1D774, "M", "ε"), + (0x1D775, "M", "ζ"), + (0x1D776, "M", "η"), + (0x1D777, "M", "θ"), + (0x1D778, "M", "ι"), + (0x1D779, "M", "κ"), + (0x1D77A, "M", "λ"), + (0x1D77B, "M", "μ"), + (0x1D77C, "M", "ν"), + (0x1D77D, "M", "ξ"), + (0x1D77E, "M", "ο"), + (0x1D77F, "M", "π"), + (0x1D780, "M", "ρ"), + (0x1D781, "M", "σ"), + (0x1D783, "M", "τ"), + (0x1D784, "M", "υ"), + (0x1D785, "M", "φ"), + (0x1D786, "M", "χ"), + (0x1D787, "M", "ψ"), + (0x1D788, "M", "ω"), + (0x1D789, "M", "∂"), + (0x1D78A, "M", "ε"), + (0x1D78B, "M", "θ"), + (0x1D78C, "M", "κ"), + (0x1D78D, "M", "φ"), + (0x1D78E, "M", "ρ"), + (0x1D78F, "M", "π"), + (0x1D790, "M", "α"), + (0x1D791, "M", "β"), + (0x1D792, "M", "γ"), + (0x1D793, "M", "δ"), + (0x1D794, "M", "ε"), + (0x1D795, "M", "ζ"), + (0x1D796, "M", "η"), + (0x1D797, "M", "θ"), + (0x1D798, "M", "ι"), + (0x1D799, "M", "κ"), + (0x1D79A, "M", "λ"), + 
(0x1D79B, "M", "μ"), + (0x1D79C, "M", "ν"), + (0x1D79D, "M", "ξ"), + (0x1D79E, "M", "ο"), + (0x1D79F, "M", "π"), + (0x1D7A0, "M", "ρ"), + (0x1D7A1, "M", "θ"), + (0x1D7A2, "M", "σ"), + (0x1D7A3, "M", "τ"), + (0x1D7A4, "M", "υ"), + (0x1D7A5, "M", "φ"), + (0x1D7A6, "M", "χ"), + (0x1D7A7, "M", "ψ"), + (0x1D7A8, "M", "ω"), + (0x1D7A9, "M", "∇"), + (0x1D7AA, "M", "α"), + (0x1D7AB, "M", "β"), + (0x1D7AC, "M", "γ"), + (0x1D7AD, "M", "δ"), + (0x1D7AE, "M", "ε"), + (0x1D7AF, "M", "ζ"), + (0x1D7B0, "M", "η"), + (0x1D7B1, "M", "θ"), + (0x1D7B2, "M", "ι"), + (0x1D7B3, "M", "κ"), + ] + + +def _seg_70() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1D7B4, "M", "λ"), + (0x1D7B5, "M", "μ"), + (0x1D7B6, "M", "ν"), + (0x1D7B7, "M", "ξ"), + (0x1D7B8, "M", "ο"), + (0x1D7B9, "M", "π"), + (0x1D7BA, "M", "ρ"), + (0x1D7BB, "M", "σ"), + (0x1D7BD, "M", "τ"), + (0x1D7BE, "M", "υ"), + (0x1D7BF, "M", "φ"), + (0x1D7C0, "M", "χ"), + (0x1D7C1, "M", "ψ"), + (0x1D7C2, "M", "ω"), + (0x1D7C3, "M", "∂"), + (0x1D7C4, "M", "ε"), + (0x1D7C5, "M", "θ"), + (0x1D7C6, "M", "κ"), + (0x1D7C7, "M", "φ"), + (0x1D7C8, "M", "ρ"), + (0x1D7C9, "M", "π"), + (0x1D7CA, "M", "ϝ"), + (0x1D7CC, "X"), + (0x1D7CE, "M", "0"), + (0x1D7CF, "M", "1"), + (0x1D7D0, "M", "2"), + (0x1D7D1, "M", "3"), + (0x1D7D2, "M", "4"), + (0x1D7D3, "M", "5"), + (0x1D7D4, "M", "6"), + (0x1D7D5, "M", "7"), + (0x1D7D6, "M", "8"), + (0x1D7D7, "M", "9"), + (0x1D7D8, "M", "0"), + (0x1D7D9, "M", "1"), + (0x1D7DA, "M", "2"), + (0x1D7DB, "M", "3"), + (0x1D7DC, "M", "4"), + (0x1D7DD, "M", "5"), + (0x1D7DE, "M", "6"), + (0x1D7DF, "M", "7"), + (0x1D7E0, "M", "8"), + (0x1D7E1, "M", "9"), + (0x1D7E2, "M", "0"), + (0x1D7E3, "M", "1"), + (0x1D7E4, "M", "2"), + (0x1D7E5, "M", "3"), + (0x1D7E6, "M", "4"), + (0x1D7E7, "M", "5"), + (0x1D7E8, "M", "6"), + (0x1D7E9, "M", "7"), + (0x1D7EA, "M", "8"), + (0x1D7EB, "M", "9"), + (0x1D7EC, "M", "0"), + (0x1D7ED, "M", "1"), + (0x1D7EE, "M", "2"), + (0x1D7EF, "M", "3"), + (0x1D7F0, "M", "4"), + 
(0x1D7F1, "M", "5"), + (0x1D7F2, "M", "6"), + (0x1D7F3, "M", "7"), + (0x1D7F4, "M", "8"), + (0x1D7F5, "M", "9"), + (0x1D7F6, "M", "0"), + (0x1D7F7, "M", "1"), + (0x1D7F8, "M", "2"), + (0x1D7F9, "M", "3"), + (0x1D7FA, "M", "4"), + (0x1D7FB, "M", "5"), + (0x1D7FC, "M", "6"), + (0x1D7FD, "M", "7"), + (0x1D7FE, "M", "8"), + (0x1D7FF, "M", "9"), + (0x1D800, "V"), + (0x1DA8C, "X"), + (0x1DA9B, "V"), + (0x1DAA0, "X"), + (0x1DAA1, "V"), + (0x1DAB0, "X"), + (0x1DF00, "V"), + (0x1DF1F, "X"), + (0x1DF25, "V"), + (0x1DF2B, "X"), + (0x1E000, "V"), + (0x1E007, "X"), + (0x1E008, "V"), + (0x1E019, "X"), + (0x1E01B, "V"), + (0x1E022, "X"), + (0x1E023, "V"), + (0x1E025, "X"), + (0x1E026, "V"), + (0x1E02B, "X"), + (0x1E030, "M", "а"), + (0x1E031, "M", "б"), + (0x1E032, "M", "в"), + (0x1E033, "M", "г"), + (0x1E034, "M", "д"), + (0x1E035, "M", "е"), + (0x1E036, "M", "ж"), + ] + + +def _seg_71() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1E037, "M", "з"), + (0x1E038, "M", "и"), + (0x1E039, "M", "к"), + (0x1E03A, "M", "л"), + (0x1E03B, "M", "м"), + (0x1E03C, "M", "о"), + (0x1E03D, "M", "п"), + (0x1E03E, "M", "р"), + (0x1E03F, "M", "с"), + (0x1E040, "M", "т"), + (0x1E041, "M", "у"), + (0x1E042, "M", "ф"), + (0x1E043, "M", "х"), + (0x1E044, "M", "ц"), + (0x1E045, "M", "ч"), + (0x1E046, "M", "ш"), + (0x1E047, "M", "ы"), + (0x1E048, "M", "э"), + (0x1E049, "M", "ю"), + (0x1E04A, "M", "ꚉ"), + (0x1E04B, "M", "ә"), + (0x1E04C, "M", "і"), + (0x1E04D, "M", "ј"), + (0x1E04E, "M", "ө"), + (0x1E04F, "M", "ү"), + (0x1E050, "M", "ӏ"), + (0x1E051, "M", "а"), + (0x1E052, "M", "б"), + (0x1E053, "M", "в"), + (0x1E054, "M", "г"), + (0x1E055, "M", "д"), + (0x1E056, "M", "е"), + (0x1E057, "M", "ж"), + (0x1E058, "M", "з"), + (0x1E059, "M", "и"), + (0x1E05A, "M", "к"), + (0x1E05B, "M", "л"), + (0x1E05C, "M", "о"), + (0x1E05D, "M", "п"), + (0x1E05E, "M", "с"), + (0x1E05F, "M", "у"), + (0x1E060, "M", "ф"), + (0x1E061, "M", "х"), + (0x1E062, "M", "ц"), + (0x1E063, "M", "ч"), + (0x1E064, 
"M", "ш"), + (0x1E065, "M", "ъ"), + (0x1E066, "M", "ы"), + (0x1E067, "M", "ґ"), + (0x1E068, "M", "і"), + (0x1E069, "M", "ѕ"), + (0x1E06A, "M", "џ"), + (0x1E06B, "M", "ҫ"), + (0x1E06C, "M", "ꙑ"), + (0x1E06D, "M", "ұ"), + (0x1E06E, "X"), + (0x1E08F, "V"), + (0x1E090, "X"), + (0x1E100, "V"), + (0x1E12D, "X"), + (0x1E130, "V"), + (0x1E13E, "X"), + (0x1E140, "V"), + (0x1E14A, "X"), + (0x1E14E, "V"), + (0x1E150, "X"), + (0x1E290, "V"), + (0x1E2AF, "X"), + (0x1E2C0, "V"), + (0x1E2FA, "X"), + (0x1E2FF, "V"), + (0x1E300, "X"), + (0x1E4D0, "V"), + (0x1E4FA, "X"), + (0x1E7E0, "V"), + (0x1E7E7, "X"), + (0x1E7E8, "V"), + (0x1E7EC, "X"), + (0x1E7ED, "V"), + (0x1E7EF, "X"), + (0x1E7F0, "V"), + (0x1E7FF, "X"), + (0x1E800, "V"), + (0x1E8C5, "X"), + (0x1E8C7, "V"), + (0x1E8D7, "X"), + (0x1E900, "M", "𞤢"), + (0x1E901, "M", "𞤣"), + (0x1E902, "M", "𞤤"), + (0x1E903, "M", "𞤥"), + (0x1E904, "M", "𞤦"), + (0x1E905, "M", "𞤧"), + (0x1E906, "M", "𞤨"), + (0x1E907, "M", "𞤩"), + (0x1E908, "M", "𞤪"), + (0x1E909, "M", "𞤫"), + (0x1E90A, "M", "𞤬"), + (0x1E90B, "M", "𞤭"), + (0x1E90C, "M", "𞤮"), + (0x1E90D, "M", "𞤯"), + ] + + +def _seg_72() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1E90E, "M", "𞤰"), + (0x1E90F, "M", "𞤱"), + (0x1E910, "M", "𞤲"), + (0x1E911, "M", "𞤳"), + (0x1E912, "M", "𞤴"), + (0x1E913, "M", "𞤵"), + (0x1E914, "M", "𞤶"), + (0x1E915, "M", "𞤷"), + (0x1E916, "M", "𞤸"), + (0x1E917, "M", "𞤹"), + (0x1E918, "M", "𞤺"), + (0x1E919, "M", "𞤻"), + (0x1E91A, "M", "𞤼"), + (0x1E91B, "M", "𞤽"), + (0x1E91C, "M", "𞤾"), + (0x1E91D, "M", "𞤿"), + (0x1E91E, "M", "𞥀"), + (0x1E91F, "M", "𞥁"), + (0x1E920, "M", "𞥂"), + (0x1E921, "M", "𞥃"), + (0x1E922, "V"), + (0x1E94C, "X"), + (0x1E950, "V"), + (0x1E95A, "X"), + (0x1E95E, "V"), + (0x1E960, "X"), + (0x1EC71, "V"), + (0x1ECB5, "X"), + (0x1ED01, "V"), + (0x1ED3E, "X"), + (0x1EE00, "M", "ا"), + (0x1EE01, "M", "ب"), + (0x1EE02, "M", "ج"), + (0x1EE03, "M", "د"), + (0x1EE04, "X"), + (0x1EE05, "M", "و"), + (0x1EE06, "M", "ز"), + (0x1EE07, "M", 
"ح"), + (0x1EE08, "M", "ط"), + (0x1EE09, "M", "ي"), + (0x1EE0A, "M", "ك"), + (0x1EE0B, "M", "ل"), + (0x1EE0C, "M", "م"), + (0x1EE0D, "M", "ن"), + (0x1EE0E, "M", "س"), + (0x1EE0F, "M", "ع"), + (0x1EE10, "M", "ف"), + (0x1EE11, "M", "ص"), + (0x1EE12, "M", "ق"), + (0x1EE13, "M", "ر"), + (0x1EE14, "M", "ش"), + (0x1EE15, "M", "ت"), + (0x1EE16, "M", "ث"), + (0x1EE17, "M", "خ"), + (0x1EE18, "M", "ذ"), + (0x1EE19, "M", "ض"), + (0x1EE1A, "M", "ظ"), + (0x1EE1B, "M", "غ"), + (0x1EE1C, "M", "ٮ"), + (0x1EE1D, "M", "ں"), + (0x1EE1E, "M", "ڡ"), + (0x1EE1F, "M", "ٯ"), + (0x1EE20, "X"), + (0x1EE21, "M", "ب"), + (0x1EE22, "M", "ج"), + (0x1EE23, "X"), + (0x1EE24, "M", "ه"), + (0x1EE25, "X"), + (0x1EE27, "M", "ح"), + (0x1EE28, "X"), + (0x1EE29, "M", "ي"), + (0x1EE2A, "M", "ك"), + (0x1EE2B, "M", "ل"), + (0x1EE2C, "M", "م"), + (0x1EE2D, "M", "ن"), + (0x1EE2E, "M", "س"), + (0x1EE2F, "M", "ع"), + (0x1EE30, "M", "ف"), + (0x1EE31, "M", "ص"), + (0x1EE32, "M", "ق"), + (0x1EE33, "X"), + (0x1EE34, "M", "ش"), + (0x1EE35, "M", "ت"), + (0x1EE36, "M", "ث"), + (0x1EE37, "M", "خ"), + (0x1EE38, "X"), + (0x1EE39, "M", "ض"), + (0x1EE3A, "X"), + (0x1EE3B, "M", "غ"), + (0x1EE3C, "X"), + (0x1EE42, "M", "ج"), + (0x1EE43, "X"), + (0x1EE47, "M", "ح"), + (0x1EE48, "X"), + (0x1EE49, "M", "ي"), + (0x1EE4A, "X"), + (0x1EE4B, "M", "ل"), + (0x1EE4C, "X"), + (0x1EE4D, "M", "ن"), + (0x1EE4E, "M", "س"), + ] + + +def _seg_73() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1EE4F, "M", "ع"), + (0x1EE50, "X"), + (0x1EE51, "M", "ص"), + (0x1EE52, "M", "ق"), + (0x1EE53, "X"), + (0x1EE54, "M", "ش"), + (0x1EE55, "X"), + (0x1EE57, "M", "خ"), + (0x1EE58, "X"), + (0x1EE59, "M", "ض"), + (0x1EE5A, "X"), + (0x1EE5B, "M", "غ"), + (0x1EE5C, "X"), + (0x1EE5D, "M", "ں"), + (0x1EE5E, "X"), + (0x1EE5F, "M", "ٯ"), + (0x1EE60, "X"), + (0x1EE61, "M", "ب"), + (0x1EE62, "M", "ج"), + (0x1EE63, "X"), + (0x1EE64, "M", "ه"), + (0x1EE65, "X"), + (0x1EE67, "M", "ح"), + (0x1EE68, "M", "ط"), + (0x1EE69, "M", "ي"), + (0x1EE6A, 
"M", "ك"), + (0x1EE6B, "X"), + (0x1EE6C, "M", "م"), + (0x1EE6D, "M", "ن"), + (0x1EE6E, "M", "س"), + (0x1EE6F, "M", "ع"), + (0x1EE70, "M", "ف"), + (0x1EE71, "M", "ص"), + (0x1EE72, "M", "ق"), + (0x1EE73, "X"), + (0x1EE74, "M", "ش"), + (0x1EE75, "M", "ت"), + (0x1EE76, "M", "ث"), + (0x1EE77, "M", "خ"), + (0x1EE78, "X"), + (0x1EE79, "M", "ض"), + (0x1EE7A, "M", "ظ"), + (0x1EE7B, "M", "غ"), + (0x1EE7C, "M", "ٮ"), + (0x1EE7D, "X"), + (0x1EE7E, "M", "ڡ"), + (0x1EE7F, "X"), + (0x1EE80, "M", "ا"), + (0x1EE81, "M", "ب"), + (0x1EE82, "M", "ج"), + (0x1EE83, "M", "د"), + (0x1EE84, "M", "ه"), + (0x1EE85, "M", "و"), + (0x1EE86, "M", "ز"), + (0x1EE87, "M", "ح"), + (0x1EE88, "M", "ط"), + (0x1EE89, "M", "ي"), + (0x1EE8A, "X"), + (0x1EE8B, "M", "ل"), + (0x1EE8C, "M", "م"), + (0x1EE8D, "M", "ن"), + (0x1EE8E, "M", "س"), + (0x1EE8F, "M", "ع"), + (0x1EE90, "M", "ف"), + (0x1EE91, "M", "ص"), + (0x1EE92, "M", "ق"), + (0x1EE93, "M", "ر"), + (0x1EE94, "M", "ش"), + (0x1EE95, "M", "ت"), + (0x1EE96, "M", "ث"), + (0x1EE97, "M", "خ"), + (0x1EE98, "M", "ذ"), + (0x1EE99, "M", "ض"), + (0x1EE9A, "M", "ظ"), + (0x1EE9B, "M", "غ"), + (0x1EE9C, "X"), + (0x1EEA1, "M", "ب"), + (0x1EEA2, "M", "ج"), + (0x1EEA3, "M", "د"), + (0x1EEA4, "X"), + (0x1EEA5, "M", "و"), + (0x1EEA6, "M", "ز"), + (0x1EEA7, "M", "ح"), + (0x1EEA8, "M", "ط"), + (0x1EEA9, "M", "ي"), + (0x1EEAA, "X"), + (0x1EEAB, "M", "ل"), + (0x1EEAC, "M", "م"), + (0x1EEAD, "M", "ن"), + (0x1EEAE, "M", "س"), + (0x1EEAF, "M", "ع"), + (0x1EEB0, "M", "ف"), + (0x1EEB1, "M", "ص"), + (0x1EEB2, "M", "ق"), + (0x1EEB3, "M", "ر"), + (0x1EEB4, "M", "ش"), + (0x1EEB5, "M", "ت"), + (0x1EEB6, "M", "ث"), + (0x1EEB7, "M", "خ"), + (0x1EEB8, "M", "ذ"), + ] + + +def _seg_74() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1EEB9, "M", "ض"), + (0x1EEBA, "M", "ظ"), + (0x1EEBB, "M", "غ"), + (0x1EEBC, "X"), + (0x1EEF0, "V"), + (0x1EEF2, "X"), + (0x1F000, "V"), + (0x1F02C, "X"), + (0x1F030, "V"), + (0x1F094, "X"), + (0x1F0A0, "V"), + (0x1F0AF, "X"), + (0x1F0B1, 
"V"), + (0x1F0C0, "X"), + (0x1F0C1, "V"), + (0x1F0D0, "X"), + (0x1F0D1, "V"), + (0x1F0F6, "X"), + (0x1F101, "3", "0,"), + (0x1F102, "3", "1,"), + (0x1F103, "3", "2,"), + (0x1F104, "3", "3,"), + (0x1F105, "3", "4,"), + (0x1F106, "3", "5,"), + (0x1F107, "3", "6,"), + (0x1F108, "3", "7,"), + (0x1F109, "3", "8,"), + (0x1F10A, "3", "9,"), + (0x1F10B, "V"), + (0x1F110, "3", "(a)"), + (0x1F111, "3", "(b)"), + (0x1F112, "3", "(c)"), + (0x1F113, "3", "(d)"), + (0x1F114, "3", "(e)"), + (0x1F115, "3", "(f)"), + (0x1F116, "3", "(g)"), + (0x1F117, "3", "(h)"), + (0x1F118, "3", "(i)"), + (0x1F119, "3", "(j)"), + (0x1F11A, "3", "(k)"), + (0x1F11B, "3", "(l)"), + (0x1F11C, "3", "(m)"), + (0x1F11D, "3", "(n)"), + (0x1F11E, "3", "(o)"), + (0x1F11F, "3", "(p)"), + (0x1F120, "3", "(q)"), + (0x1F121, "3", "(r)"), + (0x1F122, "3", "(s)"), + (0x1F123, "3", "(t)"), + (0x1F124, "3", "(u)"), + (0x1F125, "3", "(v)"), + (0x1F126, "3", "(w)"), + (0x1F127, "3", "(x)"), + (0x1F128, "3", "(y)"), + (0x1F129, "3", "(z)"), + (0x1F12A, "M", "〔s〕"), + (0x1F12B, "M", "c"), + (0x1F12C, "M", "r"), + (0x1F12D, "M", "cd"), + (0x1F12E, "M", "wz"), + (0x1F12F, "V"), + (0x1F130, "M", "a"), + (0x1F131, "M", "b"), + (0x1F132, "M", "c"), + (0x1F133, "M", "d"), + (0x1F134, "M", "e"), + (0x1F135, "M", "f"), + (0x1F136, "M", "g"), + (0x1F137, "M", "h"), + (0x1F138, "M", "i"), + (0x1F139, "M", "j"), + (0x1F13A, "M", "k"), + (0x1F13B, "M", "l"), + (0x1F13C, "M", "m"), + (0x1F13D, "M", "n"), + (0x1F13E, "M", "o"), + (0x1F13F, "M", "p"), + (0x1F140, "M", "q"), + (0x1F141, "M", "r"), + (0x1F142, "M", "s"), + (0x1F143, "M", "t"), + (0x1F144, "M", "u"), + (0x1F145, "M", "v"), + (0x1F146, "M", "w"), + (0x1F147, "M", "x"), + (0x1F148, "M", "y"), + (0x1F149, "M", "z"), + (0x1F14A, "M", "hv"), + (0x1F14B, "M", "mv"), + (0x1F14C, "M", "sd"), + (0x1F14D, "M", "ss"), + (0x1F14E, "M", "ppv"), + (0x1F14F, "M", "wc"), + (0x1F150, "V"), + (0x1F16A, "M", "mc"), + (0x1F16B, "M", "md"), + (0x1F16C, "M", "mr"), + (0x1F16D, "V"), + 
(0x1F190, "M", "dj"), + (0x1F191, "V"), + ] + + +def _seg_75() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1F1AE, "X"), + (0x1F1E6, "V"), + (0x1F200, "M", "ほか"), + (0x1F201, "M", "ココ"), + (0x1F202, "M", "サ"), + (0x1F203, "X"), + (0x1F210, "M", "手"), + (0x1F211, "M", "字"), + (0x1F212, "M", "双"), + (0x1F213, "M", "デ"), + (0x1F214, "M", "二"), + (0x1F215, "M", "多"), + (0x1F216, "M", "解"), + (0x1F217, "M", "天"), + (0x1F218, "M", "交"), + (0x1F219, "M", "映"), + (0x1F21A, "M", "無"), + (0x1F21B, "M", "料"), + (0x1F21C, "M", "前"), + (0x1F21D, "M", "後"), + (0x1F21E, "M", "再"), + (0x1F21F, "M", "新"), + (0x1F220, "M", "初"), + (0x1F221, "M", "終"), + (0x1F222, "M", "生"), + (0x1F223, "M", "販"), + (0x1F224, "M", "声"), + (0x1F225, "M", "吹"), + (0x1F226, "M", "演"), + (0x1F227, "M", "投"), + (0x1F228, "M", "捕"), + (0x1F229, "M", "一"), + (0x1F22A, "M", "三"), + (0x1F22B, "M", "遊"), + (0x1F22C, "M", "左"), + (0x1F22D, "M", "中"), + (0x1F22E, "M", "右"), + (0x1F22F, "M", "指"), + (0x1F230, "M", "走"), + (0x1F231, "M", "打"), + (0x1F232, "M", "禁"), + (0x1F233, "M", "空"), + (0x1F234, "M", "合"), + (0x1F235, "M", "満"), + (0x1F236, "M", "有"), + (0x1F237, "M", "月"), + (0x1F238, "M", "申"), + (0x1F239, "M", "割"), + (0x1F23A, "M", "営"), + (0x1F23B, "M", "配"), + (0x1F23C, "X"), + (0x1F240, "M", "〔本〕"), + (0x1F241, "M", "〔三〕"), + (0x1F242, "M", "〔二〕"), + (0x1F243, "M", "〔安〕"), + (0x1F244, "M", "〔点〕"), + (0x1F245, "M", "〔打〕"), + (0x1F246, "M", "〔盗〕"), + (0x1F247, "M", "〔勝〕"), + (0x1F248, "M", "〔敗〕"), + (0x1F249, "X"), + (0x1F250, "M", "得"), + (0x1F251, "M", "可"), + (0x1F252, "X"), + (0x1F260, "V"), + (0x1F266, "X"), + (0x1F300, "V"), + (0x1F6D8, "X"), + (0x1F6DC, "V"), + (0x1F6ED, "X"), + (0x1F6F0, "V"), + (0x1F6FD, "X"), + (0x1F700, "V"), + (0x1F777, "X"), + (0x1F77B, "V"), + (0x1F7DA, "X"), + (0x1F7E0, "V"), + (0x1F7EC, "X"), + (0x1F7F0, "V"), + (0x1F7F1, "X"), + (0x1F800, "V"), + (0x1F80C, "X"), + (0x1F810, "V"), + (0x1F848, "X"), + (0x1F850, "V"), + (0x1F85A, "X"), + (0x1F860, 
"V"), + (0x1F888, "X"), + (0x1F890, "V"), + (0x1F8AE, "X"), + (0x1F8B0, "V"), + (0x1F8B2, "X"), + (0x1F900, "V"), + (0x1FA54, "X"), + (0x1FA60, "V"), + (0x1FA6E, "X"), + (0x1FA70, "V"), + (0x1FA7D, "X"), + (0x1FA80, "V"), + (0x1FA89, "X"), + ] + + +def _seg_76() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1FA90, "V"), + (0x1FABE, "X"), + (0x1FABF, "V"), + (0x1FAC6, "X"), + (0x1FACE, "V"), + (0x1FADC, "X"), + (0x1FAE0, "V"), + (0x1FAE9, "X"), + (0x1FAF0, "V"), + (0x1FAF9, "X"), + (0x1FB00, "V"), + (0x1FB93, "X"), + (0x1FB94, "V"), + (0x1FBCB, "X"), + (0x1FBF0, "M", "0"), + (0x1FBF1, "M", "1"), + (0x1FBF2, "M", "2"), + (0x1FBF3, "M", "3"), + (0x1FBF4, "M", "4"), + (0x1FBF5, "M", "5"), + (0x1FBF6, "M", "6"), + (0x1FBF7, "M", "7"), + (0x1FBF8, "M", "8"), + (0x1FBF9, "M", "9"), + (0x1FBFA, "X"), + (0x20000, "V"), + (0x2A6E0, "X"), + (0x2A700, "V"), + (0x2B73A, "X"), + (0x2B740, "V"), + (0x2B81E, "X"), + (0x2B820, "V"), + (0x2CEA2, "X"), + (0x2CEB0, "V"), + (0x2EBE1, "X"), + (0x2EBF0, "V"), + (0x2EE5E, "X"), + (0x2F800, "M", "丽"), + (0x2F801, "M", "丸"), + (0x2F802, "M", "乁"), + (0x2F803, "M", "𠄢"), + (0x2F804, "M", "你"), + (0x2F805, "M", "侮"), + (0x2F806, "M", "侻"), + (0x2F807, "M", "倂"), + (0x2F808, "M", "偺"), + (0x2F809, "M", "備"), + (0x2F80A, "M", "僧"), + (0x2F80B, "M", "像"), + (0x2F80C, "M", "㒞"), + (0x2F80D, "M", "𠘺"), + (0x2F80E, "M", "免"), + (0x2F80F, "M", "兔"), + (0x2F810, "M", "兤"), + (0x2F811, "M", "具"), + (0x2F812, "M", "𠔜"), + (0x2F813, "M", "㒹"), + (0x2F814, "M", "內"), + (0x2F815, "M", "再"), + (0x2F816, "M", "𠕋"), + (0x2F817, "M", "冗"), + (0x2F818, "M", "冤"), + (0x2F819, "M", "仌"), + (0x2F81A, "M", "冬"), + (0x2F81B, "M", "况"), + (0x2F81C, "M", "𩇟"), + (0x2F81D, "M", "凵"), + (0x2F81E, "M", "刃"), + (0x2F81F, "M", "㓟"), + (0x2F820, "M", "刻"), + (0x2F821, "M", "剆"), + (0x2F822, "M", "割"), + (0x2F823, "M", "剷"), + (0x2F824, "M", "㔕"), + (0x2F825, "M", "勇"), + (0x2F826, "M", "勉"), + (0x2F827, "M", "勤"), + (0x2F828, "M", "勺"), + (0x2F829, 
"M", "包"), + (0x2F82A, "M", "匆"), + (0x2F82B, "M", "北"), + (0x2F82C, "M", "卉"), + (0x2F82D, "M", "卑"), + (0x2F82E, "M", "博"), + (0x2F82F, "M", "即"), + (0x2F830, "M", "卽"), + (0x2F831, "M", "卿"), + (0x2F834, "M", "𠨬"), + (0x2F835, "M", "灰"), + (0x2F836, "M", "及"), + (0x2F837, "M", "叟"), + (0x2F838, "M", "𠭣"), + (0x2F839, "M", "叫"), + (0x2F83A, "M", "叱"), + (0x2F83B, "M", "吆"), + (0x2F83C, "M", "咞"), + (0x2F83D, "M", "吸"), + (0x2F83E, "M", "呈"), + (0x2F83F, "M", "周"), + (0x2F840, "M", "咢"), + ] + + +def _seg_77() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x2F841, "M", "哶"), + (0x2F842, "M", "唐"), + (0x2F843, "M", "啓"), + (0x2F844, "M", "啣"), + (0x2F845, "M", "善"), + (0x2F847, "M", "喙"), + (0x2F848, "M", "喫"), + (0x2F849, "M", "喳"), + (0x2F84A, "M", "嗂"), + (0x2F84B, "M", "圖"), + (0x2F84C, "M", "嘆"), + (0x2F84D, "M", "圗"), + (0x2F84E, "M", "噑"), + (0x2F84F, "M", "噴"), + (0x2F850, "M", "切"), + (0x2F851, "M", "壮"), + (0x2F852, "M", "城"), + (0x2F853, "M", "埴"), + (0x2F854, "M", "堍"), + (0x2F855, "M", "型"), + (0x2F856, "M", "堲"), + (0x2F857, "M", "報"), + (0x2F858, "M", "墬"), + (0x2F859, "M", "𡓤"), + (0x2F85A, "M", "売"), + (0x2F85B, "M", "壷"), + (0x2F85C, "M", "夆"), + (0x2F85D, "M", "多"), + (0x2F85E, "M", "夢"), + (0x2F85F, "M", "奢"), + (0x2F860, "M", "𡚨"), + (0x2F861, "M", "𡛪"), + (0x2F862, "M", "姬"), + (0x2F863, "M", "娛"), + (0x2F864, "M", "娧"), + (0x2F865, "M", "姘"), + (0x2F866, "M", "婦"), + (0x2F867, "M", "㛮"), + (0x2F868, "X"), + (0x2F869, "M", "嬈"), + (0x2F86A, "M", "嬾"), + (0x2F86C, "M", "𡧈"), + (0x2F86D, "M", "寃"), + (0x2F86E, "M", "寘"), + (0x2F86F, "M", "寧"), + (0x2F870, "M", "寳"), + (0x2F871, "M", "𡬘"), + (0x2F872, "M", "寿"), + (0x2F873, "M", "将"), + (0x2F874, "X"), + (0x2F875, "M", "尢"), + (0x2F876, "M", "㞁"), + (0x2F877, "M", "屠"), + (0x2F878, "M", "屮"), + (0x2F879, "M", "峀"), + (0x2F87A, "M", "岍"), + (0x2F87B, "M", "𡷤"), + (0x2F87C, "M", "嵃"), + (0x2F87D, "M", "𡷦"), + (0x2F87E, "M", "嵮"), + (0x2F87F, "M", "嵫"), + (0x2F880, "M", "嵼"), 
+ (0x2F881, "M", "巡"), + (0x2F882, "M", "巢"), + (0x2F883, "M", "㠯"), + (0x2F884, "M", "巽"), + (0x2F885, "M", "帨"), + (0x2F886, "M", "帽"), + (0x2F887, "M", "幩"), + (0x2F888, "M", "㡢"), + (0x2F889, "M", "𢆃"), + (0x2F88A, "M", "㡼"), + (0x2F88B, "M", "庰"), + (0x2F88C, "M", "庳"), + (0x2F88D, "M", "庶"), + (0x2F88E, "M", "廊"), + (0x2F88F, "M", "𪎒"), + (0x2F890, "M", "廾"), + (0x2F891, "M", "𢌱"), + (0x2F893, "M", "舁"), + (0x2F894, "M", "弢"), + (0x2F896, "M", "㣇"), + (0x2F897, "M", "𣊸"), + (0x2F898, "M", "𦇚"), + (0x2F899, "M", "形"), + (0x2F89A, "M", "彫"), + (0x2F89B, "M", "㣣"), + (0x2F89C, "M", "徚"), + (0x2F89D, "M", "忍"), + (0x2F89E, "M", "志"), + (0x2F89F, "M", "忹"), + (0x2F8A0, "M", "悁"), + (0x2F8A1, "M", "㤺"), + (0x2F8A2, "M", "㤜"), + (0x2F8A3, "M", "悔"), + (0x2F8A4, "M", "𢛔"), + (0x2F8A5, "M", "惇"), + (0x2F8A6, "M", "慈"), + (0x2F8A7, "M", "慌"), + (0x2F8A8, "M", "慎"), + ] + + +def _seg_78() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x2F8A9, "M", "慌"), + (0x2F8AA, "M", "慺"), + (0x2F8AB, "M", "憎"), + (0x2F8AC, "M", "憲"), + (0x2F8AD, "M", "憤"), + (0x2F8AE, "M", "憯"), + (0x2F8AF, "M", "懞"), + (0x2F8B0, "M", "懲"), + (0x2F8B1, "M", "懶"), + (0x2F8B2, "M", "成"), + (0x2F8B3, "M", "戛"), + (0x2F8B4, "M", "扝"), + (0x2F8B5, "M", "抱"), + (0x2F8B6, "M", "拔"), + (0x2F8B7, "M", "捐"), + (0x2F8B8, "M", "𢬌"), + (0x2F8B9, "M", "挽"), + (0x2F8BA, "M", "拼"), + (0x2F8BB, "M", "捨"), + (0x2F8BC, "M", "掃"), + (0x2F8BD, "M", "揤"), + (0x2F8BE, "M", "𢯱"), + (0x2F8BF, "M", "搢"), + (0x2F8C0, "M", "揅"), + (0x2F8C1, "M", "掩"), + (0x2F8C2, "M", "㨮"), + (0x2F8C3, "M", "摩"), + (0x2F8C4, "M", "摾"), + (0x2F8C5, "M", "撝"), + (0x2F8C6, "M", "摷"), + (0x2F8C7, "M", "㩬"), + (0x2F8C8, "M", "敏"), + (0x2F8C9, "M", "敬"), + (0x2F8CA, "M", "𣀊"), + (0x2F8CB, "M", "旣"), + (0x2F8CC, "M", "書"), + (0x2F8CD, "M", "晉"), + (0x2F8CE, "M", "㬙"), + (0x2F8CF, "M", "暑"), + (0x2F8D0, "M", "㬈"), + (0x2F8D1, "M", "㫤"), + (0x2F8D2, "M", "冒"), + (0x2F8D3, "M", "冕"), + (0x2F8D4, "M", "最"), + (0x2F8D5, "M", "暜"), + 
(0x2F8D6, "M", "肭"), + (0x2F8D7, "M", "䏙"), + (0x2F8D8, "M", "朗"), + (0x2F8D9, "M", "望"), + (0x2F8DA, "M", "朡"), + (0x2F8DB, "M", "杞"), + (0x2F8DC, "M", "杓"), + (0x2F8DD, "M", "𣏃"), + (0x2F8DE, "M", "㭉"), + (0x2F8DF, "M", "柺"), + (0x2F8E0, "M", "枅"), + (0x2F8E1, "M", "桒"), + (0x2F8E2, "M", "梅"), + (0x2F8E3, "M", "𣑭"), + (0x2F8E4, "M", "梎"), + (0x2F8E5, "M", "栟"), + (0x2F8E6, "M", "椔"), + (0x2F8E7, "M", "㮝"), + (0x2F8E8, "M", "楂"), + (0x2F8E9, "M", "榣"), + (0x2F8EA, "M", "槪"), + (0x2F8EB, "M", "檨"), + (0x2F8EC, "M", "𣚣"), + (0x2F8ED, "M", "櫛"), + (0x2F8EE, "M", "㰘"), + (0x2F8EF, "M", "次"), + (0x2F8F0, "M", "𣢧"), + (0x2F8F1, "M", "歔"), + (0x2F8F2, "M", "㱎"), + (0x2F8F3, "M", "歲"), + (0x2F8F4, "M", "殟"), + (0x2F8F5, "M", "殺"), + (0x2F8F6, "M", "殻"), + (0x2F8F7, "M", "𣪍"), + (0x2F8F8, "M", "𡴋"), + (0x2F8F9, "M", "𣫺"), + (0x2F8FA, "M", "汎"), + (0x2F8FB, "M", "𣲼"), + (0x2F8FC, "M", "沿"), + (0x2F8FD, "M", "泍"), + (0x2F8FE, "M", "汧"), + (0x2F8FF, "M", "洖"), + (0x2F900, "M", "派"), + (0x2F901, "M", "海"), + (0x2F902, "M", "流"), + (0x2F903, "M", "浩"), + (0x2F904, "M", "浸"), + (0x2F905, "M", "涅"), + (0x2F906, "M", "𣴞"), + (0x2F907, "M", "洴"), + (0x2F908, "M", "港"), + (0x2F909, "M", "湮"), + (0x2F90A, "M", "㴳"), + (0x2F90B, "M", "滋"), + (0x2F90C, "M", "滇"), + ] + + +def _seg_79() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x2F90D, "M", "𣻑"), + (0x2F90E, "M", "淹"), + (0x2F90F, "M", "潮"), + (0x2F910, "M", "𣽞"), + (0x2F911, "M", "𣾎"), + (0x2F912, "M", "濆"), + (0x2F913, "M", "瀹"), + (0x2F914, "M", "瀞"), + (0x2F915, "M", "瀛"), + (0x2F916, "M", "㶖"), + (0x2F917, "M", "灊"), + (0x2F918, "M", "災"), + (0x2F919, "M", "灷"), + (0x2F91A, "M", "炭"), + (0x2F91B, "M", "𠔥"), + (0x2F91C, "M", "煅"), + (0x2F91D, "M", "𤉣"), + (0x2F91E, "M", "熜"), + (0x2F91F, "X"), + (0x2F920, "M", "爨"), + (0x2F921, "M", "爵"), + (0x2F922, "M", "牐"), + (0x2F923, "M", "𤘈"), + (0x2F924, "M", "犀"), + (0x2F925, "M", "犕"), + (0x2F926, "M", "𤜵"), + (0x2F927, "M", "𤠔"), + (0x2F928, "M", "獺"), + 
(0x2F929, "M", "王"), + (0x2F92A, "M", "㺬"), + (0x2F92B, "M", "玥"), + (0x2F92C, "M", "㺸"), + (0x2F92E, "M", "瑇"), + (0x2F92F, "M", "瑜"), + (0x2F930, "M", "瑱"), + (0x2F931, "M", "璅"), + (0x2F932, "M", "瓊"), + (0x2F933, "M", "㼛"), + (0x2F934, "M", "甤"), + (0x2F935, "M", "𤰶"), + (0x2F936, "M", "甾"), + (0x2F937, "M", "𤲒"), + (0x2F938, "M", "異"), + (0x2F939, "M", "𢆟"), + (0x2F93A, "M", "瘐"), + (0x2F93B, "M", "𤾡"), + (0x2F93C, "M", "𤾸"), + (0x2F93D, "M", "𥁄"), + (0x2F93E, "M", "㿼"), + (0x2F93F, "M", "䀈"), + (0x2F940, "M", "直"), + (0x2F941, "M", "𥃳"), + (0x2F942, "M", "𥃲"), + (0x2F943, "M", "𥄙"), + (0x2F944, "M", "𥄳"), + (0x2F945, "M", "眞"), + (0x2F946, "M", "真"), + (0x2F948, "M", "睊"), + (0x2F949, "M", "䀹"), + (0x2F94A, "M", "瞋"), + (0x2F94B, "M", "䁆"), + (0x2F94C, "M", "䂖"), + (0x2F94D, "M", "𥐝"), + (0x2F94E, "M", "硎"), + (0x2F94F, "M", "碌"), + (0x2F950, "M", "磌"), + (0x2F951, "M", "䃣"), + (0x2F952, "M", "𥘦"), + (0x2F953, "M", "祖"), + (0x2F954, "M", "𥚚"), + (0x2F955, "M", "𥛅"), + (0x2F956, "M", "福"), + (0x2F957, "M", "秫"), + (0x2F958, "M", "䄯"), + (0x2F959, "M", "穀"), + (0x2F95A, "M", "穊"), + (0x2F95B, "M", "穏"), + (0x2F95C, "M", "𥥼"), + (0x2F95D, "M", "𥪧"), + (0x2F95F, "X"), + (0x2F960, "M", "䈂"), + (0x2F961, "M", "𥮫"), + (0x2F962, "M", "篆"), + (0x2F963, "M", "築"), + (0x2F964, "M", "䈧"), + (0x2F965, "M", "𥲀"), + (0x2F966, "M", "糒"), + (0x2F967, "M", "䊠"), + (0x2F968, "M", "糨"), + (0x2F969, "M", "糣"), + (0x2F96A, "M", "紀"), + (0x2F96B, "M", "𥾆"), + (0x2F96C, "M", "絣"), + (0x2F96D, "M", "䌁"), + (0x2F96E, "M", "緇"), + (0x2F96F, "M", "縂"), + (0x2F970, "M", "繅"), + (0x2F971, "M", "䌴"), + (0x2F972, "M", "𦈨"), + (0x2F973, "M", "𦉇"), + ] + + +def _seg_80() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x2F974, "M", "䍙"), + (0x2F975, "M", "𦋙"), + (0x2F976, "M", "罺"), + (0x2F977, "M", "𦌾"), + (0x2F978, "M", "羕"), + (0x2F979, "M", "翺"), + (0x2F97A, "M", "者"), + (0x2F97B, "M", "𦓚"), + (0x2F97C, "M", "𦔣"), + (0x2F97D, "M", "聠"), + (0x2F97E, "M", "𦖨"), + 
(0x2F97F, "M", "聰"), + (0x2F980, "M", "𣍟"), + (0x2F981, "M", "䏕"), + (0x2F982, "M", "育"), + (0x2F983, "M", "脃"), + (0x2F984, "M", "䐋"), + (0x2F985, "M", "脾"), + (0x2F986, "M", "媵"), + (0x2F987, "M", "𦞧"), + (0x2F988, "M", "𦞵"), + (0x2F989, "M", "𣎓"), + (0x2F98A, "M", "𣎜"), + (0x2F98B, "M", "舁"), + (0x2F98C, "M", "舄"), + (0x2F98D, "M", "辞"), + (0x2F98E, "M", "䑫"), + (0x2F98F, "M", "芑"), + (0x2F990, "M", "芋"), + (0x2F991, "M", "芝"), + (0x2F992, "M", "劳"), + (0x2F993, "M", "花"), + (0x2F994, "M", "芳"), + (0x2F995, "M", "芽"), + (0x2F996, "M", "苦"), + (0x2F997, "M", "𦬼"), + (0x2F998, "M", "若"), + (0x2F999, "M", "茝"), + (0x2F99A, "M", "荣"), + (0x2F99B, "M", "莭"), + (0x2F99C, "M", "茣"), + (0x2F99D, "M", "莽"), + (0x2F99E, "M", "菧"), + (0x2F99F, "M", "著"), + (0x2F9A0, "M", "荓"), + (0x2F9A1, "M", "菊"), + (0x2F9A2, "M", "菌"), + (0x2F9A3, "M", "菜"), + (0x2F9A4, "M", "𦰶"), + (0x2F9A5, "M", "𦵫"), + (0x2F9A6, "M", "𦳕"), + (0x2F9A7, "M", "䔫"), + (0x2F9A8, "M", "蓱"), + (0x2F9A9, "M", "蓳"), + (0x2F9AA, "M", "蔖"), + (0x2F9AB, "M", "𧏊"), + (0x2F9AC, "M", "蕤"), + (0x2F9AD, "M", "𦼬"), + (0x2F9AE, "M", "䕝"), + (0x2F9AF, "M", "䕡"), + (0x2F9B0, "M", "𦾱"), + (0x2F9B1, "M", "𧃒"), + (0x2F9B2, "M", "䕫"), + (0x2F9B3, "M", "虐"), + (0x2F9B4, "M", "虜"), + (0x2F9B5, "M", "虧"), + (0x2F9B6, "M", "虩"), + (0x2F9B7, "M", "蚩"), + (0x2F9B8, "M", "蚈"), + (0x2F9B9, "M", "蜎"), + (0x2F9BA, "M", "蛢"), + (0x2F9BB, "M", "蝹"), + (0x2F9BC, "M", "蜨"), + (0x2F9BD, "M", "蝫"), + (0x2F9BE, "M", "螆"), + (0x2F9BF, "X"), + (0x2F9C0, "M", "蟡"), + (0x2F9C1, "M", "蠁"), + (0x2F9C2, "M", "䗹"), + (0x2F9C3, "M", "衠"), + (0x2F9C4, "M", "衣"), + (0x2F9C5, "M", "𧙧"), + (0x2F9C6, "M", "裗"), + (0x2F9C7, "M", "裞"), + (0x2F9C8, "M", "䘵"), + (0x2F9C9, "M", "裺"), + (0x2F9CA, "M", "㒻"), + (0x2F9CB, "M", "𧢮"), + (0x2F9CC, "M", "𧥦"), + (0x2F9CD, "M", "䚾"), + (0x2F9CE, "M", "䛇"), + (0x2F9CF, "M", "誠"), + (0x2F9D0, "M", "諭"), + (0x2F9D1, "M", "變"), + (0x2F9D2, "M", "豕"), + (0x2F9D3, "M", "𧲨"), + (0x2F9D4, "M", "貫"), + (0x2F9D5, "M", "賁"), + 
(0x2F9D6, "M", "贛"), + (0x2F9D7, "M", "起"), + ] + + +def _seg_81() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x2F9D8, "M", "𧼯"), + (0x2F9D9, "M", "𠠄"), + (0x2F9DA, "M", "跋"), + (0x2F9DB, "M", "趼"), + (0x2F9DC, "M", "跰"), + (0x2F9DD, "M", "𠣞"), + (0x2F9DE, "M", "軔"), + (0x2F9DF, "M", "輸"), + (0x2F9E0, "M", "𨗒"), + (0x2F9E1, "M", "𨗭"), + (0x2F9E2, "M", "邔"), + (0x2F9E3, "M", "郱"), + (0x2F9E4, "M", "鄑"), + (0x2F9E5, "M", "𨜮"), + (0x2F9E6, "M", "鄛"), + (0x2F9E7, "M", "鈸"), + (0x2F9E8, "M", "鋗"), + (0x2F9E9, "M", "鋘"), + (0x2F9EA, "M", "鉼"), + (0x2F9EB, "M", "鏹"), + (0x2F9EC, "M", "鐕"), + (0x2F9ED, "M", "𨯺"), + (0x2F9EE, "M", "開"), + (0x2F9EF, "M", "䦕"), + (0x2F9F0, "M", "閷"), + (0x2F9F1, "M", "𨵷"), + (0x2F9F2, "M", "䧦"), + (0x2F9F3, "M", "雃"), + (0x2F9F4, "M", "嶲"), + (0x2F9F5, "M", "霣"), + (0x2F9F6, "M", "𩅅"), + (0x2F9F7, "M", "𩈚"), + (0x2F9F8, "M", "䩮"), + (0x2F9F9, "M", "䩶"), + (0x2F9FA, "M", "韠"), + (0x2F9FB, "M", "𩐊"), + (0x2F9FC, "M", "䪲"), + (0x2F9FD, "M", "𩒖"), + (0x2F9FE, "M", "頋"), + (0x2FA00, "M", "頩"), + (0x2FA01, "M", "𩖶"), + (0x2FA02, "M", "飢"), + (0x2FA03, "M", "䬳"), + (0x2FA04, "M", "餩"), + (0x2FA05, "M", "馧"), + (0x2FA06, "M", "駂"), + (0x2FA07, "M", "駾"), + (0x2FA08, "M", "䯎"), + (0x2FA09, "M", "𩬰"), + (0x2FA0A, "M", "鬒"), + (0x2FA0B, "M", "鱀"), + (0x2FA0C, "M", "鳽"), + (0x2FA0D, "M", "䳎"), + (0x2FA0E, "M", "䳭"), + (0x2FA0F, "M", "鵧"), + (0x2FA10, "M", "𪃎"), + (0x2FA11, "M", "䳸"), + (0x2FA12, "M", "𪄅"), + (0x2FA13, "M", "𪈎"), + (0x2FA14, "M", "𪊑"), + (0x2FA15, "M", "麻"), + (0x2FA16, "M", "䵖"), + (0x2FA17, "M", "黹"), + (0x2FA18, "M", "黾"), + (0x2FA19, "M", "鼅"), + (0x2FA1A, "M", "鼏"), + (0x2FA1B, "M", "鼖"), + (0x2FA1C, "M", "鼻"), + (0x2FA1D, "M", "𪘀"), + (0x2FA1E, "X"), + (0x30000, "V"), + (0x3134B, "X"), + (0x31350, "V"), + (0x323B0, "X"), + (0xE0100, "I"), + (0xE01F0, "X"), + ] + + +uts46data = tuple( + _seg_0() + + _seg_1() + + _seg_2() + + _seg_3() + + _seg_4() + + _seg_5() + + _seg_6() + + _seg_7() + + _seg_8() + + 
_seg_9() + + _seg_10() + + _seg_11() + + _seg_12() + + _seg_13() + + _seg_14() + + _seg_15() + + _seg_16() + + _seg_17() + + _seg_18() + + _seg_19() + + _seg_20() + + _seg_21() + + _seg_22() + + _seg_23() + + _seg_24() + + _seg_25() + + _seg_26() + + _seg_27() + + _seg_28() + + _seg_29() + + _seg_30() + + _seg_31() + + _seg_32() + + _seg_33() + + _seg_34() + + _seg_35() + + _seg_36() + + _seg_37() + + _seg_38() + + _seg_39() + + _seg_40() + + _seg_41() + + _seg_42() + + _seg_43() + + _seg_44() + + _seg_45() + + _seg_46() + + _seg_47() + + _seg_48() + + _seg_49() + + _seg_50() + + _seg_51() + + _seg_52() + + _seg_53() + + _seg_54() + + _seg_55() + + _seg_56() + + _seg_57() + + _seg_58() + + _seg_59() + + _seg_60() + + _seg_61() + + _seg_62() + + _seg_63() + + _seg_64() + + _seg_65() + + _seg_66() + + _seg_67() + + _seg_68() + + _seg_69() + + _seg_70() + + _seg_71() + + _seg_72() + + _seg_73() + + _seg_74() + + _seg_75() + + _seg_76() + + _seg_77() + + _seg_78() + + _seg_79() + + _seg_80() + + _seg_81() +) # type: Tuple[Union[Tuple[int, str], Tuple[int, str, str]], ...] 
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio-2.37.2.dist-info/INSTALLER b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio-2.37.2.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio-2.37.2.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio-2.37.2.dist-info/METADATA b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio-2.37.2.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..b17e40a80501c964516e2bdb55638bdbc01d9df2 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio-2.37.2.dist-info/METADATA @@ -0,0 +1,238 @@ +Metadata-Version: 2.4 +Name: ImageIO +Version: 2.37.2 +Summary: Read and write images and video across all major formats. Supports scientific and volumetric data. +Author: ImageIO contributors +Maintainer-email: Sebastian Wallkotter , Almar Klein +License-Expression: BSD-2-Clause +Project-URL: homepage, https://github.com/imageio/imageio +Project-URL: download, http://pypi.python.org/pypi/imageio +Project-URL: source, https://github.com/imageio/imageio +Project-URL: documentation, https://imageio.readthedocs.io +Keywords: image,video,imread,imwrite,io,animation,ffmpeg,image processing,numpy,format conversion,scientific imaging,medical imaging,volumetric,video encoding,multimedia,gif,tiff,png,jpeg +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Science/Research +Classifier: Intended Audience :: Education +Classifier: Intended Audience :: Developers +Classifier: Operating System :: MacOS :: MacOS X +Classifier: Operating System :: Microsoft :: Windows +Classifier: Operating System :: POSIX +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 
3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: Programming Language :: Python :: 3.13 +Classifier: Programming Language :: Python :: 3.14 +Requires-Python: >=3.9 +Description-Content-Type: text/x-rst +License-File: LICENSE +Requires-Dist: numpy +Requires-Dist: pillow>=8.3.2 +Provides-Extra: bsdf +Provides-Extra: dicom +Provides-Extra: feisem +Provides-Extra: ffmpeg +Requires-Dist: imageio-ffmpeg; extra == "ffmpeg" +Requires-Dist: psutil; extra == "ffmpeg" +Provides-Extra: freeimage +Requires-Dist: fsspec[http]; extra == "freeimage" +Provides-Extra: lytro +Provides-Extra: numpy +Provides-Extra: pillow-heif +Requires-Dist: pillow-heif; extra == "pillow-heif" +Provides-Extra: pillow +Provides-Extra: simpleitk +Provides-Extra: spe +Provides-Extra: swf +Provides-Extra: tifffile +Requires-Dist: tifffile; extra == "tifffile" +Provides-Extra: pyav +Requires-Dist: av; extra == "pyav" +Provides-Extra: fits +Requires-Dist: astropy; extra == "fits" +Provides-Extra: rawpy +Requires-Dist: rawpy; extra == "rawpy" +Requires-Dist: numpy>2; extra == "rawpy" +Provides-Extra: gdal +Requires-Dist: gdal; extra == "gdal" +Provides-Extra: itk +Requires-Dist: itk; extra == "itk" +Provides-Extra: linting +Requires-Dist: black; extra == "linting" +Requires-Dist: flake8; extra == "linting" +Provides-Extra: test +Requires-Dist: pytest; extra == "test" +Requires-Dist: pytest-cov; extra == "test" +Requires-Dist: fsspec[github]; extra == "test" +Provides-Extra: docs +Requires-Dist: sphinx<6; extra == "docs" +Requires-Dist: numpydoc; extra == "docs" +Requires-Dist: pydata-sphinx-theme; extra == "docs" +Provides-Extra: dev +Requires-Dist: pytest; extra == "dev" +Requires-Dist: pytest-cov; extra == "dev" +Requires-Dist: fsspec[github]; extra == "dev" +Requires-Dist: black; extra == "dev" +Requires-Dist: flake8; extra == "dev" +Provides-Extra: all-plugins +Requires-Dist: av; extra == "all-plugins" +Requires-Dist: astropy; extra == "all-plugins" +Requires-Dist: fsspec[http]; 
extra == "all-plugins" +Requires-Dist: imageio-ffmpeg; extra == "all-plugins" +Requires-Dist: numpy>2; extra == "all-plugins" +Requires-Dist: pillow-heif; extra == "all-plugins" +Requires-Dist: psutil; extra == "all-plugins" +Requires-Dist: rawpy; extra == "all-plugins" +Requires-Dist: tifffile; extra == "all-plugins" +Provides-Extra: all-plugins-pypy +Requires-Dist: fsspec[http]; extra == "all-plugins-pypy" +Requires-Dist: imageio-ffmpeg; extra == "all-plugins-pypy" +Requires-Dist: pillow-heif; extra == "all-plugins-pypy" +Requires-Dist: psutil; extra == "all-plugins-pypy" +Requires-Dist: tifffile; extra == "all-plugins-pypy" +Provides-Extra: full +Requires-Dist: astropy; extra == "full" +Requires-Dist: av; extra == "full" +Requires-Dist: black; extra == "full" +Requires-Dist: flake8; extra == "full" +Requires-Dist: fsspec[github,http]; extra == "full" +Requires-Dist: imageio-ffmpeg; extra == "full" +Requires-Dist: numpydoc; extra == "full" +Requires-Dist: numpy>2; extra == "full" +Requires-Dist: pillow-heif; extra == "full" +Requires-Dist: psutil; extra == "full" +Requires-Dist: pydata-sphinx-theme; extra == "full" +Requires-Dist: pytest; extra == "full" +Requires-Dist: pytest-cov; extra == "full" +Requires-Dist: rawpy; extra == "full" +Requires-Dist: sphinx<6; extra == "full" +Requires-Dist: tifffile; extra == "full" +Dynamic: license-file + +# IMAGEIO + +[![CI](https://github.com/imageio/imageio/workflows/CI/badge.svg)](https://github.com/imageio/imageio/actions/workflows/ci.yml) +[![CD](https://github.com/imageio/imageio/workflows/CD/badge.svg)](https://github.com/imageio/imageio/actions/workflows/cd.yml) +[![codecov](https://codecov.io/gh/imageio/imageio/branch/master/graph/badge.svg?token=81Zhu9MDec)](https://codecov.io/gh/imageio/imageio) +[![Docs](https://readthedocs.org/projects/imageio/badge/?version=latest)](https://imageio.readthedocs.io) + +[![Supported Python 
Versions](https://img.shields.io/pypi/pyversions/imageio.svg)](https://pypi.python.org/pypi/imageio/) +[![PyPI Version](https://img.shields.io/pypi/v/imageio.svg)](https://pypi.python.org/pypi/imageio/) +![PyPI Downloads](https://img.shields.io/pypi/dm/imageio?color=blue) +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.1488561.svg)](https://doi.org/10.5281/zenodo.1488561) + +Website: https://imageio.readthedocs.io + +Imageio is a mature Python library that makes it easy to read and write image +and video data. This includes animated images, video, volumetric data, and +scientific formats. It is cross-platform, runs on Python 3.9+, and is easy to +install. + +Professional support is available via +[Tidelift](https://tidelift.com/funding/github/pypi/imageio). + +## Example + +Here's a minimal example of how to use imageio. See the docs for [more +examples](https://imageio.readthedocs.io/en/stable/examples.html). + +```python +import imageio.v3 as iio +im = iio.imread('imageio:chelsea.png') # read a standard image +im.shape # im is a NumPy array of shape (300, 451, 3) +iio.imwrite('chelsea.jpg', im) # convert to jpg +``` + +## API in a nutshell + +You just have to remember a handful of functions: + +```python +imread() # for reading +imwrite() # for writing +imiter() # for iterating image series (animations/videos/OME-TIFF/...) +improps() # for standardized metadata +immeta() # for format-specific metadata +imopen() # for advanced usage +``` + +See the [API docs](https://imageio.readthedocs.io/en/stable/reference/index.html) for more information. 
+ +## Features + +- Simple interface via a concise set of functions +- Easy to + [install](https://imageio.readthedocs.io/en/stable/getting_started/installation.html) + using Conda or pip +- Few core dependencies (only NumPy and Pillow) +- Pure Python, runs on Python 3.9+, and PyPy +- Cross platform, runs on Windows, Linux, macOS +- More than 295 supported + [formats](https://imageio.readthedocs.io/en/stable/formats/index.html) +- Read/Write support for various + [resources](https://imageio.readthedocs.io/en/stable/getting_started/requests.html) + (files, URLs, bytes, FileLike objects, ...) +- High code quality and large test suite including functional, regression, and + integration tests + +## Dependencies + +Minimal requirements: + +- Python 3.9+ +- NumPy +- Pillow >= 8.3.2 + +Optional Python packages: + +- imageio-ffmpeg (for working with video files) +- pyav (for working with video files) +- tifffile (for working with TIFF files) +- itk or SimpleITK (for ITK plugin) +- astropy (for FITS plugin) +- [imageio-flif](https://codeberg.org/monilophyta/imageio-flif) (for working + with [FLIF](https://github.com/FLIF-hub/FLIF) image files) + +## Security contact information + +To report a security vulnerability, please use the [Tidelift security +contact](https://tidelift.com/security). Tidelift will coordinate the fix and +disclosure. + +## ImageIO for enterprise + +Available as part of the Tidelift Subscription. + +The maintainers of imageio and thousands of other packages are working with +Tidelift to deliver commercial support and maintenance for the open source +dependencies you use to build your applications. Save time, reduce risk, and +improve code health, while paying the maintainers of the exact dependencies you +use. ([Learn +more](https://tidelift.com/subscription/pkg/pypi-imageio?utm_source=pypi-imageio&utm_medium=referral&utm_campaign=readme)) + +## Details + +The core of ImageIO is a set of user-facing APIs combined with a plugin manager. 
+API calls choose sensible defaults and then call the plugin manager, which +deduces the correct plugin/backend to use for the given resource and file +format. The plugin manager adds sensible backend-specific defaults and then +calls one of ImageIO's many backends to perform the actual loading. This allows +ImageIO to take care of most of the gory details of loading images for you, +while still allowing you to customize the behavior when and where you need to. +You can find a more detailed explanation of this process in [our +documentation](https://imageio.readthedocs.io/en/stable/user_guide/overview.html). + +## Contributing + +We welcome contributions of any kind. Here are some suggestions on how you are +able to contribute: + +- add missing formats to the format list +- suggest/implement support for new backends +- report/fix any bugs you encounter while using ImageIO + +To assist you in getting started with contributing code, take a look at the +[development +section](https://imageio.readthedocs.io/en/stable/development/index.html) of the +docs. You will find instructions on setting up the dev environment as well as +examples on how to contribute code. 
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio-2.37.2.dist-info/RECORD b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio-2.37.2.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..03c05ba4f06fa1c9bddb9421150e4cde0ff5ef8d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio-2.37.2.dist-info/RECORD @@ -0,0 +1,117 @@ +../../../bin/imageio_download_bin,sha256=HmhRtAd0N735jsfrOhICQGEhxyk73bYXQXp5oUvv584,289 +../../../bin/imageio_remove_bin,sha256=_BGD7us1S_Jrw0cAlT_ZfUrPgrg8tcf3McdJbB0yb6A,285 +imageio-2.37.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +imageio-2.37.2.dist-info/METADATA,sha256=OmZzehqOIyS3iWPz-tO8EBRulvo6_iuu6S5A48q6CjA,9660 +imageio-2.37.2.dist-info/RECORD,, +imageio-2.37.2.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +imageio-2.37.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91 +imageio-2.37.2.dist-info/entry_points.txt,sha256=0-yB6XGfrx1OMPw_xigPramTcwi5M4jX6L5Edrz0OoU,130 +imageio-2.37.2.dist-info/licenses/LICENSE,sha256=NWQXdLE3tizsANCvKTE6nRPeDBUsl3o05ZGyMdq2Kg8,1307 +imageio-2.37.2.dist-info/top_level.txt,sha256=iSUjc-wEw-xbMTvMOSKg85n0-E7Ms--Mo4FLMC-J2YM,8 +imageio/__init__.py,sha256=nD4L_izRJVbjJHmw5aWNQcA4KtHUZhT_NYvs7ZOxDUo,3342 +imageio/__main__.py,sha256=ML406biyXilwpd81xaoG7-eC32DkJi3jW3oSkWf_SC0,5266 +imageio/__pycache__/__init__.cpython-312.pyc,, +imageio/__pycache__/__main__.cpython-312.pyc,, +imageio/__pycache__/freeze.cpython-312.pyc,, +imageio/__pycache__/testing.cpython-312.pyc,, +imageio/__pycache__/typing.cpython-312.pyc,, +imageio/__pycache__/v2.cpython-312.pyc,, +imageio/__pycache__/v3.cpython-312.pyc,, +imageio/config/__init__.py,sha256=8NOpL5ePrkiioJb9hRBw3rydc4iNZkMwp7VdQlP4jDc,307 +imageio/config/__pycache__/__init__.cpython-312.pyc,, +imageio/config/__pycache__/extensions.cpython-312.pyc,, +imageio/config/__pycache__/plugins.cpython-312.pyc,, 
+imageio/config/extensions.py,sha256=ahIRDqU7r5y-ev9wDMekWrewDRq89E4ijoFIoA_dqAQ,47023 +imageio/config/extensions.pyi,sha256=X9d4plQJc8zFsHRzIl6kcdgmN-x9ldEtQ9iVxepkRHI,606 +imageio/config/plugins.py,sha256=fh4AiZof4Z_chpixhCi37OHZEnNalt58v6CprgEmlOE,20277 +imageio/config/plugins.pyi,sha256=pzH8pacqU5uldsvYOee_nhd2Hkk3mR8VQBtjeVnkkHY,706 +imageio/core/__init__.py,sha256=ka3YhBNXYqsnu9n1Db60DhHTn5tylYzfED_RYXgMz8U,638 +imageio/core/__pycache__/__init__.cpython-312.pyc,, +imageio/core/__pycache__/fetching.cpython-312.pyc,, +imageio/core/__pycache__/findlib.cpython-312.pyc,, +imageio/core/__pycache__/format.cpython-312.pyc,, +imageio/core/__pycache__/imopen.cpython-312.pyc,, +imageio/core/__pycache__/legacy_plugin_wrapper.cpython-312.pyc,, +imageio/core/__pycache__/request.cpython-312.pyc,, +imageio/core/__pycache__/util.cpython-312.pyc,, +imageio/core/__pycache__/v3_plugin_api.cpython-312.pyc,, +imageio/core/fetching.py,sha256=VRsPHlEDZmUgUCjnUGnEALb55uCRLlGOZWtnM6lsRf8,9175 +imageio/core/findlib.py,sha256=6_vXFk2zudTIKNL5auUKKNF0PP-u4HcdCef3YQYnKH4,5550 +imageio/core/format.py,sha256=glQcJOZHEOST3u0jOa338ZxJBX_daEe6xl7-UKxuU6E,30917 +imageio/core/format.pyi,sha256=5BZF-xwp5BmG8C5ahfL48z_a2MITN0509Uf6f1phZRw,3336 +imageio/core/imopen.py,sha256=SA4OJj93B09CHsKSILdH1w3zdVWvRSopNWlGlS0f4t0,9752 +imageio/core/imopen.pyi,sha256=8jLI2tKUTqFe79mccw95fRAIanJPHi6gQmzB2ClESlk,2215 +imageio/core/legacy_plugin_wrapper.py,sha256=CYGXhJY-18HkVYqyzlepM7NcZ9VLvBjFjNj64HOBqBM,12136 +imageio/core/legacy_plugin_wrapper.pyi,sha256=X1ksqM_V4KJkUhQ69PfIc44vf26GMo3j0L8ofj5Tu-g,1065 +imageio/core/request.py,sha256=iNhNHVdLq1I_tSSiTKfW0ILevnIOaElk1zIC2UJjlUc,27108 +imageio/core/request.pyi,sha256=kBedyIWgvaLTiZM8d6UVLSKBoCB0ShGEwCWdlR5Q3Ao,2316 +imageio/core/util.py,sha256=YyXhqaaG1HHCCVajE0ILev_EV_EvxHjUo973k8sNubY,17961 +imageio/core/v3_plugin_api.py,sha256=w8wUjlT7_N6aU76DYGF3ubYYfUHTyfStvK5_xosZLPQ,15560 +imageio/freeze.py,sha256=hi9MNZz-ridgQBWcAqnd92sULek2lgmBSTmuott5lus,170 
+imageio/plugins/__init__.py,sha256=GSxtio0ph5QHP2asdLvyzW8lVfiRqOii8kaqYsBO9CE,3469 +imageio/plugins/__pycache__/__init__.cpython-312.pyc,, +imageio/plugins/__pycache__/_bsdf.cpython-312.pyc,, +imageio/plugins/__pycache__/_dicom.cpython-312.pyc,, +imageio/plugins/__pycache__/_freeimage.cpython-312.pyc,, +imageio/plugins/__pycache__/_swf.cpython-312.pyc,, +imageio/plugins/__pycache__/_tifffile.cpython-312.pyc,, +imageio/plugins/__pycache__/bsdf.cpython-312.pyc,, +imageio/plugins/__pycache__/dicom.cpython-312.pyc,, +imageio/plugins/__pycache__/example.cpython-312.pyc,, +imageio/plugins/__pycache__/feisem.cpython-312.pyc,, +imageio/plugins/__pycache__/ffmpeg.cpython-312.pyc,, +imageio/plugins/__pycache__/fits.cpython-312.pyc,, +imageio/plugins/__pycache__/freeimage.cpython-312.pyc,, +imageio/plugins/__pycache__/freeimagemulti.cpython-312.pyc,, +imageio/plugins/__pycache__/gdal.cpython-312.pyc,, +imageio/plugins/__pycache__/grab.cpython-312.pyc,, +imageio/plugins/__pycache__/lytro.cpython-312.pyc,, +imageio/plugins/__pycache__/npz.cpython-312.pyc,, +imageio/plugins/__pycache__/opencv.cpython-312.pyc,, +imageio/plugins/__pycache__/pillow.cpython-312.pyc,, +imageio/plugins/__pycache__/pillow_info.cpython-312.pyc,, +imageio/plugins/__pycache__/pillow_legacy.cpython-312.pyc,, +imageio/plugins/__pycache__/pillowmulti.cpython-312.pyc,, +imageio/plugins/__pycache__/pyav.cpython-312.pyc,, +imageio/plugins/__pycache__/rawpy.cpython-312.pyc,, +imageio/plugins/__pycache__/simpleitk.cpython-312.pyc,, +imageio/plugins/__pycache__/spe.cpython-312.pyc,, +imageio/plugins/__pycache__/swf.cpython-312.pyc,, +imageio/plugins/__pycache__/tifffile.cpython-312.pyc,, +imageio/plugins/__pycache__/tifffile_v3.cpython-312.pyc,, +imageio/plugins/_bsdf.py,sha256=b-QjkZvz9DPDbygiKhee-47Ld2eOqxpYEdZ1mnrRPJ4,32753 +imageio/plugins/_dicom.py,sha256=MolRDau1FizY4r1OFOCKLoh2rrVM5rEA7RzwjkilTow,34072 +imageio/plugins/_freeimage.py,sha256=ngi7IlGweJ2xEPNN3Sd7UumbD47YD3gIkjHnVBPxhBk,51739 
+imageio/plugins/_swf.py,sha256=kh3H2v98bgHpVagGNbhGUodh0s-weiESraX6qzMnD2k,25760 +imageio/plugins/_tifffile.py,sha256=qVF7vm-b5_ENbtYN8Eia9BvooncRnWF6YSAowjUOnFo,371594 +imageio/plugins/bsdf.py,sha256=oatc1PxdGp-c5umyHKp3QKmre1ODLaoZyt0Ss1OTB98,12851 +imageio/plugins/dicom.py,sha256=mQYNbTyum4jVhjZQ8TU-4A5csHpQfT-BRBBCP5fu6Zs,12621 +imageio/plugins/example.py,sha256=8t2GZGoecuf4PRonqdjrS2BZq-LsHy4BVFcH3JMf2s4,5499 +imageio/plugins/feisem.py,sha256=AKwZv7Zac0_grnr-wnzU7R0Zf2KSUe91k06evPa1NI8,3360 +imageio/plugins/ffmpeg.py,sha256=piW75hVO32K9AuvSpBRClI3QhC3bwDgwC8Wxj6P2qnk,30141 +imageio/plugins/fits.py,sha256=XnlmeC79sIiIPd_7IDx05-p3-b2unO4CVR0nWAA4ph0,4531 +imageio/plugins/freeimage.py,sha256=SuzYuGvCtZIiXIr51dWRTl5CATzRUqb8pNCSIg9YZv8,14645 +imageio/plugins/freeimagemulti.py,sha256=oYDLs9kypNcgSkxZ706Ox67RVbFtY3fJZm4TYZXb-JE,11287 +imageio/plugins/gdal.py,sha256=3to_O_Jeo5CHdlHmkN398B_i9pi5YAKUbM1mD5Zm_jc,1652 +imageio/plugins/grab.py,sha256=g6KbKVQUquHro_BW6He7NNmivVV-UtcsCJoDt3rdly0,2776 +imageio/plugins/lytro.py,sha256=-drf8D3ZLhK8kCRlCDUST_VzCVrnnaH4ULv581842fw,25309 +imageio/plugins/npz.py,sha256=7ZQr-4lQEKbfjaF6rOmpq9pQgDTUHvkZa_NHZkJWBQo,2670 +imageio/plugins/opencv.py,sha256=C2nBQQFDXuz6LOyJ1P3-S6e_7h-pJgLow7h7w4Si2tg,11629 +imageio/plugins/pillow.py,sha256=ZX_uWeIaSraRpaFvFSROBYVnjlw4ZVAZ1Hw_t77kvPk,22409 +imageio/plugins/pillow_info.py,sha256=Bt5iJtQnAh6mGViPIxhxRQPNidqay9-6BleAJZkhN1w,36624 +imageio/plugins/pillow_legacy.py,sha256=C2Ptltl03XEucIgA69cZ0iC1zx0fxWdKS8WG-09LZ2U,31579 +imageio/plugins/pillowmulti.py,sha256=zxaplDWhrC-ttI0M53rRNGQ0U5HtInD2QaqBVTwA5cE,11807 +imageio/plugins/pyav.py,sha256=AT1mpeFS-oarhJyihwgVeDey6X-v0K9oskBCNS0edKc,46575 +imageio/plugins/rawpy.py,sha256=TiKGLyipJNfwbVk3qhoWFAPKQMMYrPjVgxXv7Fnrv1o,5947 +imageio/plugins/simpleitk.py,sha256=1A5fSg_75nfOmHtSBwF588yxyDt4KTNcYDrwPA0T1-I,4106 +imageio/plugins/spe.py,sha256=2DIPhBu_jO9l8SMyzcoETm0rkdaXV2XaiBqPIEJmaZo,32171 
+imageio/plugins/swf.py,sha256=WuXdpRH7ybtedSPoYjlf2i_1QEFAc_ssMTqza9iXri8,11755 +imageio/plugins/tifffile.py,sha256=RAm-eEegqQGSqxx0d8vZ32m0RUdggC3xnSzJp1xTv2o,20664 +imageio/plugins/tifffile_v3.py,sha256=Vs2ngBBptUoJ6QpT9EjyNd4-dih8zzGEvcq2mRNYFXg,14335 +imageio/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +imageio/testing.py,sha256=iBs_EccxQQ2Zm6Hk-wbA49ISBbvn8H7sWWTu4j6rDes,1595 +imageio/typing.py,sha256=qrvyFrVIs21bZCE0x802l1R-xCV4DlCNaTzPiJEZbzc,349 +imageio/v2.py,sha256=1KJ5z8Ji2nnAdy_K3vIpysG2Kg7rIcPiadNG1pwKx-E,21563 +imageio/v2.pyi,sha256=nrhf1a3H_ie-0fEZLbheMytL2TdR1q0r9vj4OGcWIhg,2251 +imageio/v3.py,sha256=ZE0IlERPT_4wryYqUOD4-LLc6dVpDZXV6N6JEQtbMiQ,9267 +imageio/v3.pyi,sha256=pGHIL0BX4IN-xlmhRXnbNIjuMPqmSw8aamc5M-gHbRA,1350 diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio-2.37.2.dist-info/REQUESTED b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio-2.37.2.dist-info/REQUESTED new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio-2.37.2.dist-info/WHEEL b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio-2.37.2.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..e7fa31b6f3f78deb1022c1f7927f07d4d16da822 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio-2.37.2.dist-info/WHEEL @@ -0,0 +1,5 @@ +Wheel-Version: 1.0 +Generator: setuptools (80.9.0) +Root-Is-Purelib: true +Tag: py3-none-any + diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio-2.37.2.dist-info/entry_points.txt b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio-2.37.2.dist-info/entry_points.txt new file mode 100644 index 0000000000000000000000000000000000000000..aa30161a1450bf11c7bfa6c548a23df8ff3636f5 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio-2.37.2.dist-info/entry_points.txt @@ -0,0 +1,3 @@ +[console_scripts] +imageio_download_bin = 
imageio.__main__:download_bin_main +imageio_remove_bin = imageio.__main__:remove_bin_main diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio-2.37.2.dist-info/top_level.txt b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio-2.37.2.dist-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..a464e4cd37851148ffe3b7ae88921620c50cfe03 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio-2.37.2.dist-info/top_level.txt @@ -0,0 +1 @@ +imageio diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b757a8240ee73f7ea8d6dfd905f1a0ce5475b9e7 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/__pycache__/__init__.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/__pycache__/__main__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/__pycache__/__main__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3d8f0e4d7d4dec42f9a5e3562aa78d4dd68ebfe4 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/__pycache__/__main__.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/__pycache__/freeze.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/__pycache__/freeze.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..836dda986861aac98a70003a66cfad96421e2962 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/__pycache__/freeze.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/__pycache__/testing.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/__pycache__/testing.cpython-312.pyc new file 
mode 100644 index 0000000000000000000000000000000000000000..c05af20c120183d742cdf697eaf14b331aa32924 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/__pycache__/testing.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/__pycache__/typing.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/__pycache__/typing.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cc380b5ee2dd5e6165de8a0ce228f86c1ca28462 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/__pycache__/typing.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/__pycache__/v2.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/__pycache__/v2.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cbe2060d90484755ba58e2383af1a85ae732e5f6 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/__pycache__/v2.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/__pycache__/v3.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/__pycache__/v3.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d3203d1749cd2fc074756d8e3d3646b2093f18af Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/__pycache__/v3.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/config/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/config/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ca78dd22f2e690bc5115565e9e0c11b67929031c --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/config/__init__.py @@ -0,0 +1,16 @@ +from .extensions import ( + extension_list, + known_extensions, + FileExtension, + video_extensions, +) +from .plugins import known_plugins, 
PluginConfig + +__all__ = [ + "known_plugins", + "PluginConfig", + "extension_list", + "known_extensions", + "FileExtension", + "video_extensions", +] diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/config/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/config/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..02b0b1468fc854bdac5680b2c9449328051b25f7 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/config/__pycache__/__init__.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/config/__pycache__/extensions.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/config/__pycache__/extensions.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..30d5fd6afbb8501ea2ad98a8f7a0b1721c73e516 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/config/__pycache__/extensions.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/config/__pycache__/plugins.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/config/__pycache__/plugins.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..392f6b4ea1f58f32db3d40a27ec6f3ae01c1aa8b Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/config/__pycache__/plugins.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/config/extensions.py b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/config/extensions.py new file mode 100644 index 0000000000000000000000000000000000000000..f0c93d8cd42c3eb1d0bfeabb741ee707d467a835 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/config/extensions.py @@ -0,0 +1,2002 @@ +""" +A set of objects representing each file extension recognized by ImageIO. 
If an +extension is not listed here it is still supported, as long as there exists a +supporting backend. + +""" + + +class FileExtension: + """File Extension Metadata + + This class holds information about a image file format associated with a + given extension. This information is used to track plugins that are known to + be able to handle a particular format. It also contains additional + information about a format, which is used when creating the supported format + docs. + + Plugins known to be able to handle this format are ordered by a ``priority`` + list. This list is used to determine the ideal plugin to use when choosing a + plugin based on file extension. + + Parameters + ---------- + extension : str + The name of the extension including the initial dot, e.g. ".png". + priority : List + A list of plugin names (entries in config.known_plugins) that can handle + this format. The position of a plugin expresses a preference, e.g. + ["plugin1", "plugin2"] indicates that, if available, plugin1 should be + preferred over plugin2 when handling a request related to this format. + name : str + The full name of the format. + description : str + A description of the format. + external_link : str + A link to further information about the format. Typically, the format's + specification. + volume_support : str + If True, the format/extension supports volumetric image data. 
+ + Examples + -------- + >>> FileExtension( + name="Bitmap", + extension=".bmp", + priority=["pillow", "BMP-PIL", "BMP-FI", "ITK"], + external_link="https://en.wikipedia.org/wiki/BMP_file_format", + ) + + """ + + def __init__( + self, + *, + extension, + priority, + name=None, + description=None, + external_link=None, + volume_support=False, + ): + self.extension = extension + self.priority = priority + self.name = name + self.description = description + self.external_link = external_link + self.default_priority = priority.copy() + self.volume_support = volume_support + + def reset(self): + self.priority = self.default_priority.copy() + + +extension_list = [ + FileExtension( + name="Hasselblad raw", + extension=".3fr", + priority=["RAW-FI"], + ), + FileExtension( + name="Sony alpha", + extension=".arw", + priority=["RAW-FI"], + ), + FileExtension( + name="Animated Portable Network Graphics", + external_link="https://en.wikipedia.org/wiki/APNG", + extension=".apng", + priority=["pillow", "pyav"], + ), + FileExtension( + name="Audio Video Interleave", + extension=".avi", + priority=["FFMPEG"], + ), + FileExtension( + name="Casio raw format", + extension=".bay", + priority=["RAW-FI"], + ), + FileExtension( + extension=".blp", + priority=["pillow"], + ), + FileExtension( + name="Bitmap", + extension=".bmp", + priority=["pillow", "BMP-PIL", "BMP-FI", "ITK", "pyav", "opencv"], + external_link="https://en.wikipedia.org/wiki/BMP_file_format", + ), + FileExtension( + name="Device-Independent Bitmap", + extension=".dip", + priority=["opencv"], + external_link="https://en.wikipedia.org/wiki/BMP_file_format", + ), + FileExtension( + name="Re-Volt mipmap", + extension=".bmq", + priority=["RAW-FI"], + ), + FileExtension( + name="Binary Structured Data Format", + extension=".bsdf", + priority=["BSDF"], + external_link="http://bsdf.io/", + ), + FileExtension( + name="Binary Universal Form for the Representation of meteorological data", + extension=".bufr", + priority=["pillow", 
"BUFR-PIL"], + ), + FileExtension( + name="Silicon Graphics Image", + extension=".bw", + priority=["pillow", "SGI-PIL", "SGI-FI"], + ), + FileExtension( + name="Scirra Construct", + extension=".cap", + priority=["RAW-FI"], + ), + FileExtension( + name="AMETEK High Speed Camera Format", + extension=".cine", + priority=["RAW-FI"], + external_link="https://phantomhighspeed-knowledge.secure.force.com/servlet/fileField?id=0BE1N000000kD2i#:~:text=Cine%20is%20a%20video%20file,camera%20model%20and%20image%20resolution", + ), + FileExtension(extension=".cr2", priority=["RAW-FI"]), + FileExtension( + extension=".crw", + priority=["RAW-FI"], + ), + FileExtension( + extension=".cs1", + priority=["RAW-FI"], + ), + FileExtension( + name="Computerized Tomography", + extension=".ct", + priority=["DICOM"], + ), + FileExtension( + name="Windows Cursor Icons", + extension=".cur", + priority=["pillow", "CUR-PIL"], + ), + FileExtension( + name="Dr. Halo", + extension=".cut", + priority=["CUT-FI"], + ), + FileExtension( + extension=".dc2", + priority=["RAW-FI"], + ), + FileExtension( + name="DICOM file format", + extension=".dcm", + priority=["DICOM", "ITK"], + ), + FileExtension( + extension=".dcr", + priority=["RAW-FI"], + ), + FileExtension( + name="Intel DCX", + extension=".dcx", + priority=["pillow", "DCX-PIL"], + ), + FileExtension( + name="DirectX Texture Container", + extension=".dds", + priority=["pillow", "DDS-FI", "DDS-PIL"], + ), + FileExtension( + name="Windows Bitmap", + extension=".dib", + priority=["pillow", "DIB-PIL"], + ), + FileExtension( + name="DICOM file format", + extension=".dicom", + priority=["ITK"], + ), + FileExtension( + extension=".dng", + priority=["RAW-FI"], + ), + FileExtension( + extension=".drf", + priority=["RAW-FI"], + ), + FileExtension( + extension=".dsc", + priority=["RAW-FI"], + ), + FileExtension( + name="Enhanced Compression Wavelet", + extension=".ecw", + priority=["GDAL"], + ), + FileExtension( + name="Windows Metafile", + extension=".emf", + 
priority=["pillow", "WMF-PIL"], + ), + FileExtension( + name="Encapsulated Postscript", + extension=".eps", + priority=["pillow", "EPS-PIL"], + ), + FileExtension( + extension=".erf", + priority=["RAW-FI"], + ), + FileExtension( + name="OpenEXR", + extension=".exr", + external_link="https://openexr.readthedocs.io/en/latest/", + priority=["EXR-FI", "pyav", "opencv"], + ), + FileExtension( + extension=".fff", + priority=["RAW-FI"], + ), + FileExtension( + name="Flexible Image Transport System File", + extension=".fit", + priority=["pillow", "FITS-PIL", "FITS"], + ), + FileExtension( + name="Flexible Image Transport System File", + extension=".fits", + priority=["pillow", "FITS-PIL", "FITS", "pyav"], + ), + FileExtension( + name="Autodesk FLC Animation", + extension=".flc", + priority=["pillow", "FLI-PIL"], + ), + FileExtension( + name="Autodesk FLI Animation", + extension=".fli", + priority=["pillow", "FLI-PIL"], + ), + FileExtension( + name="Kodak FlashPix", + extension=".fpx", + priority=["pillow", "FPX-PIL"], + ), + FileExtension( + name="Independence War 2: Edge Of Chaos Texture Format", + extension=".ftc", + priority=["pillow", "FTEX-PIL"], + ), + FileExtension( + name="Flexible Image Transport System File", + extension=".fts", + priority=["FITS"], + ), + FileExtension( + name="Independence War 2: Edge Of Chaos Texture Format", + extension=".ftu", + priority=["pillow", "FTEX-PIL"], + ), + FileExtension( + name="Flexible Image Transport System File", + extension=".fz", + priority=["FITS"], + ), + FileExtension( + name="Raw fax format CCITT G.3", + extension=".g3", + priority=["G3-FI"], + ), + FileExtension( + name="GIMP brush file", + extension=".gbr", + priority=["pillow", "GBR-PIL"], + ), + FileExtension( + name="Grassroots DICOM", + extension=".gdcm", + priority=["ITK"], + ), + FileExtension( + name="Graphics Interchange Format", + extension=".gif", + priority=["pillow", "GIF-PIL", "pyav"], + ), + FileExtension( + name="UMDS GIPL", + extension=".gipl", + 
priority=["ITK"], + ), + FileExtension( + name="gridded meteorological data", + extension=".grib", + priority=["pillow", "GRIB-PIL"], + ), + FileExtension( + name="Hierarchical Data Format 5", + extension=".h5", + priority=["pillow", "HDF5-PIL"], + ), + FileExtension( + name="Hierarchical Data Format 5", + extension=".hdf", + priority=["pillow", "HDF5-PIL"], + ), + FileExtension( + name="Hierarchical Data Format 5", + extension=".hdf5", + priority=["ITK"], + ), + FileExtension( + name="JPEG Extended Range", + extension=".hdp", + priority=["JPEG-XR-FI"], + ), + FileExtension( + name="High Dynamic Range Image", + extension=".hdr", + priority=["HDR-FI", "ITK", "opencv"], + ), + FileExtension( + extension=".ia", + priority=["RAW-FI"], + ), + FileExtension( + extension=".icb", + priority=["pillow"], + ), + FileExtension( + name="Mac OS Icon File", + extension=".icns", + priority=["pillow", "ICNS-PIL"], + ), + FileExtension( + name="Windows Icon File", + extension=".ico", + priority=["pillow", "ICO-FI", "ICO-PIL", "pyav"], + ), + FileExtension( + name="ILBM Interleaved Bitmap", + extension=".iff", + priority=["IFF-FI"], + ), + FileExtension( + name="IPTC/NAA", + extension=".iim", + priority=["pillow", "IPTC-PIL"], + ), + FileExtension( + extension=".iiq", + priority=["RAW-FI"], + ), + FileExtension( + name="IFUNC Image Memory", + extension=".im", + priority=["pillow", "IM-PIL"], + ), + FileExtension( + extension=".img", + priority=["ITK", "GDAL"], + ), + FileExtension( + extension=".img.gz", + priority=["ITK"], + ), + FileExtension( + name="IM Tools", + extension=".IMT", + priority=["pillow", "IMT-PIL"], + ), + FileExtension( + name="Image Processing Lab", + extension=".ipl", + priority=["ITK"], + ), + FileExtension( + name="JPEG 2000", + extension=".j2c", + priority=["pillow", "J2K-FI", "JPEG2000-PIL", "pyav"], + ), + FileExtension( + name="JPEG 2000", + extension=".j2k", + priority=["pillow", "J2K-FI", "JPEG2000-PIL", "pyav"], + ), + FileExtension( + name="JPEG", + 
extension=".jfif", + priority=["pillow", "JPEG-PIL"], + ), + FileExtension( + name="JPEG", + extension=".jif", + priority=["JPEG-FI"], + ), + FileExtension( + name="JPEG Network Graphics", + extension=".jng", + priority=["JNG-FI"], + ), + FileExtension( + name="JPEG 2000", + extension=".jp2", + priority=["pillow", "JP2-FI", "JPEG2000-PIL", "pyav", "opencv"], + ), + FileExtension( + name="JPEG 2000", + extension=".jpc", + priority=["pillow", "JPEG2000-PIL"], + ), + FileExtension( + name="JPEG", + extension=".jpe", + priority=["pillow", "JPEG-FI", "JPEG-PIL", "opencv"], + ), + FileExtension( + name="Joint Photographic Experts Group", + extension=".jpeg", + priority=["pillow", "JPEG-PIL", "JPEG-FI", "ITK", "GDAL", "pyav", "opencv"], + ), + FileExtension( + name="JPEG 2000", + extension=".jpf", + priority=["pillow", "JPEG2000-PIL"], + ), + FileExtension( + name="Joint Photographic Experts Group", + extension=".jpg", + priority=["pillow", "JPEG-PIL", "JPEG-FI", "ITK", "GDAL", "pyav", "opencv"], + ), + FileExtension( + name="JPEG 2000", + extension=".jpx", + priority=["pillow", "JPEG2000-PIL"], + ), + FileExtension( + name="JPEG Extended Range", + extension=".jxr", + priority=["JPEG-XR-FI"], + ), + FileExtension( + extension=".k25", + priority=["RAW-FI"], + ), + FileExtension( + extension=".kc2", + priority=["RAW-FI"], + ), + FileExtension( + extension=".kdc", + priority=["RAW-FI"], + ), + FileExtension( + name="C64 Koala Graphics", + extension=".koa", + priority=["KOALA-FI"], + ), + FileExtension( + name="ILBM Interleaved Bitmap", + extension=".lbm", + priority=["IFF-FI"], + ), + FileExtension( + name="Lytro F01", + extension=".lfp", + priority=["LYTRO-LFP"], + ), + FileExtension( + name="Lytro Illum", + extension=".lfr", + priority=["LYTRO-LFR"], + ), + FileExtension( + name="ZEISS LSM", + extension=".lsm", + priority=["tifffile", "ITK", "TIFF"], + ), + FileExtension( + name="McIdas area file", + extension=".MCIDAS", + priority=["pillow", "MCIDAS-PIL"], + 
external_link="https://www.ssec.wisc.edu/mcidas/doc/prog_man/2003print/progman2003-formats.html", + ), + FileExtension( + extension=".mdc", + priority=["RAW-FI"], + ), + FileExtension( + extension=".mef", + priority=["RAW-FI"], + ), + FileExtension( + name="FreeSurfer File Format", + extension=".mgh", + priority=["ITK"], + ), + FileExtension( + name="ITK MetaImage", + extension=".mha", + priority=["ITK"], + ), + FileExtension( + name="ITK MetaImage Header", + extension=".mhd", + priority=["ITK"], + ), + FileExtension( + name="Microsoft Image Composer", + extension=".mic", + priority=["pillow", "MIC-PIL"], + ), + FileExtension( + name="Matroska Multimedia Container", + extension=".mkv", + priority=["FFMPEG", "pyav"], + ), + FileExtension( + name="Medical Imaging NetCDF", + extension=".mnc", + priority=["ITK"], + ), + FileExtension( + name="Medical Imaging NetCDF 2", + extension=".mnc2", + priority=["ITK"], + ), + FileExtension( + name="Leaf Raw Image Format", + extension=".mos", + priority=["RAW-FI"], + ), + FileExtension( + name="QuickTime File Format", + extension=".mov", + priority=["FFMPEG", "pyav"], + ), + FileExtension( + name="MPEG-4 Part 14", + extension=".mp4", + priority=["FFMPEG", "pyav"], + ), + FileExtension( + name="MPEG-1 Moving Picture Experts Group", + extension=".mpeg", + priority=["FFMPEG", "pyav"], + ), + FileExtension( + name="Moving Picture Experts Group", + extension=".mpg", + priority=["pillow", "FFMPEG", "pyav"], + ), + FileExtension( + name="JPEG Multi-Picture Format", + extension=".mpo", + priority=["pillow", "MPO-PIL"], + ), + FileExtension( + name="Magnetic resonance imaging", + extension=".mri", + priority=["DICOM"], + ), + FileExtension( + extension=".mrw", + priority=["RAW-FI"], + ), + FileExtension( + name="Windows Paint", + extension=".msp", + priority=["pillow", "MSP-PIL"], + ), + FileExtension( + extension=".nef", + priority=["RAW-FI", "rawpy"], + ), + FileExtension( + extension=".nhdr", + priority=["ITK"], + ), + FileExtension( + 
extension=".nia", + priority=["ITK"], + ), + FileExtension( + extension=".nii", + priority=["ITK"], + ), + FileExtension( + name="nii.gz", + extension=".nii.gz", + priority=["ITK"], + ), + FileExtension( + name="Numpy Array", + extension=".npz", + priority=["NPZ"], + volume_support=True, + ), + FileExtension( + extension=".nrrd", + priority=["ITK"], + ), + FileExtension( + extension=".nrw", + priority=["RAW-FI"], + ), + FileExtension( + extension=".orf", + priority=["RAW-FI"], + ), + FileExtension( + extension=".palm", + priority=["pillow"], + ), + FileExtension( + name="Portable Bitmap", + extension=".pbm", + priority=["PGM-FI", "PGMRAW-FI", "pyav", "opencv"], + ), + FileExtension( + name="Kodak PhotoCD", + extension=".pcd", + priority=["pillow", "PCD-FI", "PCD-PIL"], + ), + FileExtension( + name="Macintosh PICT", + extension=".pct", + priority=["PICT-FI"], + ), + FileExtension( + name="Zsoft Paintbrush", + extension=".PCX", + priority=["pillow", "PCX-FI", "PCX-PIL"], + ), + FileExtension( + extension=".pdf", + priority=["pillow"], + ), + FileExtension( + extension=".pef", + priority=["RAW-FI"], + ), + FileExtension( + extension=".pfm", + priority=["PFM-FI", "pyav", "opencv"], + ), + FileExtension( + name="Portable Greymap", + extension=".pgm", + priority=["pillow", "PGM-FI", "PGMRAW-FI", "pyav", "opencv"], + ), + FileExtension( + name="Macintosh PICT", + extension=".pic", + priority=["PICT-FI", "ITK", "opencv"], + ), + FileExtension( + name="Macintosh PICT", + extension=".pict", + priority=["PICT-FI"], + ), + FileExtension( + name="Portable Network Graphics", + extension=".png", + priority=["pillow", "PNG-PIL", "PNG-FI", "ITK", "pyav", "opencv"], + ), + FileExtension( + name="Portable Image Format", + extension=".pnm", + priority=["pillow", "opencv"], + ), + FileExtension( + name="Pbmplus image", + extension=".ppm", + priority=["pillow", "PPM-PIL", "pyav"], + ), + FileExtension( + name="Pbmplus image", + extension=".pbm", + priority=["pillow", "PPM-PIL", 
"PPM-FI"], + ), + FileExtension( + name="Portable image format", + extension=".pxm", + priority=["opencv"], + ), + FileExtension( + name="Portable Pixelmap (ASCII)", + extension=".ppm", + priority=["PPM-FI", "opencv"], + ), + FileExtension( + name="Portable Pixelmap (Raw)", + extension=".ppm", + priority=["PPMRAW-FI"], + ), + FileExtension( + name="Ghostscript", + extension=".ps", + priority=["pillow", "EPS-PIL"], + ), + FileExtension( + name="Adope Photoshop 2.5 and 3.0", + extension=".psd", + priority=["pillow", "PSD-PIL", "PSD-FI"], + ), + FileExtension( + extension=".ptx", + priority=["RAW-FI"], + ), + FileExtension( + extension=".pxn", + priority=["RAW-FI"], + ), + FileExtension( + name="PIXAR raster image", + extension=".pxr", + priority=["pillow", "PIXAR-PIL"], + ), + FileExtension( + extension=".qtk", + priority=["RAW-FI"], + ), + FileExtension( + extension=".raf", + priority=["RAW-FI"], + ), + FileExtension( + name="Sun Raster File", + extension=".ras", + priority=["pillow", "SUN-PIL", "RAS-FI", "pyav", "opencv"], + ), + FileExtension( + name="Sun Raster File", + extension=".sr", + priority=["opencv"], + ), + FileExtension( + extension=".raw", + priority=["RAW-FI", "LYTRO-ILLUM-RAW", "LYTRO-F01-RAW", "rawpy"], + ), + FileExtension( + extension=".rdc", + priority=["RAW-FI"], + ), + FileExtension( + name="Silicon Graphics Image", + extension=".rgb", + priority=["pillow", "SGI-PIL"], + ), + FileExtension( + name="Silicon Graphics Image", + extension=".rgba", + priority=["pillow", "SGI-PIL"], + ), + FileExtension( + extension=".rw2", + priority=["RAW-FI"], + ), + FileExtension( + extension=".rwl", + priority=["RAW-FI"], + ), + FileExtension( + extension=".rwz", + priority=["RAW-FI"], + ), + FileExtension( + name="Silicon Graphics Image", + extension=".sgi", + priority=["pillow", "SGI-PIL", "pyav"], + ), + FileExtension( + name="SPE File Format", + extension=".spe", + priority=["SPE"], + ), + FileExtension( + extension=".SPIDER", + priority=["pillow", 
"SPIDER-PIL"], + ), + FileExtension( + extension=".sr2", + priority=["RAW-FI"], + ), + FileExtension( + extension=".srf", + priority=["RAW-FI"], + ), + FileExtension( + extension=".srw", + priority=["RAW-FI"], + ), + FileExtension( + extension=".sti", + priority=["RAW-FI"], + ), + FileExtension( + extension=".stk", + priority=["tifffile", "TIFF"], + ), + FileExtension( + name="ShockWave Flash", + extension=".swf", + priority=["SWF", "pyav"], + ), + FileExtension( + name="Truevision TGA", + extension=".targa", + priority=["pillow", "TARGA-FI"], + ), + FileExtension( + name="Truevision TGA", + extension=".tga", + priority=["pillow", "TGA-PIL", "TARGA-FI", "pyav"], + ), + FileExtension( + name="Tagged Image File", + extension=".tif", + priority=[ + "tifffile", + "TIFF", + "pillow", + "TIFF-PIL", + "TIFF-FI", + "FEI", + "ITK", + "GDAL", + "pyav", + "opencv", + ], + volume_support=True, + ), + FileExtension( + name="Tagged Image File Format", + extension=".tiff", + priority=[ + "tifffile", + "TIFF", + "pillow", + "TIFF-PIL", + "TIFF-FI", + "FEI", + "ITK", + "GDAL", + "pyav", + "opencv", + ], + volume_support=True, + ), + FileExtension( + extension=".vda", + priority=["pillow"], + ), + FileExtension( + extension=".vst", + priority=["pillow"], + ), + FileExtension( + extension=".vtk", + priority=["ITK"], + ), + FileExtension( + name="Wireless Bitmap", + extension=".wap", + priority=["WBMP-FI"], + ), + FileExtension( + name="Wireless Bitmap", + extension=".wbm", + priority=["WBMP-FI"], + ), + FileExtension( + name="Wireless Bitmap", + extension=".wbmp", + priority=["WBMP-FI"], + ), + FileExtension( + name="JPEG Extended Range", + extension=".wdp", + priority=["JPEG-XR-FI"], + ), + FileExtension( + name="Matroska", + extension=".webm", + priority=["FFMPEG", "pyav"], + ), + FileExtension( + name="Google WebP", + extension=".webp", + priority=["pillow", "WEBP-FI", "pyav", "opencv"], + ), + FileExtension( + name="Windows Meta File", + extension=".wmf", + priority=["pillow", 
"WMF-PIL"], + ), + FileExtension( + name="Windows Media Video", + extension=".wmv", + priority=["FFMPEG"], + ), + FileExtension( + name="X11 Bitmap", + extension=".xbm", + priority=["pillow", "XBM-PIL", "XBM-FI", "pyav"], + ), + FileExtension( + name="X11 Pixel Map", + extension=".xpm", + priority=["pillow", "XPM-PIL", "XPM-FI"], + ), + FileExtension( + name="Thumbnail Image", + extension=".XVTHUMB", + priority=["pillow", "XVTHUMB-PIL"], + ), + FileExtension( + extension=".dpx", + priority=["pyav"], + ), + FileExtension( + extension=".im1", + priority=["pyav"], + ), + FileExtension( + extension=".im24", + priority=["pyav"], + ), + FileExtension( + extension=".im8", + priority=["pyav"], + ), + FileExtension( + extension=".jls", + priority=["pyav"], + ), + FileExtension( + extension=".ljpg", + priority=["pyav"], + ), + FileExtension( + extension=".pam", + priority=["pyav"], + ), + FileExtension( + extension=".pcx", + priority=["pyav"], + ), + FileExtension( + extension=".pgmyuv", + priority=["pyav"], + ), + FileExtension( + extension=".pix", + priority=["pyav"], + ), + FileExtension( + extension=".ppm", + priority=["pyav"], + ), + FileExtension( + extension=".rs", + priority=["pyav"], + ), + FileExtension( + extension=".sun", + priority=["pyav"], + ), + FileExtension( + extension=".sunras", + priority=["pyav"], + ), + FileExtension( + extension=".xface", + priority=["pyav"], + ), + FileExtension( + extension=".xwd", + priority=["pyav"], + ), + FileExtension( + extension=".y", + priority=["pyav"], + ), + FileExtension( + name="3GP (3GPP file format)", + extension=".3g2", + priority=["pyav"], + ), + FileExtension( + name="3GP (3GPP file format)", + extension=".3gp", + priority=["pyav"], + ), + FileExtension( + name="3GP (3GPP file format)", + extension=".f4v", + priority=["pyav"], + ), + FileExtension( + name="3GP (3GPP file format)", + extension=".ism", + priority=["pyav"], + ), + FileExtension( + name="3GP (3GPP file format)", + extension=".isma", + 
priority=["pyav"], + ), + FileExtension( + name="3GP (3GPP file format)", + extension=".ismv", + priority=["pyav"], + ), + FileExtension( + name="3GP (3GPP file format)", + extension=".m4a", + priority=["pyav"], + ), + FileExtension( + name="3GP (3GPP file format)", + extension=".m4b", + priority=["pyav"], + ), + FileExtension( + name="3GP (3GPP file format)", + extension=".mj2", + priority=["pyav"], + ), + FileExtension( + name="3GP (3GPP file format)", + extension=".psp", + priority=["pyav"], + ), + FileExtension( + name="3GP2 (3GPP2 file format)", + extension=".3g2", + priority=["pyav"], + ), + FileExtension( + name="3GP2 (3GPP2 file format)", + extension=".3gp", + priority=["pyav"], + ), + FileExtension( + name="3GP2 (3GPP2 file format)", + extension=".f4v", + priority=["pyav"], + ), + FileExtension( + name="3GP2 (3GPP2 file format)", + extension=".ism", + priority=["pyav"], + ), + FileExtension( + name="3GP2 (3GPP2 file format)", + extension=".isma", + priority=["pyav"], + ), + FileExtension( + name="3GP2 (3GPP2 file format)", + extension=".ismv", + priority=["pyav"], + ), + FileExtension( + name="3GP2 (3GPP2 file format)", + extension=".m4a", + priority=["pyav"], + ), + FileExtension( + name="3GP2 (3GPP2 file format)", + extension=".m4b", + priority=["pyav"], + ), + FileExtension( + name="3GP2 (3GPP2 file format)", + extension=".mj2", + priority=["pyav"], + ), + FileExtension( + name="3GP2 (3GPP2 file format)", + extension=".psp", + priority=["pyav"], + ), + FileExtension( + name="3GPP AMR", + extension=".amr", + priority=["pyav"], + ), + FileExtension( + name="a64 - video for Commodore 64", + extension=".A64", + priority=["pyav"], + ), + FileExtension( + name="a64 - video for Commodore 64", + extension=".a64", + priority=["pyav"], + ), + FileExtension( + name="Adobe Filmstrip", + extension=".flm", + priority=["pyav"], + ), + FileExtension( + name="AMV", + extension=".amv", + priority=["pyav"], + ), + FileExtension( + name="ASF (Advanced / Active Streaming 
Format)", + extension=".asf", + priority=["pyav"], + ), + FileExtension( + name="ASF (Advanced / Active Streaming Format)", + extension=".asf", + priority=["pyav"], + ), + FileExtension( + name="ASF (Advanced / Active Streaming Format)", + extension=".wmv", + priority=["pyav"], + ), + FileExtension( + name="ASF (Advanced / Active Streaming Format)", + extension=".wmv", + priority=["pyav"], + ), + FileExtension( + name="AV1 Annex B", + extension=".obu", + priority=["pyav"], + ), + FileExtension( + name="AV1 low overhead OBU", + extension=".obu", + priority=["pyav"], + ), + FileExtension( + name="AVI (Audio Video Interleaved)", + extension=".avi", + priority=["pyav"], + ), + FileExtension( + name="AVR (Audio Visual Research)", + extension=".avr", + priority=["pyav"], + ), + FileExtension( + name="Beam Software SIFF", + extension=".vb", + priority=["pyav"], + ), + FileExtension( + name="CD Graphics", + extension=".cdg", + priority=["pyav"], + ), + FileExtension( + name="Commodore CDXL video", + extension=".cdxl", + priority=["pyav"], + ), + FileExtension( + name="Commodore CDXL video", + extension=".xl", + priority=["pyav"], + ), + FileExtension( + name="DASH Muxer", + extension=".mpd", + priority=["pyav"], + ), + FileExtension( + name="Digital Pictures SGA", + extension=".sga", + priority=["pyav"], + ), + FileExtension( + name="Discworld II BMV", + extension=".bmv", + priority=["pyav"], + ), + FileExtension( + name="DV (Digital Video)", + extension=".dif", + priority=["pyav"], + ), + FileExtension( + name="DV (Digital Video)", + extension=".dv", + priority=["pyav"], + ), + FileExtension( + name="F4V Adobe Flash Video", + extension=".f4v", + priority=["pyav"], + ), + FileExtension( + name="FLV (Flash Video)", + extension=".flv", + priority=["pyav"], + ), + FileExtension( + name="GXF (General eXchange Format)", + extension=".gxf", + priority=["pyav"], + ), + FileExtension( + name="iCE Draw File", + extension=".idf", + priority=["pyav"], + ), + FileExtension( + 
name="IFV CCTV DVR", + extension=".ifv", + priority=["pyav"], + ), + FileExtension( + name="iPod H.264 MP4 (MPEG-4 Part 14)", + extension=".m4a", + priority=["pyav"], + ), + FileExtension( + name="iPod H.264 MP4 (MPEG-4 Part 14)", + extension=".m4b", + priority=["pyav"], + ), + FileExtension( + name="iPod H.264 MP4 (MPEG-4 Part 14)", + extension=".m4v", + priority=["pyav"], + ), + FileExtension( + name="IVR (Internet Video Recording)", + extension=".ivr", + priority=["pyav"], + ), + FileExtension( + name="Konami PS2 SVAG", + extension=".svag", + priority=["pyav"], + ), + FileExtension( + name="KUX (YouKu)", + extension=".kux", + priority=["pyav"], + ), + FileExtension( + name="live RTMP FLV (Flash Video)", + extension=".flv", + priority=["pyav"], + ), + FileExtension( + name="Loki SDL MJPEG", + extension=".mjpg", + priority=["pyav"], + ), + FileExtension( + name="LVF", + extension=".lvf", + priority=["pyav"], + ), + FileExtension( + name="Matroska / WebM", + extension=".mk3d", + priority=["pyav"], + ), + FileExtension( + name="Matroska / WebM", + extension=".mka", + priority=["pyav"], + ), + FileExtension( + name="Matroska / WebM", + extension=".mks", + priority=["pyav"], + ), + FileExtension( + name="Microsoft XMV", + extension=".xmv", + priority=["pyav"], + ), + FileExtension( + name="MIME multipart JPEG", + extension=".mjpg", + priority=["pyav"], + ), + FileExtension( + name="MobiClip MODS", + extension=".mods", + priority=["pyav"], + ), + FileExtension( + name="MobiClip MOFLEX", + extension=".moflex", + priority=["pyav"], + ), + FileExtension( + name="Motion Pixels MVI", + extension=".mvi", + priority=["pyav"], + ), + FileExtension( + name="MP4 (MPEG-4 Part 14)", + extension=".3g2", + priority=["pyav"], + ), + FileExtension( + name="MP4 (MPEG-4 Part 14)", + extension=".3gp", + priority=["pyav"], + ), + FileExtension( + name="MP4 (MPEG-4 Part 14)", + extension=".f4v", + priority=["pyav"], + ), + FileExtension( + name="MP4 (MPEG-4 Part 14)", + extension=".ism", + 
priority=["pyav"], + ), + FileExtension( + name="MP4 (MPEG-4 Part 14)", + extension=".isma", + priority=["pyav"], + ), + FileExtension( + name="MP4 (MPEG-4 Part 14)", + extension=".ismv", + priority=["pyav"], + ), + FileExtension( + name="MP4 (MPEG-4 Part 14)", + extension=".m4a", + priority=["pyav"], + ), + FileExtension( + name="MP4 (MPEG-4 Part 14)", + extension=".m4b", + priority=["pyav"], + ), + FileExtension( + name="MP4 (MPEG-4 Part 14)", + extension=".mj2", + priority=["pyav"], + ), + FileExtension( + name="MP4 (MPEG-4 Part 14)", + extension=".psp", + priority=["pyav"], + ), + FileExtension( + name="MPEG-2 PS (DVD VOB)", + extension=".dvd", + priority=["pyav"], + ), + FileExtension( + name="MPEG-2 PS (SVCD)", + extension=".vob", + priority=["pyav"], + ), + FileExtension( + name="MPEG-2 PS (VOB)", + extension=".vob", + priority=["pyav"], + ), + FileExtension( + name="MPEG-TS (MPEG-2 Transport Stream)", + extension=".m2t", + priority=["pyav"], + ), + FileExtension( + name="MPEG-TS (MPEG-2 Transport Stream)", + extension=".m2ts", + priority=["pyav"], + ), + FileExtension( + name="MPEG-TS (MPEG-2 Transport Stream)", + extension=".mts", + priority=["pyav"], + ), + FileExtension( + name="MPEG-TS (MPEG-2 Transport Stream)", + extension=".ts", + priority=["pyav"], + ), + FileExtension( + name="Musepack", + extension=".mpc", + priority=["pyav"], + ), + FileExtension( + name="MXF (Material eXchange Format) Operational Pattern Atom", + extension=".mxf", + priority=["pyav"], + ), + FileExtension( + name="MXF (Material eXchange Format)", + extension=".mxf", + priority=["pyav"], + ), + FileExtension( + name="MxPEG clip", + extension=".mxg", + priority=["pyav"], + ), + FileExtension( + name="NC camera feed", + extension=".v", + priority=["pyav"], + ), + FileExtension( + name="NUT", + extension=".nut", + priority=["pyav"], + ), + FileExtension( + name="Ogg Video", + extension=".ogv", + priority=["pyav"], + ), + FileExtension( + name="Ogg", + extension=".ogg", + 
priority=["pyav"], + ), + FileExtension( + name="On2 IVF", + extension=".ivf", + priority=["pyav"], + ), + FileExtension( + name="PSP MP4 (MPEG-4 Part 14)", + extension=".psp", + priority=["pyav"], + ), + FileExtension( + name="Psygnosis YOP", + extension=".yop", + priority=["pyav"], + ), + FileExtension( + name="QuickTime / MOV", + extension=".3g2", + priority=["pyav"], + ), + FileExtension( + name="QuickTime / MOV", + extension=".3gp", + priority=["pyav"], + ), + FileExtension( + name="QuickTime / MOV", + extension=".f4v", + priority=["pyav"], + ), + FileExtension( + name="QuickTime / MOV", + extension=".ism", + priority=["pyav"], + ), + FileExtension( + name="QuickTime / MOV", + extension=".isma", + priority=["pyav"], + ), + FileExtension( + name="QuickTime / MOV", + extension=".ismv", + priority=["pyav"], + ), + FileExtension( + name="QuickTime / MOV", + extension=".m4a", + priority=["pyav"], + ), + FileExtension( + name="QuickTime / MOV", + extension=".m4b", + priority=["pyav"], + ), + FileExtension( + name="QuickTime / MOV", + extension=".mj2", + priority=["pyav"], + ), + FileExtension( + name="QuickTime / MOV", + extension=".psp", + priority=["pyav"], + ), + FileExtension( + name="raw AVS2-P2/IEEE1857.4 video", + extension=".avs", + priority=["pyav"], + ), + FileExtension( + name="raw AVS2-P2/IEEE1857.4 video", + extension=".avs2", + priority=["pyav"], + ), + FileExtension( + name="raw AVS3-P2/IEEE1857.10", + extension=".avs3", + priority=["pyav"], + ), + FileExtension( + name="raw Chinese AVS (Audio Video Standard) video", + extension=".cavs", + priority=["pyav"], + ), + FileExtension( + name="raw Dirac", + extension=".drc", + priority=["pyav"], + ), + FileExtension( + name="raw Dirac", + extension=".vc2", + priority=["pyav"], + ), + FileExtension( + name="raw DNxHD (SMPTE VC-3)", + extension=".dnxhd", + priority=["pyav"], + ), + FileExtension( + name="raw DNxHD (SMPTE VC-3)", + extension=".dnxhr", + priority=["pyav"], + ), + FileExtension( + name="raw 
GSM", + extension=".gsm", + priority=["pyav"], + ), + FileExtension( + name="raw H.261", + extension=".h261", + priority=["pyav"], + ), + FileExtension( + name="raw H.263", + extension=".h263", + priority=["pyav"], + ), + FileExtension( + name="raw H.264 video", + extension=".264", + priority=["pyav"], + ), + FileExtension( + name="raw H.264 video", + extension=".avc", + priority=["pyav"], + ), + FileExtension( + name="raw H.264 video", + extension=".h264", + priority=["pyav", "FFMPEG"], + ), + FileExtension( + name="raw H.264 video", + extension=".h26l", + priority=["pyav"], + ), + FileExtension( + name="raw HEVC video", + extension=".265", + priority=["pyav"], + ), + FileExtension( + name="raw HEVC video", + extension=".h265", + priority=["pyav"], + ), + FileExtension( + name="raw HEVC video", + extension=".hevc", + priority=["pyav"], + ), + FileExtension( + name="raw id RoQ", + extension=".roq", + priority=["pyav"], + ), + FileExtension( + name="raw Ingenient MJPEG", + extension=".cgi", + priority=["pyav"], + ), + FileExtension( + name="raw IPU Video", + extension=".ipu", + priority=["pyav"], + ), + FileExtension( + name="raw MJPEG 2000 video", + extension=".j2k", + priority=["pyav"], + ), + FileExtension( + name="raw MJPEG video", + extension=".mjpeg", + priority=["pyav"], + ), + FileExtension( + name="raw MJPEG video", + extension=".mjpg", + priority=["pyav"], + ), + FileExtension( + name="raw MJPEG video", + extension=".mpo", + priority=["pyav"], + ), + FileExtension( + name="raw MPEG-1 video", + extension=".m1v", + priority=["pyav"], + ), + FileExtension( + name="raw MPEG-1 video", + extension=".mpeg", + priority=["pyav"], + ), + FileExtension( + name="raw MPEG-1 video", + extension=".mpg", + priority=["pyav"], + ), + FileExtension( + name="raw MPEG-2 video", + extension=".m2v", + priority=["pyav"], + ), + FileExtension( + name="raw MPEG-4 video", + extension=".m4v", + priority=["pyav"], + ), + FileExtension( + name="raw VC-1 video", + extension=".vc1", + 
priority=["pyav"], + ), + FileExtension( + name="raw video", + extension=".cif", + priority=["pyav"], + ), + FileExtension( + name="raw video", + extension=".qcif", + priority=["pyav"], + ), + FileExtension( + name="raw video", + extension=".rgb", + priority=["pyav"], + ), + FileExtension( + name="raw video", + extension=".yuv", + priority=["pyav"], + ), + FileExtension( + name="RealMedia", + extension=".rm", + priority=["pyav"], + ), + FileExtension( + name="SDR2", + extension=".sdr2", + priority=["pyav"], + ), + FileExtension( + name="Sega FILM / CPK", + extension=".cpk", + priority=["pyav"], + ), + FileExtension( + name="SER (Simple uncompressed video format for astronomical capturing)", + extension=".ser", + priority=["pyav"], + ), + FileExtension( + name="Simbiosis Interactive IMX", + extension=".imx", + priority=["pyav"], + ), + FileExtension( + name="Square SVS", + extension=".svs", + priority=["tifffile", "pyav"], + ), + FileExtension( + name="TiVo TY Stream", + extension=".ty", + priority=["pyav"], + ), + FileExtension( + name="TiVo TY Stream", + extension=".ty+", + priority=["pyav"], + ), + FileExtension( + name="Uncompressed 4:2:2 10-bit", + extension=".v210", + priority=["pyav"], + ), + FileExtension( + name="Uncompressed 4:2:2 10-bit", + extension=".yuv10", + priority=["pyav"], + ), + FileExtension( + name="VC-1 test bitstream", + extension=".rcv", + priority=["pyav"], + ), + FileExtension( + name="Video CCTV DAT", + extension=".dat", + priority=["pyav"], + ), + FileExtension( + name="Video DAV", + extension=".dav", + priority=["pyav"], + ), + FileExtension( + name="Vivo", + extension=".viv", + priority=["pyav"], + ), + FileExtension( + name="WebM Chunk Muxer", + extension=".chk", + priority=["pyav"], + ), + FileExtension( + name="WebM", + extension=".mk3d", + priority=["pyav"], + ), + FileExtension( + name="WebM", + extension=".mka", + priority=["pyav"], + ), + FileExtension( + name="WebM", + extension=".mks", + priority=["pyav"], + ), + 
# Order the registry alphabetically by extension string so that the grouping
# below (and anything iterating the registry) sees a deterministic order.
extension_list.sort(key=lambda entry: entry.extension)


# Map each extension string to every FileExtension registered for it.
# Several formats can share one extension, hence a list per key.
known_extensions = {}
for ext in extension_list:
    known_extensions.setdefault(ext.extension, []).append(ext)

# Rebuild the flat list from the grouped mapping (dicts keep insertion order).
extension_list = [
    entry for grouped in known_extensions.values() for entry in grouped
]
".asf", + ".avc", + ".avi", + ".avr", + ".avs", + ".avs2", + ".avs3", + ".bmv", + ".cavs", + ".cdg", + ".cdxl", + ".cgi", + ".chk", + ".cif", + ".cpk", + ".dat", + ".dav", + ".dif", + ".dnxhd", + ".dnxhr", + ".drc", + ".dv", + ".dvd", + ".f4v", + ".flm", + ".flv", + ".gsm", + ".gxf", + ".h261", + ".h263", + ".h264", + ".h265", + ".h26l", + ".hevc", + ".idf", + ".ifv", + ".imx", + ".ipu", + ".ism", + ".isma", + ".ismv", + ".ivf", + ".ivr", + ".j2k", + ".kux", + ".lvf", + ".m1v", + ".m2t", + ".m2ts", + ".m2v", + ".m4a", + ".m4b", + ".m4v", + ".mj2", + ".mjpeg", + ".mjpg", + ".mk3d", + ".mka", + ".mks", + ".mkv", + ".mods", + ".moflex", + ".mov", + ".mp4", + ".mpc", + ".mpd", + ".mpeg", + ".mpg", + ".mpo", + ".mts", + ".mvi", + ".mxf", + ".mxg", + ".nut", + ".obu", + ".ogg", + ".ogv", + ".psp", + ".qcif", + ".rcv", + ".rgb", + ".rm", + ".roq", + ".sdr2", + ".ser", + ".sga", + ".svag", + ".svs", + ".ts", + ".ty", + ".ty+", + ".v", + ".v210", + ".vb", + ".vc1", + ".vc2", + ".viv", + ".vob", + ".webm", + ".wmv", + ".wtv", + ".xl", + ".xmv", + ".y4m", + ".yop", + ".yuv", + ".yuv10", +] +video_extensions = list() +for ext_string in _video_extension_strings: + formats = known_extensions[ext_string] + video_extensions.append(formats[0]) +video_extensions.sort(key=lambda x: x.extension) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/config/extensions.pyi b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/config/extensions.pyi new file mode 100644 index 0000000000000000000000000000000000000000..4e0f4fcbbc12531b7988ab22e0fee1264a458437 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/config/extensions.pyi @@ -0,0 +1,24 @@ +from typing import List, Dict, Optional + +class FileExtension: + extension: str + priority: List[str] + name: Optional[str] = None + description: Optional[str] = None + external_link: Optional[str] = None + volume_support: bool + + def __init__( + self, + *, + extension: str, + priority: List[str], + name: str = 
class PluginConfig:
    """Plugin Configuration Metadata

    Stores where a plugin lives (module and class name) so that the plugin
    module is only imported when the plugin is actually requested.

    Parameters
    ----------
    name : str
        The name of the plugin.
    class_name : str
        The name of the plugin class inside the plugin module.
    module_name : str
        The name of the module/package from which to import the plugin.
    is_legacy : bool
        If True, this plugin is a v2 plugin and will be wrapped in a
        LegacyPlugin. Default: False.
    package_name : str
        Anchor package for ``module_name`` when the latter is a relative
        module name.
    install_name : str
        The name of the optional dependency that can be used to install this
        plugin if it is missing. Falls back to ``name`` when not given.
    legacy_args : Dict
        A dictionary of kwargs to pass to the v2 plugin (Format) upon
        construction. ``name`` and a generic ``description`` are always
        present; entries given here override them.

    Examples
    --------
    >>> PluginConfig(
            name="TIFF",
            class_name="TiffFormat",
            module_name="imageio.plugins.tifffile",
            is_legacy=True,
            install_name="tifffile",
            legacy_args={
                "description": "TIFF format",
                "extensions": ".tif .tiff .stk .lsm",
                "modes": "iIvV",
            },
        )
    >>> PluginConfig(
            name="pillow",
            class_name="PillowPlugin",
            module_name="imageio.plugins.pillow"
        )

    """

    def __init__(
        self,
        name,
        class_name,
        module_name,
        *,
        is_legacy=False,
        package_name=None,
        install_name=None,
        legacy_args=None,
    ):
        self.name = name
        self.class_name = class_name
        self.module_name = module_name
        self.package_name = package_name

        self.is_legacy = is_legacy
        self.install_name = install_name if install_name else name

        # Start from the defaults every legacy Format needs, then let the
        # caller-supplied kwargs override them.
        merged_args = {"name": name, "description": "A legacy plugin"}
        merged_args.update(legacy_args if legacy_args else {})
        self.legacy_args = merged_args

    def _import_class(self):
        """Import the plugin module and return its ``class_name`` attribute."""
        plugin_module = importlib.import_module(self.module_name, self.package_name)
        return getattr(plugin_module, self.class_name)

    @property
    def format(self):
        """For backwards compatibility with FormatManager

        Returns a freshly constructed legacy Format instance; raises
        RuntimeError for non-legacy plugins.

        Delete when migrating to v3
        """
        if not self.is_legacy:
            raise RuntimeError("Can only get format for legacy plugins.")

        format_class = self._import_class()
        return format_class(**self.legacy_args)

    @property
    def plugin_class(self):
        """Get the plugin class (import if needed)

        Returns
        -------
        plugin_class : Any
            The class that can be used to instantiate plugins. For legacy
            plugins this is a function taking a request and returning a
            LegacyPlugin that wraps the legacy Format instance.

        """
        loaded = self._import_class()
        if not self.is_legacy:
            return loaded

        # One Format instance is created per property access and shared by
        # every LegacyPlugin produced through the returned factory.
        legacy_plugin = loaded(**self.legacy_args)

        def partial_legacy_plugin(request):
            return LegacyPlugin(request, legacy_plugin)

        return partial_legacy_plugin
+ +known_plugins["TIFF"] = PluginConfig( + name="TIFF", + class_name="TiffFormat", + module_name="imageio.plugins.tifffile", + is_legacy=True, + install_name="tifffile", + legacy_args={ + "description": "TIFF format", + "extensions": ".tif .tiff .stk .lsm", + "modes": "iIvV", + }, +) + +# PILLOW plugin formats (legacy) +PILLOW_FORMATS = [ + ("BMP", "Windows Bitmap", ".bmp", "PillowFormat"), + ("BUFR", "BUFR", ".bufr", "PillowFormat"), + ("CUR", "Windows Cursor", ".cur", "PillowFormat"), + ("DCX", "Intel DCX", ".dcx", "PillowFormat"), + ("DDS", "DirectDraw Surface", ".dds", "PillowFormat"), + ("DIB", "Windows Bitmap", "", "PillowFormat"), + ("EPS", "Encapsulated Postscript", ".ps .eps", "PillowFormat"), + ("FITS", "FITS", ".fit .fits", "PillowFormat"), + ("FLI", "Autodesk FLI/FLC Animation", ".fli .flc", "PillowFormat"), + ("FPX", "FlashPix", ".fpx", "PillowFormat"), + ("FTEX", "Texture File Format (IW2:EOC)", ".ftc .ftu", "PillowFormat"), + ("GBR", "GIMP brush file", ".gbr", "PillowFormat"), + ("GIF", "Compuserve GIF", ".gif", "GIFFormat"), + ("GRIB", "GRIB", ".grib", "PillowFormat"), + ("HDF5", "HDF5", ".h5 .hdf", "PillowFormat"), + ("ICNS", "Mac OS icns resource", ".icns", "PillowFormat"), + ("ICO", "Windows Icon", ".ico", "PillowFormat"), + ("IM", "IFUNC Image Memory", ".im", "PillowFormat"), + ("IMT", "IM Tools", "", "PillowFormat"), + ("IPTC", "IPTC/NAA", ".iim", "PillowFormat"), + ("JPEG", "JPEG (ISO 10918)", ".jfif .jpe .jpg .jpeg", "JPEGFormat"), + ( + "JPEG2000", + "JPEG 2000 (ISO 15444)", + ".jp2 .j2k .jpc .jpf .jpx .j2c", + "JPEG2000Format", + ), + ("MCIDAS", "McIdas area file", "", "PillowFormat"), + ("MIC", "Microsoft Image Composer", ".mic", "PillowFormat"), + # skipped in legacy pillow + # ("MPEG", "MPEG", ".mpg .mpeg", "PillowFormat"), + ("MPO", "MPO (CIPA DC-007)", ".mpo", "PillowFormat"), + ("MSP", "Windows Paint", ".msp", "PillowFormat"), + ("PCD", "Kodak PhotoCD", ".pcd", "PillowFormat"), + ("PCX", "Paintbrush", ".pcx", "PillowFormat"), + 
("PIXAR", "PIXAR raster image", ".pxr", "PillowFormat"), + ("PNG", "Portable network graphics", ".png", "PNGFormat"), + ("PPM", "Pbmplus image", ".pbm .pgm .ppm", "PillowFormat"), + ("PSD", "Adobe Photoshop", ".psd", "PillowFormat"), + ("SGI", "SGI Image File Format", ".bw .rgb .rgba .sgi", "PillowFormat"), + ("SPIDER", "Spider 2D image", "", "PillowFormat"), + ("SUN", "Sun Raster File", ".ras", "PillowFormat"), + ("TGA", "Targa", ".tga", "PillowFormat"), + ("TIFF", "Adobe TIFF", ".tif .tiff", "TIFFFormat"), + ("WMF", "Windows Metafile", ".wmf .emf", "PillowFormat"), + ("XBM", "X11 Bitmap", ".xbm", "PillowFormat"), + ("XPM", "X11 Pixel Map", ".xpm", "PillowFormat"), + ("XVTHUMB", "XV thumbnail image", "", "PillowFormat"), +] +for id, summary, ext, class_name in PILLOW_FORMATS: + config = PluginConfig( + name=id.upper() + "-PIL", + class_name=class_name, + module_name="imageio.plugins.pillow_legacy", + is_legacy=True, + install_name="pillow", + legacy_args={ + "description": summary + " via Pillow", + "extensions": ext, + "modes": "iI" if class_name == "GIFFormat" else "i", + "plugin_id": id, + }, + ) + known_plugins[config.name] = config + +known_plugins["FFMPEG"] = PluginConfig( + name="FFMPEG", + class_name="FfmpegFormat", + module_name="imageio.plugins.ffmpeg", + is_legacy=True, + install_name="ffmpeg", + legacy_args={ + "description": "Many video formats and cameras (via ffmpeg)", + "extensions": ".mov .avi .mpg .mpeg .mp4 .mkv .webm .wmv .h264", + "modes": "I", + }, +) + +known_plugins["BSDF"] = PluginConfig( + name="BSDF", + class_name="BsdfFormat", + module_name="imageio.plugins.bsdf", + is_legacy=True, + install_name="bsdf", + legacy_args={ + "description": "Format based on the Binary Structured Data Format", + "extensions": ".bsdf", + "modes": "iIvV", + }, +) + +known_plugins["DICOM"] = PluginConfig( + name="DICOM", + class_name="DicomFormat", + module_name="imageio.plugins.dicom", + is_legacy=True, + install_name="dicom", + legacy_args={ + "description": 
"Digital Imaging and Communications in Medicine", + "extensions": ".dcm .ct .mri", + "modes": "iIvV", + }, +) + +known_plugins["FEI"] = PluginConfig( + name="FEI", + class_name="FEISEMFormat", + module_name="imageio.plugins.feisem", + is_legacy=True, + install_name="feisem", + legacy_args={ + "description": "FEI-SEM TIFF format", + "extensions": [".tif", ".tiff"], + "modes": "iv", + }, +) + +known_plugins["FITS"] = PluginConfig( + name="FITS", + class_name="FitsFormat", + module_name="imageio.plugins.fits", + is_legacy=True, + install_name="fits", + legacy_args={ + "description": "Flexible Image Transport System (FITS) format", + "extensions": ".fits .fit .fts .fz", + "modes": "iIvV", + }, +) + +known_plugins["GDAL"] = PluginConfig( + name="GDAL", + class_name="GdalFormat", + module_name="imageio.plugins.gdal", + is_legacy=True, + install_name="gdal", + legacy_args={ + "description": "Geospatial Data Abstraction Library", + "extensions": ".tiff .tif .img .ecw .jpg .jpeg", + "modes": "iIvV", + }, +) + +known_plugins["ITK"] = PluginConfig( + name="ITK", + class_name="ItkFormat", + module_name="imageio.plugins.simpleitk", + is_legacy=True, + install_name="simpleitk", + legacy_args={ + "description": "Insight Segmentation and Registration Toolkit (ITK) format", + "extensions": " ".join( + ( + ".gipl", + ".ipl", + ".mha", + ".mhd", + ".nhdr", + ".nia", + ".hdr", + ".nrrd", + ".nii", + ".nii.gz", + ".img", + ".img.gz", + ".vtk", + ".hdf5", + ".lsm", + ".mnc", + ".mnc2", + ".mgh", + ".mnc", + ".pic", + ".bmp", + ".jpeg", + ".jpg", + ".png", + ".tiff", + ".tif", + ".dicom", + ".dcm", + ".gdcm", + ) + ), + "modes": "iIvV", + }, +) + +known_plugins["NPZ"] = PluginConfig( + name="NPZ", + class_name="NpzFormat", + module_name="imageio.plugins.npz", + is_legacy=True, + install_name="numpy", + legacy_args={ + "description": "Numpy's compressed array format", + "extensions": ".npz", + "modes": "iIvV", + }, +) + +known_plugins["SWF"] = PluginConfig( + name="SWF", + 
class_name="SWFFormat", + module_name="imageio.plugins.swf", + is_legacy=True, + install_name="swf", + legacy_args={ + "description": "Shockwave flash", + "extensions": ".swf", + "modes": "I", + }, +) + +known_plugins["SCREENGRAB"] = PluginConfig( + name="SCREENGRAB", + class_name="ScreenGrabFormat", + module_name="imageio.plugins.grab", + is_legacy=True, + install_name="pillow", + legacy_args={ + "description": "Grab screenshots (Windows and OS X only)", + "extensions": [], + "modes": "i", + }, +) + +known_plugins["CLIPBOARDGRAB"] = PluginConfig( + name="CLIPBOARDGRAB", + class_name="ClipboardGrabFormat", + module_name="imageio.plugins.grab", + is_legacy=True, + install_name="pillow", + legacy_args={ + "description": "Grab from clipboard (Windows only)", + "extensions": [], + "modes": "i", + }, +) + +# LYTRO plugin (legacy) +lytro_formats = [ + ("lytro-lfr", "Lytro Illum lfr image file", ".lfr", "i", "LytroLfrFormat"), + ( + "lytro-illum-raw", + "Lytro Illum raw image file", + ".raw", + "i", + "LytroIllumRawFormat", + ), + ("lytro-lfp", "Lytro F01 lfp image file", ".lfp", "i", "LytroLfpFormat"), + ("lytro-f01-raw", "Lytro F01 raw image file", ".raw", "i", "LytroF01RawFormat"), +] +for name, des, ext, mode, class_name in lytro_formats: + config = PluginConfig( + name=name.upper(), + class_name=class_name, + module_name="imageio.plugins.lytro", + is_legacy=True, + install_name="lytro", + legacy_args={ + "description": des, + "extensions": ext, + "modes": mode, + }, + ) + known_plugins[config.name] = config + +# FreeImage plugin (legacy) +FREEIMAGE_FORMATS = [ + ( + "BMP", + 0, + "Windows or OS/2 Bitmap", + ".bmp", + "i", + "FreeimageBmpFormat", + "imageio.plugins.freeimage", + ), + ( + "CUT", + 21, + "Dr. 
Halo", + ".cut", + "i", + "FreeimageFormat", + "imageio.plugins.freeimage", + ), + ( + "DDS", + 24, + "DirectX Surface", + ".dds", + "i", + "FreeimageFormat", + "imageio.plugins.freeimage", + ), + ( + "EXR", + 29, + "ILM OpenEXR", + ".exr", + "i", + "FreeimageFormat", + "imageio.plugins.freeimage", + ), + ( + "G3", + 27, + "Raw fax format CCITT G.3", + ".g3", + "i", + "FreeimageFormat", + "imageio.plugins.freeimage", + ), + ( + "GIF", + 25, + "Static and animated gif (FreeImage)", + ".gif", + "iI", + "GifFormat", + "imageio.plugins.freeimagemulti", + ), + ( + "HDR", + 26, + "High Dynamic Range Image", + ".hdr", + "i", + "FreeimageFormat", + "imageio.plugins.freeimage", + ), + ( + "ICO", + 1, + "Windows Icon", + ".ico", + "iI", + "IcoFormat", + "imageio.plugins.freeimagemulti", + ), + ( + "IFF", + 5, + "IFF Interleaved Bitmap", + ".iff .lbm", + "i", + "FreeimageFormat", + "imageio.plugins.freeimage", + ), + ( + "J2K", + 30, + "JPEG-2000 codestream", + ".j2k .j2c", + "i", + "FreeimageFormat", + "imageio.plugins.freeimage", + ), + ( + "JNG", + 3, + "JPEG Network Graphics", + ".jng", + "i", + "FreeimageFormat", + "imageio.plugins.freeimage", + ), + ( + "JP2", + 31, + "JPEG-2000 File Format", + ".jp2", + "i", + "FreeimageFormat", + "imageio.plugins.freeimage", + ), + ( + "JPEG", + 2, + "JPEG - JFIF Compliant", + ".jpg .jif .jpeg .jpe", + "i", + "FreeimageJpegFormat", + "imageio.plugins.freeimage", + ), + ( + "JPEG-XR", + 36, + "JPEG XR image format", + ".jxr .wdp .hdp", + "i", + "FreeimageFormat", + "imageio.plugins.freeimage", + ), + ( + "KOALA", + 4, + "C64 Koala Graphics", + ".koa", + "i", + "FreeimageFormat", + "imageio.plugins.freeimage", + ), + # not registered in legacy pillow + # ("MNG", 6, "Multiple-image Network Graphics", ".mng", "i", "FreeimageFormat", "imageio.plugins.freeimage"), + ( + "PBM", + 7, + "Portable Bitmap (ASCII)", + ".pbm", + "i", + "FreeimageFormat", + "imageio.plugins.freeimage", + ), + ( + "PBMRAW", + 8, + "Portable Bitmap (RAW)", + ".pbm", 
+ "i", + "FreeimageFormat", + "imageio.plugins.freeimage", + ), + ( + "PCD", + 9, + "Kodak PhotoCD", + ".pcd", + "i", + "FreeimageFormat", + "imageio.plugins.freeimage", + ), + ( + "PCX", + 10, + "Zsoft Paintbrush", + ".pcx", + "i", + "FreeimageFormat", + "imageio.plugins.freeimage", + ), + ( + "PFM", + 32, + "Portable floatmap", + ".pfm", + "i", + "FreeimageFormat", + "imageio.plugins.freeimage", + ), + ( + "PGM", + 11, + "Portable Greymap (ASCII)", + ".pgm", + "i", + "FreeimageFormat", + "imageio.plugins.freeimage", + ), + ( + "PGMRAW", + 12, + "Portable Greymap (RAW)", + ".pgm", + "i", + "FreeimageFormat", + "imageio.plugins.freeimage", + ), + ( + "PICT", + 33, + "Macintosh PICT", + ".pct .pict .pic", + "i", + "FreeimageFormat", + "imageio.plugins.freeimage", + ), + ( + "PNG", + 13, + "Portable Network Graphics", + ".png", + "i", + "FreeimagePngFormat", + "imageio.plugins.freeimage", + ), + ( + "PPM", + 14, + "Portable Pixelmap (ASCII)", + ".ppm", + "i", + "FreeimagePnmFormat", + "imageio.plugins.freeimage", + ), + ( + "PPMRAW", + 15, + "Portable Pixelmap (RAW)", + ".ppm", + "i", + "FreeimagePnmFormat", + "imageio.plugins.freeimage", + ), + ( + "PSD", + 20, + "Adobe Photoshop", + ".psd", + "i", + "FreeimageFormat", + "imageio.plugins.freeimage", + ), + ( + "RAS", + 16, + "Sun Raster Image", + ".ras", + "i", + "FreeimageFormat", + "imageio.plugins.freeimage", + ), + ( + "RAW", + 34, + "RAW camera image", + ".3fr .arw .bay .bmq .cap .cine .cr2 .crw .cs1 .dc2 " + ".dcr .drf .dsc .dng .erf .fff .ia .iiq .k25 .kc2 .kdc .mdc .mef .mos .mrw .nef .nrw .orf " + ".pef .ptx .pxn .qtk .raf .raw .rdc .rw2 .rwl .rwz .sr2 .srf .srw .sti", + "i", + "FreeimageFormat", + "imageio.plugins.freeimage", + ), + ( + "SGI", + 28, + "SGI Image Format", + ".sgi .rgb .rgba .bw", + "i", + "FreeimageFormat", + "imageio.plugins.freeimage", + ), + ( + "TARGA", + 17, + "Truevision Targa", + ".tga .targa", + "i", + "FreeimageFormat", + "imageio.plugins.freeimage", + ), + ( + "TIFF", + 18, + 
class PluginConfig:
    # Type stub for the lazy-import plugin descriptor defined in plugins.py.
    name: str
    class_name: str
    module_name: str
    is_legacy: bool
    package_name: Optional[str] = None
    install_name: Optional[str] = None
    legacy_args: Optional[dict] = None
    @property
    def format(self) -> Any: ...
    @property
    def plugin_class(self) -> PluginV3: ...
    # The keyword-only arguments default to None, so they are spelled as
    # Optional[...] explicitly: implicit Optional (``x: str = None``) is
    # rejected by current type checkers and would disagree with the
    # attribute annotations above.
    def __init__(
        self,
        name: str,
        class_name: str,
        module_name: str,
        *,
        is_legacy: bool = False,
        package_name: Optional[str] = None,
        install_name: Optional[str] = None,
        legacy_args: Optional[dict] = None,
    ) -> None: ...
0000000000000000000000000000000000000000..3ec11235176ab5839ed85646420affae7173ceec Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/core/__pycache__/fetching.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/core/__pycache__/findlib.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/core/__pycache__/findlib.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..83c491158c3d4a47f3463dce3fc1d52d5d028eea Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/core/__pycache__/findlib.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/core/__pycache__/format.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/core/__pycache__/format.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dd8fee62020a1fe094ec533c2fe8795628344d33 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/core/__pycache__/format.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/core/__pycache__/imopen.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/core/__pycache__/imopen.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6a1f1619c5d91d37dbd80aefc6551b74963fba62 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/core/__pycache__/imopen.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/core/__pycache__/legacy_plugin_wrapper.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/core/__pycache__/legacy_plugin_wrapper.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..83d7fd246f5dc4211298f7d11684c99d88b3c360 Binary files /dev/null and 
b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/core/__pycache__/legacy_plugin_wrapper.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/core/__pycache__/request.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/core/__pycache__/request.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..aba1fe88d9495f50effff2ead51a89cca6e87016 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/core/__pycache__/request.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/core/__pycache__/util.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/core/__pycache__/util.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..062e9c8752eaf9fac3bc233637e99c1dbb868419 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/core/__pycache__/util.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/core/__pycache__/v3_plugin_api.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/core/__pycache__/v3_plugin_api.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cdb3d822407b5af5f32edba43bf00777f8d13690 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/core/__pycache__/v3_plugin_api.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/core/fetching.py b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/core/fetching.py new file mode 100644 index 0000000000000000000000000000000000000000..9c5e5a5fbfafff6bb709c982526c1cf1517177f2 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/core/fetching.py @@ -0,0 +1,246 @@ +# -*- coding: utf-8 -*- +# Based on code from the vispy project +# Distributed under the (new) BSD License. See LICENSE.txt for more info. 
def get_remote_file(fname, directory=None, force_download=False, auto=True):
    """Get the filename for the local version of a file from the web

    Parameters
    ----------
    fname : str
        The relative filename on the remote data repository to download.
        These correspond to paths on
        ``https://github.com/imageio/imageio-binaries/``.
    directory : str | None
        The directory where the file will be cached if a download was
        required to obtain the file. By default, the appdata directory
        is used. This is also the first directory that is checked for
        a local version of the file. If the directory does not exist,
        it will be created.
    force_download : bool | str
        If True, the file will be downloaded even if a local copy exists
        (and this copy will be overwritten). Can also be a YYYY-MM-DD date
        to ensure a file is up-to-date (the timestamp of a file on disk,
        if present, is checked).
    auto : bool
        Whether to auto-download the file if its not present locally. Default
        True. If False and a download is needed, raises NeedDownloadError.

    Returns
    -------
    fname : str
        The path to the file on the local system.

    Raises
    ------
    InternetNotAllowedError
        If a download is needed but the environment variable
        IMAGEIO_NO_INTERNET is set to "1", "true" or "yes".
    NeedDownloadError
        If a download is needed but ``auto`` is False.
    """
    _url_root = "https://github.com/imageio/imageio-binaries/raw/master/"
    url = _url_root + fname
    nfname = op.normcase(fname)  # convert to native
    # Get dirs to look for the resource
    given_directory = directory
    directory = given_directory or appdata_dir("imageio")
    dirs = resource_dirs()
    dirs.insert(0, directory)  # Given dir has preference

    def _resolve_local(found_dir, found_file):
        """Return the usable path for a local hit, copying it into the
        explicitly requested directory if it was found somewhere else."""
        if given_directory and given_directory != found_dir:
            target = os.path.join(given_directory, nfname)
            # Make sure the output directory exists (race-free)
            os.makedirs(op.abspath(op.dirname(target)), exist_ok=True)
            shutil.copy(found_file, target)
            return target
        return found_file

    # Try to find the resource locally
    for candidate_dir in dirs:
        filename = op.join(candidate_dir, nfname)
        if not op.isfile(filename):
            continue
        if not force_download:  # we're done
            return _resolve_local(candidate_dir, filename)
        if isinstance(force_download, str):
            ntime = time.strptime(force_download, "%Y-%m-%d")
            # NOTE(review): getctime is the metadata-change time on Unix and
            # the creation time on Windows, not the modification time;
            # confirm whether getmtime was intended before changing it.
            ftime = time.gmtime(op.getctime(filename))
            if ftime >= ntime:
                return _resolve_local(candidate_dir, filename)
            print("File older than %s, updating..." % force_download)
            break
        # force_download is True: keep scanning, then fall through to download

    # If we get here, we're going to try to download the file
    if os.getenv("IMAGEIO_NO_INTERNET", "").lower() in ("1", "true", "yes"):
        raise InternetNotAllowedError(
            "Will not download resource from the "
            "internet because environment variable "
            "IMAGEIO_NO_INTERNET is set."
        )

    # Can we proceed with auto-download?
    if not auto:
        raise NeedDownloadError()

    # Get filename to store to and make sure the dir exists
    filename = op.join(directory, nfname)
    os.makedirs(op.abspath(op.dirname(filename)), exist_ok=True)
    # let's go get the file
    if os.getenv("CONTINUOUS_INTEGRATION", False):  # pragma: no cover
        # On CI, we retry a few times before the final attempt below,
        # whose error (if any) is allowed to propagate.
        for _ in range(2):
            try:
                _fetch_file(url, filename)
                return filename
            except IOError:
                time.sleep(0.5)
    _fetch_file(url, filename)  # pragma: no cover
    return filename  # pragma: no cover


def _fetch_file(url, file_name, print_destination=True):
    """Download ``url`` to ``file_name``, retrying up to four times

    The data is first written to ``file_name + ".part"`` and only moved to
    its final location once the download completed.

    Parameters
    ----------
    url: string
        The url of file to be downloaded.
    file_name: string
        Name, along with the path, of where downloaded file will be saved.
    print_destination: bool, optional
        If true, destination of where file was saved will be printed after
        download finishes.

    Raises
    ------
    IOError
        If all attempts failed. The error of the last attempt is attached
        as the cause.
    """
    # Adapted from NISL:
    # https://github.com/nisl/tutorial/blob/master/nisl/datasets.py

    print(
        "Imageio: %r was not found on your computer; "
        "downloading it now." % os.path.basename(file_name)
    )

    temp_file_name = file_name + ".part"
    initial_size = 0
    errors = []
    for tries in range(4):
        remote_file = None
        try:
            # Checking file size and displaying it alongside the download url
            remote_file = urlopen(url, timeout=5.0)
            file_size = int(remote_file.headers["Content-Length"].strip())
            size_str = _sizeof_fmt(file_size)
            print("Try %i. Download from %s (%s)" % (tries + 1, url, size_str))
            # Downloading data (can be extended to resume if need be)
            with open(temp_file_name, "wb") as local_file:
                _chunk_read(remote_file, local_file, initial_size=initial_size)
            # temp file is closed at this point, as required for the move
            shutil.move(temp_file_name, file_name)
            if print_destination is True:
                sys.stdout.write("File saved as %s.\n" % file_name)
            break
        except Exception as e:
            # Best effort: remember the failure and try again.
            errors.append(e)
            print("Error while fetching file: %s." % str(e))
        finally:
            # Release the connection of this attempt, success or not.
            if remote_file is not None:
                remote_file.close()
    else:
        # Only reached when no attempt hit ``break``, i.e. all of them
        # raised, so ``errors`` holds one entry per attempt.
        raise IOError(
            "Unable to download %r. Perhaps there is no internet "
            "connection? If there is, please report this problem."
            % os.path.basename(file_name)
        ) from errors[-1]
+ """ + # Adapted from NISL: + # https://github.com/nisl/tutorial/blob/master/nisl/datasets.py + + bytes_so_far = initial_size + # Returns only amount left to download when resuming, not the size of the + # entire file + total_size = int(response.headers["Content-Length"].strip()) + total_size += initial_size + + progress = StdoutProgressIndicator("Downloading") + progress.start("", "bytes", total_size) + + while True: + chunk = response.read(chunk_size) + bytes_so_far += len(chunk) + if not chunk: + break + _chunk_write(chunk, local_file, progress) + progress.finish("Done") + + +def _chunk_write(chunk, local_file, progress): + """Write a chunk to file and update the progress bar""" + local_file.write(chunk) + progress.increase_progress(len(chunk)) + time.sleep(0) # Give other threads a chance, e.g. those that handle stdout pipes + + +def _sizeof_fmt(num): + """Turn number of bytes into human-readable str""" + units = ["bytes", "kB", "MB", "GB", "TB", "PB"] + decimals = [0, 0, 1, 2, 2, 2] + """Human friendly file size""" + if num > 1: + exponent = min(int(log(num, 1024)), len(units) - 1) + quotient = float(num) / 1024**exponent + unit = units[exponent] + num_decimals = decimals[exponent] + format_string = "{0:.%sf} {1}" % num_decimals + return format_string.format(quotient, unit) + return "0 bytes" if num == 0 else "1 byte" diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/core/findlib.py b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/core/findlib.py new file mode 100644 index 0000000000000000000000000000000000000000..f45fd5ab2964c680ded0001f8a435f466c89f176 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/core/findlib.py @@ -0,0 +1,160 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2015-1018, imageio contributors +# Copyright (C) 2013, Zach Pincus, Almar Klein and others + +"""This module contains generic code to find and load a dynamic library.""" + +import os +import sys +import ctypes + + +LOCALDIR = 
LOCALDIR = os.path.abspath(os.path.dirname(__file__))

# Flag that can be patched / set to True to disable loading non-system libs
SYSTEM_LIBS_ONLY = False


def looks_lib(fname):
    """Returns True if the given filename looks like a dynamic library.
    Based on extension, but cross-platform and more flexible.
    """
    fname = fname.lower()
    if sys.platform.startswith("win"):
        return fname.endswith(".dll")
    elif sys.platform.startswith("darwin"):
        return fname.endswith(".dylib")
    else:
        # Also accept versioned names such as ``libfoo.so.1.2``
        return fname.endswith(".so") or ".so." in fname


def generate_candidate_libs(lib_names, lib_dirs=None):
    """Generate a list of candidate filenames of what might be the dynamic
    library corresponding with the given list of names.

    Parameters
    ----------
    lib_names : list of str
        Name prefixes to look for; matching is case-insensitive.
    lib_dirs : list of str | None
        Extra directories to search, before the common system, Python and
        user library directories.

    Returns
    -------
    (lib_dirs, lib_paths) : tuple of list
        The directories that were searched (existing ones only, without
        duplicates) and the candidate files found in them.
    """
    lib_dirs = lib_dirs or []

    # Get system dirs to search
    sys_lib_dirs = [
        "/lib",
        "/usr/lib",
        "/usr/lib/x86_64-linux-gnu",
        "/usr/lib/aarch64-linux-gnu",
        "/usr/local/lib",
        "/opt/local/lib",
    ]

    # Get Python dirs to search (shared if for Pyzo)
    py_sub_dirs = ["bin", "lib", "DLLs", "Library/bin", "shared"]
    py_lib_dirs = [os.path.join(sys.prefix, d) for d in py_sub_dirs]
    if hasattr(sys, "base_prefix"):
        py_lib_dirs += [os.path.join(sys.base_prefix, d) for d in py_sub_dirs]

    # Get user dirs to search (i.e. HOME)
    home_dir = os.path.expanduser("~")
    user_lib_dirs = [os.path.join(home_dir, d) for d in ["lib"]]

    # Select only the dirs for which a directory exists, and remove duplicates
    potential_lib_dirs = lib_dirs + sys_lib_dirs + py_lib_dirs + user_lib_dirs
    lib_dirs = []
    for ld in potential_lib_dirs:
        if os.path.isdir(ld) and ld not in lib_dirs:
            lib_dirs.append(ld)

    # Filenames are lower-cased before comparing, so the requested names
    # must be lower-cased too for the match to be case-insensitive.
    wanted_prefixes = [lib_name.lower() for lib_name in lib_names]

    # Now attempt to find libraries of that name in the given directory
    # (case-insensitive)
    lib_paths = []
    for lib_dir in lib_dirs:
        # Get files, prefer short names, last version
        files = os.listdir(lib_dir)
        files = reversed(sorted(files))
        files = sorted(files, key=len)
        for prefix in wanted_prefixes:
            # Test all filenames for name and ext
            for fname in files:
                if fname.lower().startswith(prefix) and looks_lib(fname):
                    lib_paths.append(os.path.join(lib_dir, fname))

    # Return (only the items which are files)
    lib_paths = [lp for lp in lib_paths if os.path.isfile(lp)]
    return lib_dirs, lib_paths


def load_lib(exact_lib_names, lib_names, lib_dirs=None):
    """load_lib(exact_lib_names, lib_names, lib_dirs=None)

    Load a dynamic library.

    This function first tries to load the library from the given exact
    names. When that fails, it tries to find the library in common
    locations. It searches for files that start with one of the names
    given in lib_names (case insensitive). The search is performed in
    the given lib_dirs and a set of common library dirs.

    Returns ``(ctypes_library, library_path)``

    Raises ``ValueError`` if no library name is given at all, and
    ``OSError`` if no candidate could be found or loaded.
    """

    # Checks
    assert isinstance(exact_lib_names, list)
    assert isinstance(lib_names, list)
    if lib_dirs is not None:
        assert isinstance(lib_dirs, list)
    exact_lib_names = [n for n in exact_lib_names if n]
    lib_names = [n for n in lib_names if n]

    # Get reference name (for better messages)
    if lib_names:
        the_lib_name = lib_names[0]
    elif exact_lib_names:
        the_lib_name = exact_lib_names[0]
    else:
        raise ValueError("No library name given.")

    # Collect filenames of potential libraries
    # First try a few bare library names that ctypes might be able to find
    # in the default locations for each platform.
    if SYSTEM_LIBS_ONLY:
        lib_dirs, lib_paths = [], []
    else:
        lib_dirs, lib_paths = generate_candidate_libs(lib_names, lib_dirs)
    lib_paths = exact_lib_names + lib_paths

    # Select loader
    if sys.platform.startswith("win"):
        loader = ctypes.windll
    else:
        loader = ctypes.cdll

    # Try to load until success
    the_lib = None
    errors = []
    for fname in lib_paths:
        try:
            the_lib = loader.LoadLibrary(fname)
            break
        except Exception as err:
            # Don't record errors when it couldn't load the library from an
            # exact name -- this fails often, and doesn't provide any useful
            # debugging information anyway, beyond "couldn't find library..."
            if fname not in exact_lib_names:
                errors.append((fname, err))

    # No success ...
    if the_lib is None:
        if errors:
            # No library loaded, and load-errors reported for some
            # candidate libs
            err_txt = ["%s:\n%s" % (lib, str(e)) for lib, e in errors]
            msg = (
                "One or more %s libraries were found, but "
                + "could not be loaded due to the following errors:\n%s"
            )
            raise OSError(msg % (the_lib_name, "\n\n".join(err_txt)))
        else:
            # No errors, because no potential libraries found at all!
            msg = "Could not find a %s library in any of:\n%s"
            raise OSError(msg % (the_lib_name, "\n".join(lib_dirs)))

    # Done
    return the_lib, fname
def _get_config(plugin):
    """Old Plugin resolution logic.

    Remove once we remove the old format manager.

    ``plugin`` is either a plugin name or something that identifies a file
    extension (a path with a suffix, or an extension with or without the
    leading dot). For an extension, the first legacy plugin found in the
    priority lists of that extension is selected.

    Raises ``IndexError`` if ``plugin`` matches neither a known plugin nor
    a known extension.
    """

    extension_name = None

    suffix = Path(plugin).suffix.lower()
    if suffix in known_extensions:
        extension_name = suffix
    elif plugin in known_plugins:
        pass
    elif plugin.lower() in known_extensions:
        extension_name = plugin.lower()
    elif "." + plugin.lower() in known_extensions:
        extension_name = "." + plugin.lower()
    else:
        raise IndexError(f"No format known by name `{plugin}`.")

    if extension_name is not None:
        candidates = [
            x
            for file_extension in known_extensions[extension_name]
            for x in file_extension.priority
        ]
        for plugin_name in candidates:
            if known_plugins[plugin_name].is_legacy:
                plugin = plugin_name
                break

    return known_plugins[plugin]


class Format(object):
    """Represents an implementation to read/write a particular file format

    A format instance is responsible for 1) providing information about
    a format; 2) determining whether a certain file can be read/written
    with this format; 3) providing a reader/writer class.

    Generally, imageio will select the right format and use that to
    read/write an image. A format can also be explicitly chosen in all
    read/write functions. Use ``print(format)``, or ``help(format_name)``
    to see its documentation.

    To implement a specific format, one should create a subclass of
    Format and the Format.Reader and Format.Writer classes. See
    :class:`imageio.plugins` for details.

    Parameters
    ----------
    name : str
        A short name of this format. Users can select a format using its name.
    description : str
        A one-line description of the format.
    extensions : str | list | None
        List of filename extensions that this format supports. If a
        string is passed it should be space or comma separated. The
        extensions are used in the documentation and to allow users to
        select a format by file extension. It is not used to determine
        what format to use for reading/saving a file.
    modes : str
        A string containing the modes that this format can handle ('iIvV'),
        “i” for an image, “I” for multiple images, “v” for a volume,
        “V” for multiple volumes.
        This attribute is used in the documentation and to select the
        formats when reading/saving a file.
    """

    def __init__(self, name, description, extensions=None, modes=None):
        """Initialize the Plugin.

        Parameters
        ----------
        name : str
            A short name of this format. Users can select a format using its name.
        description : str
            A one-line description of the format.
        extensions : str | list | None
            List of filename extensions that this format supports. If a
            string is passed it should be space or comma separated. The
            extensions are used in the documentation and to allow users to
            select a format by file extension. It is not used to determine
            what format to use for reading/saving a file.
        modes : str
            A string containing the modes that this format can handle ('iIvV'),
            “i” for an image, “I” for multiple images, “v” for a volume,
            “V” for multiple volumes.
            This attribute is used in the documentation and to select the
            formats when reading/saving a file.

        Raises
        ------
        ValueError
            If ``extensions`` or ``modes`` has an unsupported type, or if
            ``modes`` contains a character other than ``iIvV?``.
        """

        # Store name and description
        self._name = name.upper()
        self._description = description

        # Store extensions, do some effort to normalize them.
        # They are stored as a tuple of lowercase strings with a leading dot.
        if extensions is None:
            extensions = []
        elif isinstance(extensions, str):
            extensions = extensions.replace(",", " ").split(" ")
        #
        if isinstance(extensions, (tuple, list)):
            self._extensions = tuple(
                ["." + e.strip(".").lower() for e in extensions if e]
            )
        else:
            raise ValueError("Invalid value for extensions given.")

        # Store mode
        self._modes = modes or ""
        if not isinstance(self._modes, str):
            raise ValueError("Invalid value for modes given.")
        for m in self._modes:
            if m not in "iIvV?":
                raise ValueError("Invalid value for mode given.")

    def __repr__(self):
        # Short description. The template needs one placeholder per
        # argument; an empty template makes the %-formatting raise TypeError.
        return "<Format %s - %s>" % (self.name, self.description)

    def __str__(self):
        return self.doc

    @property
    def doc(self):
        """The documentation for this format (name + description + docstring)."""
        # Our docsring is assumed to be indented by four spaces. The
        # first line needs special attention.
        # A subclass without a docstring has ``__doc__ = None``; treat that
        # as an empty docstring instead of failing on ``None.strip()``.
        return "%s - %s\n\n    %s\n" % (
            self.name,
            self.description,
            (self.__doc__ or "").strip(),
        )

    @property
    def name(self):
        """The name of this format."""
        return self._name

    @property
    def description(self):
        """A short description of this format."""
        return self._description

    @property
    def extensions(self):
        """A list of file extensions supported by this plugin.
        These are all lowercase with a leading dot.
        """
        return self._extensions

    @property
    def modes(self):
        """A string specifying the modes that this format can handle."""
        return self._modes

    def get_reader(self, request):
        """get_reader(request)

        Return a reader object that can be used to read data and info
        from the given file. Users are encouraged to use
        imageio.get_reader() instead.

        Raises RuntimeError if the image mode of the request is one of
        'iIvV' and is not among this format's modes.
        """
        # Any other mode character maps to "", which is "in" every string
        select_mode = request.mode[1] if request.mode[1] in "iIvV" else ""
        if select_mode not in self.modes:
            raise RuntimeError(
                f"Format {self.name} cannot read in {request.mode.image_mode} mode"
            )
        return self.Reader(self, request)

    def get_writer(self, request):
        """get_writer(request)

        Return a writer object that can be used to write data and info
        to the given file. Users are encouraged to use
        imageio.get_writer() instead.

        Raises RuntimeError if the image mode of the request is one of
        'iIvV' and is not among this format's modes.
        """
        # Any other mode character maps to "", which is "in" every string
        select_mode = request.mode[1] if request.mode[1] in "iIvV" else ""
        if select_mode not in self.modes:
            raise RuntimeError(
                f"Format {self.name} cannot write in {request.mode.image_mode} mode"
            )
        return self.Writer(self, request)

    def can_read(self, request):
        """can_read(request)

        Get whether this format can read data from the specified uri.
        """
        return self._can_read(request)

    def can_write(self, request):
        """can_write(request)

        Get whether this format can write data to the specified uri.
        """
        return self._can_write(request)

    def _can_read(self, request):  # pragma: no cover
        """Check if Plugin can read from ImageResource.

        This method is called when the format manager is searching for a format
        to read a certain image. Return True if this format can do it.

        The format manager is aware of the extensions and the modes that each
        format can handle. It will first ask all formats that *seem* to be able
        to read it whether they can. If none can, it will ask the remaining
        formats if they can: the extension might be missing, and this allows
        formats to provide functionality for certain extensions, while giving
        preference to other plugins.

        If a format says it can, it should live up to it. The format would
        ideally check the request.firstbytes and look for a header of some kind.

        Parameters
        ----------
        request : Request
            A request that can be used to access the ImageResource and obtain
            metadata about it.

        Returns
        -------
        can_read : bool
            True if the plugin can read from the ImageResource, False otherwise.

        """
        return None  # Plugins must implement this

    def _can_write(self, request):  # pragma: no cover
        """Check if Plugin can write to ImageResource.

        Parameters
        ----------
        request : Request
            A request that can be used to access the ImageResource and obtain
            metadata about it.

        Returns
        -------
        can_write : bool
            True if the plugin can write to the ImageResource, False otherwise.

        """
        return None  # Plugins must implement this

    # -----

    class _BaseReaderWriter(object):
        """Base class for the Reader and Writer class to implement common
        functionality. It implements a similar approach for opening/closing
        and context management as Python's file objects.
        """

        def __init__(self, format, request):
            self.__closed = False
            # Index of the image that was most recently requested via
            # get_data(); -1 means that nothing was requested yet.
            self._BaseReaderWriter_last_index = -1
            self._format = format
            self._request = request
            # Open the reader/writer
            self._open(**self.request.kwargs.copy())

        @property
        def format(self):
            """The :class:`.Format` object corresponding to the current
            read/write operation.
            """
            return self._format

        @property
        def request(self):
            """The :class:`.Request` object corresponding to the
            current read/write operation.
            """
            return self._request

        def __enter__(self):
            self._checkClosed()
            return self

        def __exit__(self, type, value, traceback):
            if value is None:
                # Otherwise error in close hide the real error.
                self.close()

        def __del__(self):
            try:
                self.close()
            except Exception:  # pragma: no cover
                pass  # Suppress noise when called during interpreter shutdown

        def close(self):
            """Flush and close the reader/writer.
            This method has no effect if it is already closed.
            """
            if self.__closed:
                return
            self.__closed = True
            self._close()
            # Process results and clean request object
            self.request.finish()

        @property
        def closed(self):
            """Whether the reader/writer is closed."""
            return self.__closed

        def _checkClosed(self, msg=None):
            """Internal: raise a RuntimeError if reader/writer is closed"""
            if self.closed:
                what = self.__class__.__name__
                msg = msg or ("I/O operation on closed %s." % what)
                raise RuntimeError(msg)

        # To implement

        def _open(self, **kwargs):
            """_open(**kwargs)

            Plugins should probably implement this.

            It is called when reader/writer is created. Here the
            plugin can do its initialization. The given keyword arguments
            are those that were given by the user at imageio.read() or
            imageio.write().
            """
            raise NotImplementedError()

        def _close(self):
            """_close()

            Plugins should probably implement this.

            It is called when the reader/writer is closed. Here the plugin
            can do a cleanup, flush, etc.

            """
            raise NotImplementedError()

    # -----

    class Reader(_BaseReaderWriter):
        """
        The purpose of a reader object is to read data from an image
        resource, and should be obtained by calling :func:`.get_reader`.

        A reader can be used as an iterator to read multiple images,
        and (if the format permits) only reads data from the file when
        new data is requested (i.e. streaming). A reader can also be
        used as a context manager so that it is automatically closed.

        Plugins implement Reader's for different formats. Though rare,
        plugins may provide additional functionality (beyond what is
        provided by the base reader class).
        """

        def get_length(self):
            """get_length()

            Get the number of images in the file. (Note: you can also
            use ``len(reader_object)``.)

            The result can be:
                * 0 for files that only have meta data
                * 1 for singleton images (e.g. in PNG, JPEG, etc.)
                * N for image series
                * inf for streams (series of unknown length)
            """
            return self._get_length()

        def get_data(self, index, **kwargs):
            """get_data(index, **kwargs)

            Read image data from the file, using the image index. The
            returned image has a 'meta' attribute with the meta data.
            Raises IndexError if the index is out of range.

            Some formats may support additional keyword arguments. These are
            listed in the documentation of those formats.
            """
            self._checkClosed()
            self._BaseReaderWriter_last_index = index
            try:
                im, meta = self._get_data(index, **kwargs)
            except StopIteration:
                raise IndexError(index)
            return Array(im, meta)  # Array tests im and meta

        def get_next_data(self, **kwargs):
            """get_next_data(**kwargs)

            Read the next image from the series.

            Some formats may support additional keyword arguments. These are
            listed in the documentation of those formats.
            """
            return self.get_data(self._BaseReaderWriter_last_index + 1, **kwargs)

        def set_image_index(self, index, **kwargs):
            """set_image_index(index)

            Set the internal pointer such that the next call to
            get_next_data() returns the image specified by the index
            """
            self._checkClosed()
            n = self.get_length()
            # Stored value is "last read index", hence index - 1, clamped
            # to the range [-1, n].
            self._BaseReaderWriter_last_index = min(max(index - 1, -1), n)

        def get_meta_data(self, index=None):
            """get_meta_data(index=None)

            Read meta data from the file. using the image index. If the
            index is omitted or None, return the file's (global) meta data.

            Note that ``get_data`` also provides the meta data for the returned
            image as an attribute of that image.

            The meta data is a dict, which shape depends on the format.
            E.g. for JPEG, the dict maps group names to subdicts and each
            group is a dict with name-value pairs. The groups represent
            the different metadata formats (EXIF, XMP, etc.).

            Raises ValueError if the plugin returns something other than
            a dict.
            """
            self._checkClosed()
            meta = self._get_meta_data(index)
            if not isinstance(meta, dict):
                raise ValueError(
                    "Meta data must be a dict, not %r" % meta.__class__.__name__
                )
            return meta

        def iter_data(self):
            """iter_data()

            Iterate over all images in the series. (Note: you can also
            iterate over the reader object.)

            """
            self._checkClosed()
            n = self.get_length()
            i = 0
            while i < n:
                try:
                    im, meta = self._get_data(i)
                except StopIteration:
                    return
                except IndexError:
                    # For a stream of unknown length, running out of images
                    # is the normal way for the iteration to end.
                    if n == float("inf"):
                        return
                    raise
                yield Array(im, meta)
                i += 1

        # Compatibility

        def __iter__(self):
            return self.iter_data()

        def __len__(self):
            n = self.get_length()
            if n == float("inf"):
                # len() must return an int, so report the largest one
                n = sys.maxsize
            return n

        # To implement

        def _get_length(self):
            """_get_length()

            Plugins must implement this.

            The returned scalar specifies the number of images in the series.
            See Reader.get_length for more information.
            """
            raise NotImplementedError()

        def _get_data(self, index):
            """_get_data()

            Plugins must implement this, but may raise an IndexError in
            case the plugin does not support random access.

            It should return the image and meta data: (ndarray, dict).
            """
            raise NotImplementedError()

        def _get_meta_data(self, index):
            """_get_meta_data(index)

            Plugins must implement this.

            It should return the meta data as a dict, corresponding to the
            given index, or to the file's (global) meta data if index is
            None.
            """
            raise NotImplementedError()

    # -----

    class Writer(_BaseReaderWriter):
        """
        The purpose of a writer object is to write data to an image
        resource, and should be obtained by calling :func:`.get_writer`.

        A writer will (if the format permits) write data to the file
        as soon as new data is provided (i.e. streaming). A writer can
        also be used as a context manager so that it is automatically
        closed.

        Plugins implement Writer's for different formats. Though rare,
        plugins may provide additional functionality (beyond what is
        provided by the base writer class).
        """

        def append_data(self, im, meta=None):
            """append_data(im, meta={})

            Append an image (and meta data) to the file. The final meta
            data that is used consists of the meta data on the given
            image (if applicable), updated with the given meta data.

            Raises ValueError if ``im`` is not an ndarray or if ``meta``
            is neither None nor a dict.
            """
            self._checkClosed()
            # Check image data
            if not isinstance(im, np.ndarray):
                raise ValueError("append_data requires ndarray as first arg")
            # Get total meta dict
            total_meta = {}
            if hasattr(im, "meta") and isinstance(im.meta, dict):
                total_meta.update(im.meta)
            if meta is None:
                pass
            elif not isinstance(meta, dict):
                raise ValueError("Meta must be a dict.")
            else:
                total_meta.update(meta)

            # Decouple meta info
            im = asarray(im)
            # Call
            return self._append_data(im, total_meta)

        def set_meta_data(self, meta):
            """set_meta_data(meta)

            Sets the file's (global) meta data. The meta data is a dict which
            shape depends on the format. E.g. for JPEG the dict maps
            group names to subdicts, and each group is a dict with
            name-value pairs. The groups represents the different
            metadata formats (EXIF, XMP, etc.).

            Note that some meta formats may not be supported for
            writing, and individual fields may be ignored without
            warning if they are invalid.
            """
            self._checkClosed()
            if not isinstance(meta, dict):
                raise ValueError("Meta must be a dict.")
            else:
                return self._set_meta_data(meta)

        # To implement

        def _append_data(self, im, meta):
            # Plugins must implement this
            raise NotImplementedError()

        def _set_meta_data(self, meta):
            # Plugins must implement this
            raise NotImplementedError()
class FormatManager(object):
    """
    The FormatManager is a singleton plugin factory.

    The format manager supports getting a format object using indexing (by
    format name or extension). When used as an iterator, this object
    yields all registered format objects.

    See also :func:`.help`.
    """

    @property
    def _formats(self):
        """Configs of all known legacy plugins whose format can be loaded."""
        available_formats = list()

        for config in known_plugins.values():
            with contextlib.suppress(ImportError):
                # if an exception is raised, then format not installed
                if config.is_legacy and config.format is not None:
                    available_formats.append(config)

        return available_formats

    def __repr__(self):
        return f"<imageio.FormatManager with {len(self)} registered formats>"

    def __iter__(self):
        return iter(x.format for x in self._formats)

    def __len__(self):
        return len(self._formats)

    def __str__(self):
        ss = []
        for config in self._formats:
            ext = config.legacy_args["extensions"]
            desc = config.legacy_args["description"]
            s = f"{config.name} - {desc} [{ext}]"
            ss.append(s)
        return "\n".join(ss)

    def __getitem__(self, name):
        """Look up a format by name, by extension, or by path of a file.

        Raises ValueError if ``name`` is not a non-empty string, and
        ImportError if the format is known but cannot be imported.
        """
        warnings.warn(
            "The usage of `FormatManager` is deprecated and it will be "
            "removed in Imageio v3. Use `iio.imopen` instead.",
            DeprecationWarning,
            stacklevel=2,
        )

        if not isinstance(name, str):
            raise ValueError(
                "Looking up a format should be done by name or by extension."
            )

        if name == "":
            raise ValueError("No format matches the empty string.")

        # Test if name is existing file
        if Path(name).is_file():
            # legacy compatibility - why test reading here??
            try:
                return imopen(name, "r", legacy_mode=True)._format
            except ValueError:
                # no plugin can read the file
                pass

        config = _get_config(name.upper())

        try:
            return config.format
        except ImportError as err:
            # Keep the original import failure attached as the cause
            raise ImportError(
                f"The `{config.name}` format is not installed. "
                f"Use `pip install imageio[{config.install_name}]` to install it."
            ) from err

    def sort(self, *names):
        """sort(name1, name2, name3, ...)

        Sort the formats based on zero or more given names; a format with
        a name that matches one of the given names will take precedence
        over other formats. A match means an equal name, or ending with
        that name (though the former counts higher). Case insensitive.

        Format preference will match the order of the given names: using
        ``sort('TIFF', '-FI', '-PIL')`` would prefer the FreeImage formats
        over the Pillow formats, but prefer TIFF even more. Each time
        this is called, the starting point is the default format order,
        and calling ``sort()`` with no arguments will reset the order.

        Be aware that using the function can affect the behavior of
        other code that makes use of imageio.

        Also see the ``IMAGEIO_FORMAT_ORDER`` environment variable.

        Raises TypeError if a name is not a string, and ValueError if a
        name contains a dot or a comma.
        """

        warnings.warn(
            "`FormatManager` is deprecated and it will be removed in ImageIO v3."
            " Migrating `FormatManager.sort` depends on your use-case:\n"
            "\t- modify `iio.config.known_plugins` to specify the search order for "
            "unrecognized formats.\n"
            "\t- modify `iio.config.known_extensions[<extension>].priority`"
            " to control a specific extension.",
            DeprecationWarning,
            stacklevel=2,
        )

        # Check and sanitize input
        for name in names:
            if not isinstance(name, str):
                raise TypeError("formats.sort() accepts only string names.")
            if any(c in name for c in ".,"):
                raise ValueError(
                    "Names given to formats.sort() should not "
                    "contain dots `.` or commas `,`."
                )

        should_reset = len(names) == 0
        if should_reset:
            names = _original_order

        sane_names = [name.strip().upper() for name in names if name != ""]

        # enforce order for every extension that uses it
        flat_extensions = [
            ext for ext_list in known_extensions.values() for ext in ext_list
        ]
        for extension in flat_extensions:
            if should_reset:
                extension.reset()
                continue

            # Walk the names back to front so that the first given name
            # ends up at the very front of the priority list.
            for name in reversed(sane_names):
                for plugin in list(extension.default_priority):
                    if plugin.endswith(name):
                        extension.priority.remove(plugin)
                        extension.priority.insert(0, plugin)

        # Rebuild known_plugins in place: named plugins first (in the given
        # order), then everything else in its previous order.
        old_order = known_plugins.copy()
        known_plugins.clear()

        for name in sane_names:
            plugin = old_order.pop(name, None)
            if plugin is not None:
                known_plugins[name] = plugin

        known_plugins.update(old_order)

    def add_format(self, iio_format, overwrite=False):
        """add_format(format, overwrite=False)

        Register a format, so that imageio can use it. If a format with the
        same name already exists, an error is raised, unless overwrite is True,
        in which case the current format is replaced.
        """

        warnings.warn(
            "`FormatManager` is deprecated and it will be removed in ImageIO v3."
            " To migrate `FormatManager.add_format` add the plugin directly to "
            "`iio.config.known_plugins`.",
            DeprecationWarning,
            stacklevel=2,
        )

        if not isinstance(iio_format, Format):
            raise ValueError("add_format needs argument to be a Format object")
        elif not overwrite and iio_format.name in self.get_format_names():
            raise ValueError(
                f"A Format named {iio_format.name} is already registered, use"
                " `overwrite=True` to replace."
            )

        config = PluginConfig(
            name=iio_format.name.upper(),
            class_name=iio_format.__class__.__name__,
            module_name=iio_format.__class__.__module__,
            is_legacy=True,
            install_name="unknown",
            legacy_args={
                "name": iio_format.name,
                "description": iio_format.description,
                "extensions": " ".join(iio_format.extensions),
                "modes": iio_format.modes,
            },
        )

        known_plugins[config.name] = config

        for extension in iio_format.extensions:
            # be conservative and always treat it as a unique file format
            ext = FileExtension(
                extension=extension,
                priority=[config.name],
                name="Unique Format",
                description="A format inserted at runtime."
                f" It is being read by the `{config.name}` plugin.",
            )
            known_extensions.setdefault(extension, list()).append(ext)

    def search_read_format(self, request):
        """search_read_format(request)

        Search a format that can read a file according to the given request.
        Returns None if no appropriate format was found. (used internally)
        """

        try:
            # in legacy_mode imopen returns a LegacyPlugin
            return imopen(request, request.mode.io_mode, legacy_mode=True)._format
        except AttributeError:
            # The selected plugin has no ``_format`` attribute
            warnings.warn(
                "ImageIO now uses a v3 plugin when reading this format."
                " Please migrate to the v3 API (preferred) or use imageio.v2.",
                DeprecationWarning,
                stacklevel=2,
            )
            return None
        except ValueError:
            # no plugin can read this request
            # but the legacy API doesn't raise
            return None

    def search_write_format(self, request):
        """search_write_format(request)

        Search a format that can write a file according to the given request.
        Returns None if no appropriate format was found. (used internally)
        """

        try:
            # in legacy_mode imopen returns a LegacyPlugin
            return imopen(request, request.mode.io_mode, legacy_mode=True)._format
        except AttributeError:
            # The selected plugin has no ``_format`` attribute
            warnings.warn(
                "ImageIO now uses a v3 plugin when writing this format."
                " Please migrate to the v3 API (preferred) or use imageio.v2.",
                DeprecationWarning,
                stacklevel=2,
            )
            return None
        except ValueError:
            # no plugin can write this request
            # but the legacy API doesn't raise
            return None

    def get_format_names(self):
        """Get the names of all registered formats."""

        warnings.warn(
            "`FormatManager` is deprecated and it will be removed in ImageIO v3."
            " To migrate `FormatManager.get_format_names` use `iio.config.known_plugins.keys()` instead.",
            DeprecationWarning,
            stacklevel=2,
        )

        return [f.name for f in self._formats]

    def show(self):
        """Show a nicely formatted list of available formats"""
        print(self)
+ def _can_read(self, request: Request) -> bool: ... + def _can_write(self, request: Request) -> bool: ... + + class _BaseReaderWriter(object): + @property + def format(self) -> Format: ... + @property + def request(self) -> Request: ... + @property + def closed(self) -> bool: ... + def __init__(self, format: Format, request: Request) -> None: ... + def __enter__(self) -> Format._BaseReaderWriter: ... + def __exit__(self, type, value, traceback) -> None: ... + def __del__(self) -> None: ... + def close(self) -> None: ... + def _checkClosed(self, msg=None) -> None: ... + def _open(self, **kwargs) -> None: ... + def _close(self) -> None: ... + + class Reader(_BaseReaderWriter): + def get_length(self) -> int: ... + def get_data(self, index: int, **kwargs) -> Array: ... + def get_next_data(self, **kwargs) -> Dict[str, Any]: ... + def set_image_index(self, index: int, **kwargs) -> None: ... + def get_meta_data(self, index: int = None) -> Dict[str, Any]: ... + def iter_data(self) -> Array: ... + def __iter__(self) -> Array: ... + def __len__(self) -> int: ... + def _get_length(self) -> int: ... + def _get_data(self, index: int) -> Array: ... + def _get_meta_data(self, index: int) -> Dict[str, Any]: ... + + class Writer(_BaseReaderWriter): + def append_data(self, im: ArrayLike, meta: Dict[str, Any] = None) -> None: ... + def set_meta_data(self, meta: Dict[str, Any]) -> None: ... + def _append_data(self, im: ArrayLike, meta: Dict[str, Any]) -> None: ... + def _set_meta_data(self, meta: Dict[str, Any]) -> None: ... + +class FormatManager(object): + @property + def _formats(self) -> List[Format]: ... + def __repr__(self) -> str: ... + def __iter__(self) -> Format: ... + def __len__(self) -> int: ... + def __str__(self) -> str: ... + def __getitem__(self, name: str) -> Format: ... + def sort(self, *names: str) -> None: ... + def add_format(self, iio_format: Format, overwrite: bool = False) -> None: ... + def search_read_format(self, request: Request) -> Optional[Format]: ... 
def _open_with_extension_plugins(request, extension, plugin_kwargs):
    """Return the first plugin registered for ``extension`` that accepts ``request``.

    Walks every ``known_extensions[extension]`` entry in order and, inside each
    entry, every plugin name in its ``priority`` list. Plugins whose class
    cannot be imported (not installed) or that raise ``InitializationError``
    are skipped. Returns ``None`` when no candidate accepts the request.
    """
    for candidate_format in known_extensions[extension]:
        for plugin_name in candidate_format.priority:
            config = known_plugins[plugin_name]

            try:
                candidate_plugin = config.plugin_class
            except ImportError:
                # not installed
                continue

            try:
                return candidate_plugin(request, **plugin_kwargs)
            except InitializationError:
                # file extension doesn't match file type
                continue

    return None


def imopen(
    uri,
    io_mode,
    *,
    plugin=None,
    extension=None,
    format_hint=None,
    legacy_mode=False,
    **kwargs,
):
    """Open an ImageResource.

    .. warning::
        This warning is for pypy users. If you are not using a context manager,
        remember to deconstruct the returned plugin to avoid leaking the file
        handle to an unclosed file.

    Parameters
    ----------
    uri : str or pathlib.Path or bytes or file or Request
        The :doc:`ImageResource <../../user_guide/requests>` to load the
        image from.
    io_mode : str
        The mode in which the file is opened. Possible values are::

            ``r`` - open the file for reading
            ``w`` - open the file for writing

        Deprecated since v2.9:
        A second character can be added to give the reader a hint on what
        the user expects. This will be ignored by new plugins and will
        only have an effect on legacy plugins. Possible values are::

            ``i`` for a single image,
            ``I`` for multiple images,
            ``v`` for a single volume,
            ``V`` for multiple volumes,
            ``?`` for don't care

    plugin : str, Plugin, or None
        The plugin to use. If set to None imopen will perform a
        search for a matching plugin. If not None, this takes priority over
        the provided format hint.
    extension : str
        If not None, treat the provided ImageResource as if it had the given
        extension. This affects the order in which backends are considered, and
        when writing this may also influence the format used when encoding.
    format_hint : str
        Deprecated. Use `extension` instead.
    legacy_mode : bool
        If true use the v2 behavior when searching for a suitable
        plugin. This will ignore v3 plugins and will check ``plugin``
        against known extensions if no plugin with the given name can be found.
    **kwargs : Any
        Additional keyword arguments will be passed to the plugin upon
        construction.

    Returns
    -------
    plugin_instance
        The first plugin instance that accepted the request.

    Raises
    ------
    ValueError
        If ``plugin`` is a string that is not a key of ``known_plugins``; in
        legacy mode also when the target is read-only, is a directory, or no
        backend could be found.
    IOError
        Outside legacy mode, when the target is read-only, is a directory, no
        backend could be found, or the explicitly requested plugin failed to
        initialize. (In legacy mode an ``InitializationError`` of the requested
        plugin is reported as ``RuntimeError`` instead.)
    ImportError
        If the explicitly requested plugin raised ``ImportError`` while being
        loaded or constructed.

    Notes
    -----
    Registered plugins are controlled via the ``known_plugins`` dict in
    ``imageio.config``.

    Passing a ``Request`` as the uri is only supported if ``legacy_mode``
    is ``True``. In this case ``io_mode`` is ignored.

    Using the kwarg ``format_hint`` does not enforce the given format. It merely
    provides a `hint` to the selection process and plugin. The selection
    processes uses this hint for optimization; however, a plugin's decision how
    to read a ImageResource will - typically - still be based on the content of
    the resource.


    Examples
    --------

    >>> import imageio.v3 as iio
    >>> with iio.imopen("/path/to/image.png", "r") as file:
    >>>     im = file.read()

    >>> with iio.imopen("/path/to/output.jpg", "w") as file:
    >>>     file.write(im)

    """

    if isinstance(uri, Request) and legacy_mode:
        warnings.warn(
            "`iio.core.Request` is a low-level object and using it"
            " directly as input to `imopen` is discouraged. This will raise"
            " an exception in ImageIO v3.",
            DeprecationWarning,
            stacklevel=2,
        )

        request = uri
        uri = request.raw_uri
        io_mode = request.mode.io_mode
        request.format_hint = format_hint
    else:
        request = Request(uri, io_mode, format_hint=format_hint, extension=extension)

    # NOTE(review): the placeholder used for bytes input is an empty string in
    # this copy of the file; it may have lost a marker during extraction -
    # confirm against the upstream source.
    source = "" if isinstance(uri, bytes) else uri

    # fast-path based on plugin
    # (except in legacy mode)
    if plugin is not None:
        # ``config`` stays None when the caller passed a plugin class instead
        # of a registered name, so the error handling below can tell them apart.
        config = None
        if isinstance(plugin, str):
            try:
                config = known_plugins[plugin]
            except KeyError:
                request.finish()
                raise ValueError(
                    f"`{plugin}` is not a registered plugin name."
                ) from None

        try:
            # ``plugin_class`` is resolved inside the ``try`` because looking
            # it up may itself raise ImportError for a plugin that is not
            # installed.
            plugin_class = plugin if config is None else config.plugin_class
            return plugin_class(request, **kwargs)
        except InitializationError as class_specific:
            err_from = class_specific
            err_type = RuntimeError if legacy_mode else IOError
            err_msg = f"`{plugin}` can not handle the given uri."
        except ImportError as import_error:
            err_type = ImportError
            if config is None:
                # A plugin class has no PluginConfig to take an install hint
                # from; keep the original ImportError as the cause instead.
                err_from = import_error
                err_msg = (
                    f"Plugin `{plugin}` could not be initialized because an"
                    " import failed."
                )
            else:
                err_from = None
                err_msg = (
                    f"The `{config.name}` plugin is not installed. "
                    f"Use `pip install imageio[{config.install_name}]` to install it."
                )
        except Exception as generic_error:
            err_from = generic_error
            err_type = IOError
            err_msg = f"An unknown error occurred while initializing plugin `{plugin}`."

        request.finish()
        raise err_type(err_msg) from err_from

    # fast-path based on format_hint
    if request.format_hint is not None:
        plugin_instance = _open_with_extension_plugins(request, format_hint, kwargs)
        if plugin_instance is not None:
            return plugin_instance

        resource = "" if isinstance(request.raw_uri, bytes) else request.raw_uri
        warnings.warn(f"`{resource}` can not be opened as a `{format_hint}` file.")

    # fast-path based on file extension
    if request.extension in known_extensions:
        plugin_instance = _open_with_extension_plugins(
            request, request.extension, kwargs
        )
        if plugin_instance is not None:
            return plugin_instance

    # error out for read-only special targets
    # this is hacky; can we come up with a better solution for this?
    if request.mode.io_mode == IOMode.write:
        if isinstance(uri, str) and uri.startswith(SPECIAL_READ_URIS):
            request.finish()
            err_type = ValueError if legacy_mode else IOError
            err_msg = f"`{source}` is read-only."
            raise err_type(err_msg)

    # error out for directories
    # this is a bit hacky and should be cleaned once we decide
    # how to gracefully handle DICOM
    if request._uri_type == URI_FILENAME and Path(request.raw_uri).is_dir():
        request.finish()
        err_type = ValueError if legacy_mode else IOError
        err_msg = (
            "ImageIO does not generally support reading folders. "
            "Limited support may be available via specific plugins. "
            "Specify the plugin explicitly using the `plugin` kwarg, e.g. `plugin='DICOM'`"
        )
        raise err_type(err_msg)

    # close the current request here and use fresh/new ones while trying each
    # plugin This is slow (means potentially reopening a resource several
    # times), but should only happen rarely because this is the fallback if all
    # else fails.
    request.finish()

    # fallback option: try all plugins
    for config in known_plugins.values():
        # each plugin gets its own request; forward ``extension`` so that the
        # caller's override also applies to the fallback attempts and to the
        # "missing plugin" hint below.
        # NOTE(review): requests of rejected plugins are not finished here;
        # whether plugins release them on failure is not visible from this
        # function - confirm.
        request = Request(uri, io_mode, format_hint=format_hint, extension=extension)

        try:
            plugin_instance = config.plugin_class(request, **kwargs)
        except InitializationError:
            continue
        except ImportError:
            continue
        else:
            return plugin_instance

    err_type = ValueError if legacy_mode else IOError
    err_msg = f"Could not find a backend to open `{source}` with iomode `{io_mode}`."

    # check if a missing plugin could help
    if request.extension in known_extensions:
        missing_plugins = list()

        formats = known_extensions[request.extension]
        plugin_names = [
            plugin_name
            for file_format in formats
            for plugin_name in file_format.priority
        ]
        for name in plugin_names:
            config = known_plugins[name]

            try:
                # accessing the attribute triggers the (lazy) plugin import
                config.plugin_class
            except ImportError:
                missing_plugins.append(config)

        if len(missing_plugins) > 0:
            install_candidates = "\n".join(
                [
                    (
                        f" {config.name}: "
                        f"pip install imageio[{config.install_name}]"
                    )
                    for config in missing_plugins
                ]
            )
            err_msg += (
                "\nBased on the extension, the following plugins might add capable backends:\n"
                f"{install_candidates}"
            )

    request.finish()
    raise err_type(err_msg)
-0,0 +1,97 @@ +from typing import Literal, Type, TypeVar, overload + +from ..plugins.opencv import OpenCVPlugin +from ..plugins.pillow import PillowPlugin +from ..plugins.pyav import PyAVPlugin +from ..plugins.rawpy import RawPyPlugin +from ..plugins.tifffile_v3 import TifffilePlugin +from ..typing import ImageResource +from .legacy_plugin_wrapper import LegacyPlugin +from .v3_plugin_api import PluginV3 + +CustomPlugin = TypeVar("CustomPlugin", bound=PluginV3) + +@overload +def imopen( + uri: ImageResource, + io_mode: Literal["r", "w"], + *, + extension: str = None, + format_hint: str = None, +) -> PluginV3: ... +@overload +def imopen( + uri: ImageResource, + io_mode: Literal["r", "w"], + *, + plugin: str = None, + format_hint: str = None, + extension: str = None, + legacy_mode: Literal[True], + **kwargs, +) -> LegacyPlugin: ... +@overload +def imopen( + uri: ImageResource, + io_mode: Literal["r", "w"], + *, + format_hint: str = None, + extension: str = None, + legacy_mode: Literal[False] = False, +) -> PluginV3: ... +@overload +def imopen( + uri: ImageResource, + io_mode: Literal["r", "w"], + *, + plugin: Literal["pillow"], + extension: str = None, + format_hint: str = None, +) -> PillowPlugin: ... +@overload +def imopen( + uri: ImageResource, + io_mode: Literal["r", "w"], + *, + plugin: Literal["rawpy"], + extension: str = None, + format_hint: str = None, +) -> RawPyPlugin: ... +@overload +def imopen( + uri: ImageResource, + io_mode: Literal["r", "w"], + *, + plugin: Literal["pyav"], + extension: str = None, + format_hint: str = None, + container: str = None, +) -> PyAVPlugin: ... +@overload +def imopen( + uri, + io_mode: Literal["r", "w"], + *, + plugin: Literal["opencv"], + extension: str = None, + format_hint: str = None, +) -> OpenCVPlugin: ... +@overload +def imopen( + uri, + io_mode: Literal["r", "w"], + *, + plugin: Literal["tifffile"], + extension: str = None, + format_hint: str = None, +) -> TifffilePlugin: ... 
def _legacy_default_index(format):
    """Return the index legacy code uses when the caller passes ``index=None``.

    ``Ellipsis`` (meaning "all images") for the ``FFMPEG`` and ``GIF-PIL``
    formats, ``0`` (the first image) for every other format.
    """
    if format._name in ("FFMPEG", "GIF-PIL"):
        return Ellipsis

    return 0


class LegacyPlugin(PluginV3):
    """A plugin to make old (v2.9) plugins compatible with v3.0

    .. deprecated:: 2.9
        `legacy_get_reader` will be removed in a future version of imageio.
        `legacy_get_writer` will be removed in a future version of imageio.

    This plugin is a wrapper around the old FormatManager class and exposes
    all the old plugins via the new API. On top of this it has
    ``legacy_get_reader`` and ``legacy_get_writer`` methods to allow using
    it with the v2.9 API.

    Methods
    -------
    read(index=None, **kwargs)
        Read the image at position ``index``.
    write(image, **kwargs)
        Write image to the URI.
    iter(**kwargs)
        Iteratively yield images from the given URI.
    get_meta(index=None)
        Return the metadata for the image at position ``index``.
    legacy_get_reader(**kwargs)
        Returns the v2.9 image reader. (deprecated)
    legacy_get_writer(**kwargs)
        Returns the v2.9 image writer. (deprecated)

    Examples
    --------

    >>> import imageio.v3 as iio
    >>> with iio.imopen("/path/to/image.tiff", "r", legacy_mode=True) as file:
    >>>     reader = file.legacy_get_reader()  # deprecated
    >>>     for im in file.iter():
    >>>         print(im.shape)

    """

    def __init__(self, request, legacy_plugin):
        """Instantiate a new Legacy Plugin

        Parameters
        ----------
        request : Request
            The request describing the resource to read from or write to.
        legacy_plugin : Format
            The (legacy) format to use to interface with the URI.

        Raises
        ------
        InitializationError
            If the legacy format reports that it can not read (read mode) or
            can not write (any other mode) the request.

        """
        self._request = request
        self._format = legacy_plugin

        # NOTE(review): the placeholder used for bytes input is an empty string
        # in this copy of the file; it may have lost a marker during
        # extraction - confirm against the upstream source.
        source = (
            ""
            if isinstance(self._request.raw_uri, bytes)
            else self._request.raw_uri
        )
        if self._request.mode.io_mode == IOMode.read:
            if not self._format.can_read(request):
                raise InitializationError(
                    f"`{self._format.name}`" f" can not read `{source}`."
                )
        else:
            if not self._format.can_write(request):
                raise InitializationError(
                    f"`{self._format.name}`" f" can not write to `{source}`."
                )

    def legacy_get_reader(self, **kwargs):
        """legacy_get_reader(**kwargs)

        a utility method to provide support for the V2.9 API

        Parameters
        ----------
        kwargs : ...
            Further keyword arguments are passed to the reader. See :func:`.help`
            to see what arguments are available for a particular format.
        """

        # Note: this will break thread-safety
        self._request._kwargs = kwargs

        # safeguard for DICOM plugin reading from folders: a folder has no
        # file object to rewind. This is an explicit check rather than an
        # ``assert`` because asserts are stripped under ``python -O``, which
        # would silently skip the rewind for every request.
        try:
            is_folder = Path(self._request.filename).is_dir()
        except OSError:
            is_folder = False  # not a valid path on this OS

        if not is_folder:
            self._request.get_file().seek(0)

        return self._format.get_reader(self._request)

    def read(self, *, index=None, **kwargs):
        """
        Parses the given URI and creates a ndarray from it.

        Parameters
        ----------
        index : {integer, Ellipsis, None}
            If the URI contains a list of ndimages return the index-th
            image. If ``Ellipsis``, stack all images into an ndimage along the
            0-th dimension (equivalent to np.stack(imgs, axis=0)). If None,
            the format's legacy default is used: ``Ellipsis`` for ``FFMPEG``
            and ``GIF-PIL``, ``0`` for every other format.
        kwargs : ...
            Further keyword arguments are passed to the reader. See
            :func:`.help` to see what arguments are available for a particular
            format.

        Returns
        -------
        ndimage : np.ndarray
            A numpy array containing the decoded image data.

        """

        if index is None:
            index = _legacy_default_index(self._format)

        if index is Ellipsis:
            img = np.stack([im for im in self.iter(**kwargs)])
            return img

        reader = self.legacy_get_reader(**kwargs)
        return reader.get_data(index)

    def legacy_get_writer(self, **kwargs):
        """legacy_get_writer(**kwargs)

        Returns a :class:`.Writer` object which can be used to write data
        and meta data to the specified file.

        Parameters
        ----------
        kwargs : ...
            Further keyword arguments are passed to the writer. See :func:`.help`
            to see what arguments are available for a particular format.
        """

        # Note: this will break thread-safety
        self._request._kwargs = kwargs
        return self._format.get_writer(self._request)

    def write(self, ndimage, *, is_batch=None, metadata=None, **kwargs):
        """
        Write an ndimage to the URI specified in path.

        If the URI points to a file on the current host and the file does not
        yet exist it will be created. If the file exists already, it will be
        appended if possible; otherwise, it will be replaced.

        Parameters
        ----------
        ndimage : numpy.ndarray
            The ndimage or list of ndimages to write.
        is_batch : bool
            If True, treat the supplied ndimage as a batch of images. If False,
            treat the supplied ndimage as a single image. If None, try to
            determine ``is_batch`` from the ndimage's shape and ndim.
        metadata : dict
            The metadata passed to write alongside the image.
        kwargs : ...
            Further keyword arguments are passed to the writer. See
            :func:`.help` to see what arguments are available for a
            particular format.


        Returns
        -------
        buffer : bytes
            When writing to the special target "", this function will
            return the encoded image data as a bytes string. Otherwise it
            returns None.

        Raises
        ------
        ValueError
            If an image has fewer than two dimensions or a dtype that is
            neither numeric nor boolean.

        Notes
        -----
        Automatically determining ``is_batch`` may fail for some images due to
        shape aliasing. For example, it may classify a channel-first color image
        as a batch of gray images. In most cases this automatic deduction works
        fine (it has for almost a decade), but if you do have one of those edge
        cases (or are worried that you might) consider explicitly setting
        ``is_batch``.

        """

        if is_batch or isinstance(ndimage, (list, tuple)):
            pass  # ndimage is list of images
        elif is_batch is False:
            ndimage = [ndimage]
        else:
            # Write the largest possible block by guessing the meaning of each
            # dimension from the shape/ndim and then checking if any batch
            # dimensions are left.
            ndimage = np.asanyarray(ndimage)
            batch_dims = ndimage.ndim

            # two spatial dimensions
            batch_dims = max(batch_dims - 2, 0)

            # packed (channel-last) image
            if ndimage.ndim >= 3 and ndimage.shape[-1] < 5:
                batch_dims = max(batch_dims - 1, 0)

            # format supports volumetric images
            ext_infos = known_extensions.get(self._request.extension, list())
            for ext_info in ext_infos:
                if self._format.name in ext_info.priority and ext_info.volume_support:
                    batch_dims = max(batch_dims - 1, 0)
                    break

            if batch_dims == 0:
                ndimage = [ndimage]

        with self.legacy_get_writer(**kwargs) as writer:
            for image in ndimage:
                image = np.asanyarray(image)

                if image.ndim < 2:
                    raise ValueError(
                        "The image must have at least two spatial dimensions."
                    )

                if not np.issubdtype(image.dtype, np.number) and not np.issubdtype(
                    image.dtype, bool
                ):
                    raise ValueError(
                        f"All images have to be numeric, and not `{image.dtype}`."
                    )

                writer.append_data(image, metadata)

        return writer.request.get_result()

    def iter(self, **kwargs):
        """Iterate over a list of ndimages given by the URI

        Parameters
        ----------
        kwargs : ...
            Further keyword arguments are passed to the reader. See
            :func:`.help` to see what arguments are available for a particular
            format.
        """

        reader = self.legacy_get_reader(**kwargs)
        for image in reader:
            yield image

    def properties(self, index=None):
        """Standardized ndimage metadata.

        Parameters
        ----------
        index : {integer, Ellipsis, None}
            The index of the ndimage for which to return properties. If
            ``Ellipsis``, return the properties of the whole stack, derived
            from the first image and the reader's length. If ``None``, the
            format's legacy default is used: ``Ellipsis`` for ``FFMPEG`` and
            ``GIF-PIL``, ``0`` for every other format.

        Returns
        -------
        properties : ImageProperties
            A dataclass filled with standardized image metadata.

        """

        if index is None:
            index = _legacy_default_index(self._format)

        # for backwards compatibility ... actually reads pixel data :(
        if index is Ellipsis:
            image = self.read(index=0)
            n_images = self.legacy_get_reader().get_length()
            return ImageProperties(
                shape=(n_images, *image.shape),
                dtype=image.dtype,
                n_images=n_images,
                is_batch=True,
            )

        image = self.read(index=index)
        return ImageProperties(
            shape=image.shape,
            dtype=image.dtype,
            is_batch=False,
        )

    def get_meta(self, *, index=None):
        """Read ndimage metadata from the URI

        Thin wrapper that forwards to ``metadata(index=index,
        exclude_applied=False)``.

        Parameters
        ----------
        index : {integer, None}
            If the URI contains a list of ndimages return the metadata
            corresponding to the index-th image. If None, behavior depends on
            the used api

            Legacy-style API: return metadata of the first element (index=0)
            New-style API: Behavior depends on the used Plugin.

        Returns
        -------
        metadata : dict
            A dictionary of metadata.

        """

        return self.metadata(index=index, exclude_applied=False)

    def metadata(self, index=None, exclude_applied: bool = True):
        """Format-Specific ndimage metadata.

        Parameters
        ----------
        index : {integer, Ellipsis, None}
            The index passed to the legacy reader's ``get_meta_data``. If
            ``None``, the format's legacy default is used: ``Ellipsis`` for
            ``FFMPEG`` and ``GIF-PIL``, ``0`` for every other format.
        exclude_applied : bool
            This parameter exists for compatibility and has no effect. Legacy
            plugins always report all metadata they find.

        Returns
        -------
        metadata : dict
            A dictionary filled with format-specific metadata fields and their
            values.

        """

        if index is None:
            index = _legacy_default_index(self._format)

        return self.legacy_get_reader().get_meta_data(index=index)

    def __del__(self) -> None:
        pass
        # turns out we can't close the file here for LegacyPlugin
        # because it would break backwards compatibility
        # with legacy_get_writer and legacy_get_reader
        # self._request.finish()
class IOMode(str, enum.Enum):
    """Available IO modes

    Helper enum for ``Request.Mode``, which combines a ``Request.ImageMode``
    with a ``Request.IOMode``. The IO mode tells the plugin whether the
    resource should be read from or written to. Available values are

    - read ("r"): Read from the specified resource
    - write ("w"): Write to the specified resource

    """

    read = "r"
    write = "w"


class ImageMode(str, enum.Enum):
    """Available Image modes

    Helper enum for ``Request.Mode``, which combines a ``Request.ImageMode``
    with a ``Request.IOMode``. The image mode tells the plugin the desired
    (and expected) image shape. Available values are

    - single_image ("i"): Return a single image extending in two spatial
      dimensions
    - multi_image ("I"): Return a list of images extending in two spatial
      dimensions
    - single_volume ("v"): Return an image extending into multiple dimensions.
      E.g. three spatial dimensions for image stacks, or two spatial and one
      time dimension for videos
    - multi_volume ("V"): Return a list of images extending into multiple
      dimensions.
    - any_mode ("?"): Return an image in any format (the plugin decides the
      appropriate action).

    """

    single_image = "i"
    multi_image = "I"
    single_volume = "v"
    multi_volume = "V"
    any_mode = "?"


@enum.unique
class Mode(str, enum.Enum):
    """The mode to use when interacting with the resource

    ``Request.Mode`` pairs one ``Request.IOMode`` character (how the resource
    is accessed) with one ``Request.ImageMode`` character (the desired and
    expected image shape). See the documentation of those two enums for the
    meaning of the individual characters.

    Available modes are all combinations of ``Request.IOMode`` and
    ``Request.ImageMode``:

    - read_single_image ("ri")
    - read_multi_image ("rI")
    - read_single_volume ("rv")
    - read_multi_volume ("rV")
    - read_any ("r?")
    - write_single_image ("wi")
    - write_multi_image ("wI")
    - write_single_volume ("wv")
    - write_multi_volume ("wV")
    - write_any ("w?")

    Examples
    --------
    >>> Request.Mode("rI")  # a list of simple images should be read from the resource
    >>> Request.Mode("wv")  # a single volume should be written to the resource

    """

    read_single_image = "ri"
    read_multi_image = "rI"
    read_single_volume = "rv"
    read_multi_volume = "rV"
    read_any = "r?"
    write_single_image = "wi"
    write_multi_image = "wI"
    write_single_volume = "wv"
    write_multi_volume = "wV"
    write_any = "w?"

    @classmethod
    def _missing_(cls, value):
        """Enable Mode("r") and Mode("w")

        ``_missing_`` is the enum hook that runs when the constructor cannot
        find a member for the given value. It is used here to map the
        one-character v3 modes "r" and "w" onto their legacy "don't care"
        equivalents "r?" and "w?"; any other value is rejected.

        More info on _missing_:
        https://docs.python.org/3/library/enum.html#supported-sunder-names
        """

        for shorthand in ("r", "w"):
            if value == shorthand:
                return cls(shorthand + "?")

        raise ValueError(f"{value} is no valid Mode.")

    @property
    def io_mode(self) -> IOMode:
        io_char = self.value[0]
        return IOMode(io_char)

    @property
    def image_mode(self) -> ImageMode:
        image_char = self.value[1]
        return ImageMode(image_char)

    def __getitem__(self, key):
        """For backwards compatibility with the old non-enum modes"""
        if key == 0:
            return self.io_mode
        if key == 1:
            return self.image_mode

        raise IndexError(f"Mode has no item {key}")
+RETURN_BYTES = "" + +# Example images that will be auto-downloaded +EXAMPLE_IMAGES = { + "astronaut.png": "Image of the astronaut Eileen Collins", + "camera.png": "A grayscale image of a photographer", + "checkerboard.png": "Black and white image of a chekerboard", + "wood.jpg": "A (repeatable) texture of wooden planks", + "bricks.jpg": "A (repeatable) texture of stone bricks", + "clock.png": "Photo of a clock with motion blur (Stefan van der Walt)", + "coffee.png": "Image of a cup of coffee (Rachel Michetti)", + "chelsea.png": "Image of Stefan's cat", + "wikkie.png": "Image of Almar's cat", + "coins.png": "Image showing greek coins from Pompeii", + "horse.png": "Image showing the silhouette of a horse (Andreas Preuss)", + "hubble_deep_field.png": "Photograph taken by Hubble telescope (NASA)", + "immunohistochemistry.png": "Immunohistochemical (IHC) staining", + "moon.png": "Image showing a portion of the surface of the moon", + "page.png": "A scanned page of text", + "text.png": "A photograph of handdrawn text", + "bacterial_colony.tif": "Multi-page TIFF image of a bacterial colony", + "calcium_imaging.tif": "Neuronal calcium imaging video", + "chelsea.zip": "The chelsea.png in a zipfile (for testing)", + "chelsea.bsdf": "The chelsea.png in a BSDF file(for testing)", + "newtonscradle.gif": "Animated GIF of a newton's cradle", + "cockatoo.mp4": "Video file of a cockatoo", + "cockatoo_yuv420.mp4": "Video file of a cockatoo with yuv420 pixel format", + "stent.npz": "Volumetric image showing a stented abdominal aorta", + "meadow_cube.jpg": "A cubemap image of a meadow, e.g. to render a skybox.", +} + + +class Request(object): + """ImageResource handling utility. + + Represents a request for reading or saving an image resource. 
def __init__(self, uri, mode, *, extension=None, format_hint: str = None, **kwargs):
    """Initialise the request, validate ``mode`` and parse ``uri``.

    Parameters
    ----------
    uri : {str, bytes, file}
        The resource to load the image from (or write it to).
    mode : str
        Anything accepted by ``Mode``: "r"/"w" optionally followed by one
        of "i", "I", "v", "V", "?".
    extension : str, optional
        File extension, including the leading ".". When omitted, the
        lowercased suffix of the parsed filename (or of the URL path) is
        used, if there is one.
    format_hint : str, optional
        Deprecated; must start with "." as well. It is stored and also
        used as the extension when no extension could be determined.
    **kwargs
        Stored unchanged and exposed through ``kwargs``.

    Raises
    ------
    ValueError
        If ``mode`` is not a valid ``Mode``, or if ``extension`` or
        ``format_hint`` does not start with "." (this includes the empty
        string).
    """
    # General
    self.raw_uri = uri
    self._uri_type = None
    self._filename = None
    self._extension = None
    self._format_hint = None
    self._kwargs = kwargs
    self._result = None  # Some write actions may have a result

    # To handle the user-side
    self._filename_zip = None  # not None if a zipfile is used
    self._bytes = None  # Incoming bytes
    self._zipfile = None  # To store a zipfile instance (if used)

    # To handle the plugin side
    self._file = None  # To store the file instance
    self._file_is_local = False  # whether the data needs to be copied at end
    self._filename_local = None  # not None if using tempfile on this FS
    self._firstbytes = None  # For easy header parsing

    # Check mode
    try:
        self._mode = Mode(mode)
    except ValueError as err:
        raise ValueError(f"Invalid Request.Mode: {mode}") from err

    # Parse what was given
    self._parse_uri(uri)

    # Set extension
    if extension is not None:
        # Slice instead of index so that "" is rejected with the
        # documented ValueError rather than an IndexError.
        if extension[:1] != ".":
            raise ValueError(
                "`extension` should be a file extension starting with a `.`,"
                f" but is `{extension}`."
            )
        self._extension = extension
    elif self._filename is not None:
        if self._uri_type in (URI_FILENAME, URI_ZIPPED):
            path = self._filename
        else:
            # For URLs only the path component carries the suffix
            path = urlparse(self._filename).path
        ext = Path(path).suffix.lower()
        self._extension = ext if ext != "" else None

    if format_hint is not None:
        warnings.warn(
            "The usage of `format_hint` is deprecated and will be removed "
            "in ImageIO v3. Use `extension` instead.",
            DeprecationWarning,
        )
        # Same empty-string-safe check as for `extension`
        if format_hint[:1] != ".":
            raise ValueError(
                "`format_hint` should be a file extension starting with a `.`,"
                f" but is `{format_hint}`."
            )

    # Goes through the property setter, which also fills in the extension
    # when none was determined above.
    self.format_hint = format_hint
% fn) + self._uri_type = URI_FILENAME + self._filename = get_remote_file("images/" + fn, auto=True) + if zip_part: + self._filename += "/" + zip_part + elif uri.startswith("http://") or uri.startswith("https://"): + self._uri_type = URI_HTTP + self._filename = uri + elif uri.startswith("ftp://") or uri.startswith("ftps://"): + self._uri_type = URI_FTP + self._filename = uri + elif uri.startswith("file://"): + self._uri_type = URI_FILENAME + self._filename = uri[7:] + elif uri.startswith(SPECIAL_READ_URIS) and is_read_request: + self._uri_type = URI_BYTES + self._filename = uri + elif uri.startswith(RETURN_BYTES) and is_write_request: + self._uri_type = URI_BYTES + self._filename = uri + else: + self._uri_type = URI_FILENAME + self._filename = uri + + elif isinstance(uri, memoryview) and is_read_request: + self._uri_type = URI_BYTES + self._filename = "" + self._bytes = uri.tobytes() + elif isinstance(uri, bytes) and is_read_request: + self._uri_type = URI_BYTES + self._filename = "" + self._bytes = uri + elif isinstance(uri, Path): + self._uri_type = URI_FILENAME + self._filename = str(uri) + # Files + elif is_read_request: + if hasattr(uri, "read") and hasattr(uri, "close"): + self._uri_type = URI_FILE + self._filename = "" + self._file = uri # Data must be read from here + elif is_write_request: + if hasattr(uri, "write") and hasattr(uri, "close"): + self._uri_type = URI_FILE + self._filename = "" + self._file = uri # Data must be written here + + # Expand user dir + if self._uri_type == URI_FILENAME and self._filename.startswith("~"): + self._filename = os.path.expanduser(self._filename) + + # Check if a zipfile + if self._uri_type == URI_FILENAME: + # Search for zip extension followed by a path separator + for needle in [".zip/", ".zip\\"]: + zip_i = self._filename.lower().find(needle) + if zip_i > 0: + zip_i += 4 + zip_path = self._filename[:zip_i] + if os.path.isdir(zip_path): + pass # is an existing dir (see #548) + elif is_write_request or 
os.path.isfile(zip_path): + self._uri_type = URI_ZIPPED + self._filename_zip = ( + zip_path, + self._filename[zip_i:].lstrip("/\\"), + ) + break + + # Check if we could read it + if self._uri_type is None: + uri_r = repr(uri) + if len(uri_r) > 60: + uri_r = uri_r[:57] + "..." + raise IOError("Cannot understand given URI: %s." % uri_r) + + # Check if this is supported + noWriting = [URI_HTTP, URI_FTP] + if is_write_request and self._uri_type in noWriting: + raise IOError("imageio does not support writing to http/ftp.") + + # Deprecated way to load standard images, give a sensible error message + if is_read_request and self._uri_type in [URI_FILENAME, URI_ZIPPED]: + fn = self._filename + if self._filename_zip: + fn = self._filename_zip[0] + if (not os.path.exists(fn)) and (fn in EXAMPLE_IMAGES): + raise IOError( + "No such file: %r. This file looks like one of " + "the standard images, but from imageio 2.1, " + "standard images have to be specified using " + '"imageio:%s".' % (fn, fn) + ) + + # Make filename absolute + if self._uri_type in [URI_FILENAME, URI_ZIPPED]: + if self._filename_zip: + self._filename_zip = ( + os.path.abspath(self._filename_zip[0]), + self._filename_zip[1], + ) + else: + self._filename = os.path.abspath(self._filename) + + # Check whether file name is valid + if self._uri_type in [URI_FILENAME, URI_ZIPPED]: + fn = self._filename + if self._filename_zip: + fn = self._filename_zip[0] + if is_read_request: + # Reading: check that the file exists (but is allowed a dir) + if not os.path.exists(fn): + raise FileNotFoundError("No such file: '%s'" % fn) + else: + # Writing: check that the directory to write to does exist + dn = os.path.dirname(fn) + if not os.path.exists(dn): + raise FileNotFoundError("The directory %r does not exist" % dn) + + @property + def filename(self): + """Name of the ImageResource. + + + The uri for which reading/saving was requested. This + can be a filename, an http address, or other resource + identifier. 
def get_file(self):
    """Get a file object for the resource associated with this request.

    If this is a reading request, the file is in read mode, otherwise in
    write mode. This method is not thread safe. Plugins should not close
    the file when done.

    This is the preferred way to read/write the data. But if a format
    cannot handle file-like objects, they should use
    ``get_local_filename()``.

    Returns
    -------
    file-like
        The (cached) file object. For write requests to bytes or into a
        zipfile this is an in-memory ``BytesIO`` whose content is picked up
        by ``finish()``. For zip members and http/ftp resources it is a
        ``SeekableFileObject`` wrapper.
    """
    want_to_write = self.mode.io_mode is IOMode.write

    # Is there already a file?
    # Either _uri_type == URI_FILE, or we already opened the file,
    # e.g. by using firstbytes
    if self._file is not None:
        return self._file

    if self._uri_type == URI_BYTES:
        if want_to_write:
            # Create new file object, we catch the bytes in finish()
            self._file = BytesIO()
            self._file_is_local = True
        else:
            self._file = BytesIO(self._bytes)

    elif self._uri_type == URI_FILENAME:
        self._file = open(self.filename, "wb" if want_to_write else "rb")

    elif self._uri_type == URI_ZIPPED:
        # Get the correct filename
        filename, name = self._filename_zip
        if want_to_write:
            # Create new file object, we catch the bytes in finish()
            self._file = BytesIO()
            self._file_is_local = True
        else:
            # Open zipfile and open new file object for specific file
            self._zipfile = zipfile.ZipFile(filename, "r")
            self._file = self._zipfile.open(name, "r")
            self._file = SeekableFileObject(self._file)

    # NOTE: this used to read ``[URI_HTTP or URI_FTP]``, which evaluates to
    # ``[URI_HTTP]``; FTP requests then fell through every branch and this
    # method returned None.
    elif self._uri_type in (URI_HTTP, URI_FTP):
        assert not want_to_write  # This should have been tested in init
        # Timeout in seconds; only plain non-negative integers are accepted
        # from the environment, everything else falls back to 5.
        timeout = os.getenv("IMAGEIO_REQUEST_TIMEOUT")
        if timeout is None or not timeout.isdigit():
            timeout = 5
        self._file = urlopen(self.filename, timeout=float(timeout))
        self._file = SeekableFileObject(self._file)

    return self._file
def finish(self) -> None:
    """Wrap up this request.

    Finishes any pending reads or writes, closes any open files and frees
    any resources allocated by this request.

    For write requests the data that was produced locally (in a temporary
    file or an in-memory buffer) is first moved to its destination: the
    result slot for byte requests, the user's file object, or the target
    zipfile.
    """

    if self.mode.io_mode == IOMode.write:
        # See if we "own" the data and must put it somewhere
        payload = None
        if self._filename_local:
            payload = Path(self._filename_local).read_bytes()
        elif self._file_is_local:
            self._file_is_local = False
            payload = self._file.getvalue()

        # Put the data in the right place
        if payload is not None:
            if self._uri_type == URI_BYTES:
                self._result = payload  # Picked up by imread function
            elif self._uri_type == URI_FILE:
                self._file.write(payload)
            elif self._uri_type == URI_ZIPPED:
                # Context manager: the archive is closed even when
                # writestr() raises.
                with zipfile.ZipFile(self._filename_zip[0], "a") as zf:
                    zf.writestr(self._filename_zip[1], payload)
            # elif self._uri_type == URI_FILENAME: -> is always direct
            # elif self._uri_type == URI_FTP/HTTP: -> write not supported

    # Close open files that we know of (and are responsible for);
    # a file object handed in by the user (URI_FILE) is left open.
    if self._file and self._uri_type != URI_FILE:
        self._file.close()
        self._file = None
    if self._zipfile:
        self._zipfile.close()
        self._zipfile = None

    # Remove temp file
    if self._filename_local:
        try:
            os.remove(self._filename_local)
        except Exception:  # pragma: no cover
            # Best effort: a leftover temp file must not fail the request
            warnings.warn(
                "Failed to delete the temporary file at "
                f"`{self._filename_local}`. Please report this issue."
            )
        self._filename_local = None

    # Detach so gc can clean even if a reference of self lingers
    self._bytes = None
class SeekableFileObject:
    """A readonly wrapper file object that adds support for seeking, even if
    the wrapped file object does not. This allows us to stream from http and
    still use Pillow.

    Everything read from the wrapped object is kept in an internal buffer,
    so seeking backwards is served from memory.
    """

    # Number of bytes requested per step while searching for a line ending
    _LINE_CHUNK = 8192

    def __init__(self, f):
        self.f = f
        self._i = 0  # >=0 but can exceed buffer
        self._buffer = b""
        self._have_all = False
        self.closed = False

    def _buffer_up_to(self, target):
        """Try to grow the buffer to ``target`` bytes; flag EOF on a short read."""
        missing = target - len(self._buffer)
        if missing > 0 and not self._have_all:
            more = self.f.read(missing)
            if len(more) < missing:
                self._have_all = True
            self._buffer += more

    def read(self, n=None):
        """Read up to ``n`` bytes from the current position.

        ``None`` or a negative ``n`` reads everything that is left.
        """
        # Fix up n
        if n is not None:
            n = int(n)
            if n < 0:
                n = None

        # Can and must we read more?
        if not self._have_all:
            if n is None:
                self._buffer += self.f.read()
                self._have_all = True
            else:
                self._buffer_up_to(self._i + n)

        # Read data from buffer and update pointer
        if n is None:
            res = self._buffer[self._i :]
        else:
            res = self._buffer[self._i : self._i + n]
        self._i += len(res)

        return res

    def readline(self, size=-1):
        """Read and return one line (including the trailing ``b"\\n"``).

        At most ``size`` bytes are returned when ``size`` is non-negative.
        Returns ``b""`` at the end of the data.

        The previous implementation was a generator that accessed the
        nonexistent attribute ``self._file``, so it could never return a
        line.
        """
        limit = -1 if size is None else int(size)
        start = self._i
        scan = start
        while True:
            stop = len(self._buffer)
            if limit >= 0:
                stop = min(stop, start + limit)
            newline = self._buffer.find(b"\n", scan, stop)
            if newline != -1:
                end = newline + 1
                break
            limit_reached = limit >= 0 and len(self._buffer) >= start + limit
            if self._have_all or limit_reached:
                end = stop
                break
            # Nothing found in what is buffered so far: skip the part that
            # was already searched and fetch some more data.
            scan = max(scan, stop)
            target = len(self._buffer) + self._LINE_CHUNK
            if limit >= 0:
                target = min(target, start + limit)
            self._buffer_up_to(target)

        line = self._buffer[start:end]
        self._i += len(line)
        return line

    def tell(self):
        """Return the current position."""
        return self._i

    def seek(self, i, mode=0):
        """Move to offset ``i`` relative to ``mode`` (0: start, 1: current,
        2: end) and return the new absolute position.

        Raises ``ValueError`` for a negative absolute offset or an unknown
        ``mode``.
        """
        # Mimic BytesIO behavior

        # Get the absolute new position
        i = int(i)
        if mode == 0:
            if i < 0:
                raise ValueError("negative seek value " + str(i))
            real_i = i
        elif mode == 1:
            real_i = max(0, self._i + i)  # negative ok here
        elif mode == 2:
            if not self._have_all:
                self.read()
            real_i = max(0, len(self._buffer) + i)
        else:
            # Report the offending whence value (not the offset)
            raise ValueError("invalid whence (%s, should be 0, 1 or 2)" % mode)

        # Read some?
        if real_i <= len(self._buffer):
            pass  # no need to read
        elif not self._have_all:
            assert real_i > self._i  # if we don't have all, _i cannot be > _buffer
            self.read(real_i - self._i)  # sets self._i

        self._i = real_i
        return self._i

    def close(self):
        """Mark this wrapper closed and close the wrapped file object."""
        self.closed = True
        self.f.close()

    def isatty(self):
        return False

    def seekable(self):
        return True
+ + """ + + pass diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/core/request.pyi b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/core/request.pyi new file mode 100644 index 0000000000000000000000000000000000000000..6dee65a984baa96647d682111daa533805a77bb4 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/core/request.pyi @@ -0,0 +1,90 @@ +from typing import BinaryIO, Optional, Dict, Any, Sequence, overload, Literal +from ..typing import ImageResource +import enum + +EXAMPLE_IMAGES: Dict[str, str] +RETURN_BYTES = "" +URI_BYTES = 1 +URI_FILE = 2 +URI_FILENAME = 3 +URI_ZIPPED = 4 +URI_HTTP = 5 +URI_FTP = 6 + +class IOMode(str, enum.Enum): + read = "r" + write = "w" + +class ImageMode(str, enum.Enum): + single_image = "i" + multi_image = "I" + single_volume = "v" + multi_volume = "V" + any_mode = "?" + +@enum.unique +class Mode(str, enum.Enum): + read_single_image = "ri" + read_multi_image = "rI" + read_single_volume = "rv" + read_multi_volume = "rV" + read_any = "r?" + write_single_image = "wi" + write_multi_image = "wI" + write_single_volume = "wv" + write_multi_volume = "wV" + write_any = "w?" + + @classmethod + def _missing_(cls, value: Any) -> Mode: ... + @property + def io_mode(self) -> IOMode: ... + @property + def image_mode(self) -> ImageMode: ... + +class InitializationError(Exception): ... + +class Request(object): + _uri_type: int + raw_uri: ImageResource + + @property + def filename(self) -> str: ... + @property + def extension(self) -> str: ... + @property + def format_hint(self) -> Optional[str]: ... + @format_hint.setter + def format_hint(self, format: str) -> None: ... + @property + def mode(self) -> Mode: ... + @property + def kwargs(self) -> Dict[str, Any]: ... + @property + def firstbytes(self) -> bytes: ... + def __init__( + self, + uri: ImageResource, + mode: str, + *, + extension: str = None, + format_hint: str = None, + **kwargs, + ) -> None: ... 
def urlopen(*args, **kwargs):
    """Thin compatibility wrapper around ``urllib.request.urlopen``.

    The import happens lazily at call time. If it fails (which can occur
    in frozen applications) a ``RuntimeError`` is raised instead of the
    ``ImportError``. All arguments are forwarded unchanged.
    """
    try:
        from urllib.request import urlopen as _stdlib_urlopen
    except ImportError:
        raise RuntimeError("Could not import urlopen.")
    else:
        return _stdlib_urlopen(*args, **kwargs)
def image_as_uint(im, bitdepth=None):
    """Convert the given image to uint (default: uint8)

    If the dtype already matches the desired format, it is returned
    as-is. If the image is float, and all values are between 0 and 1,
    the values are multiplied by ``np.power(2.0, bitdepth) - 1``.
    uint16 input converted to 8 bit, and uint32/uint64 input, is reduced
    by right-shifting away the surplus low bits. In all other
    situations, the values are scaled such that the minimum value
    becomes 0 and the maximum value becomes np.power(2.0, bitdepth)-1
    (255 for 8-bit and 65535 for 16-bit).

    Parameters
    ----------
    im : np.ndarray
        The image to convert.
    bitdepth : int, optional
        8 or 16. Any falsy value (``None``, 0) selects 8.

    Returns
    -------
    np.ndarray
        ``im`` itself when no conversion is needed, otherwise a new array
        of dtype uint8 or uint16.

    Raises
    ------
    ValueError
        If ``im`` is not a numpy array, ``bitdepth`` is not 8 or 16, or (in
        the generic scaling branch) the minimum or maximum is not finite.
    """
    if not bitdepth:
        bitdepth = 8
    if not isinstance(im, np.ndarray):
        raise ValueError("Image must be a numpy array")
    if bitdepth == 8:
        out_type = np.uint8
    elif bitdepth == 16:
        out_type = np.uint16
    else:
        raise ValueError("Bitdepth must be either 8 or 16")
    # Source and target dtype names, used in the precision warnings below
    dtype_str1 = str(im.dtype)
    dtype_str2 = out_type.__name__
    if (im.dtype == np.uint8 and bitdepth == 8) or (
        im.dtype == np.uint16 and bitdepth == 16
    ):
        # Already the correct format? Return as-is
        return im
    if dtype_str1.startswith("float") and np.nanmin(im) >= 0 and np.nanmax(im) <= 1:
        _precision_warn(dtype_str1, dtype_str2, "Range [0, 1].")
        # The offset just below 0.5 turns the truncating astype() at the end
        # into round-to-nearest without pushing the maximum out of range.
        im = im.astype(np.float64) * (np.power(2.0, bitdepth) - 1) + 0.499999999
    elif im.dtype == np.uint16 and bitdepth == 8:
        _precision_warn(dtype_str1, dtype_str2, "Losing 8 bits of resolution.")
        im = np.right_shift(im, 8)
    elif im.dtype == np.uint32:
        _precision_warn(
            dtype_str1,
            dtype_str2,
            "Losing {} bits of resolution.".format(32 - bitdepth),
        )
        im = np.right_shift(im, 32 - bitdepth)
    elif im.dtype == np.uint64:
        _precision_warn(
            dtype_str1,
            dtype_str2,
            "Losing {} bits of resolution.".format(64 - bitdepth),
        )
        im = np.right_shift(im, 64 - bitdepth)
    else:
        # Generic case: stretch [min, max] onto the full output range
        mi = np.nanmin(im)
        ma = np.nanmax(im)
        if not np.isfinite(mi):
            raise ValueError("Minimum image value is not finite")
        if not np.isfinite(ma):
            raise ValueError("Maximum image value is not finite")
        if ma == mi:
            # Constant image: scaling would divide by zero, so cast directly
            return im.astype(out_type)
        _precision_warn(dtype_str1, dtype_str2, "Range [{}, {}].".format(mi, ma))
        # Now make float copy before we scale
        im = im.astype("float64")
        # Scale the values between 0 and 1 then multiply by the max value
        im = (im - mi) / (ma - mi) * (np.power(2.0, bitdepth) - 1) + 0.499999999
    # Sanity checks on the converted range before the final cast
    assert np.nanmin(im) >= 0
    assert np.nanmax(im) < np.power(2.0, bitdepth)
    return im.astype(out_type)
class Dict(OrderedDict):
    """An ordered dict whose keys can also be read and written as attributes.

    Handy in combination with autocompletion. The object otherwise behaves
    like a normal dict and accepts any valid key. Keys that are not valid
    identifiers, or that collide with a name of the dict class (such as
    'items' and 'copy'), are only reachable via item access.
    """

    __reserved_names__ = dir(OrderedDict())  # Also from OrderedDict
    __pure_names__ = dir(dict())

    def __getattribute__(self, key):
        # Regular attribute lookup wins; keys are only a fallback
        try:
            return object.__getattribute__(self, key)
        except AttributeError:
            if key not in self:
                raise
            return self[key]

    def __setattr__(self, key, val):
        if key not in Dict.__reserved_names__:
            # if isinstance(val, dict): val = Dict(val) -> no, makes a copy!
            self[key] = val
            return None
        if key in Dict.__pure_names__:
            # A plain dict name: refuse, it would shadow dict behaviour
            raise AttributeError(
                "Reserved name, this key can only "
                + "be set via ``d[%r] = X``" % key
            )
        # A name that OrderedDict adds on top of dict: let it do its work
        return OrderedDict.__setattr__(self, key, val)

    def __dir__(self):
        def looks_like_identifier(text):
            return bool(re.match(r"[a-z_]\w*$", text, re.I))

        attribute_keys = []
        for candidate in self.keys():
            if isinstance(candidate, str) and looks_like_identifier(candidate):
                attribute_keys.append(candidate)
        return Dict.__reserved_names__ + attribute_keys
+ if not (force or (time.time() - self._last_progress_update > 0.1)): + return + self._last_progress_update = time.time() + # Compose new string + unit = self._unit or "" + progressText = "" + if unit == "%": + progressText = "%2.1f%%" % progress + elif self._max > 0: + percent = 100 * float(progress) / self._max + progressText = "%i/%i %s (%2.1f%%)" % (progress, self._max, unit, percent) + elif progress > 0: + if isinstance(progress, float): + progressText = "%0.4g %s" % (progress, unit) + else: + progressText = "%i %s" % (progress, unit) + # Update + self._update_progress(progressText) + + def increase_progress(self, extra_progress): + """increase_progress(extra_progress) + + Increase the progress by a certain amount. + """ + self.set_progress(self._progress + extra_progress) + + def finish(self, message=None): + """finish(message=None) + + Finish the progress, optionally specifying a message. This will + not set the progress to the maximum. + """ + self.set_progress(self._progress, True) # fore update + self._status = 2 + self._stop() + if message is not None: + self._write(message) + + def fail(self, message=None): + """fail(message=None) + + Stop the progress with a failure, optionally specifying a message. + """ + self.set_progress(self._progress, True) # fore update + self._status = 3 + self._stop() + message = "FAIL " + (message or "") + self._write(message) + + def write(self, message): + """write(message) + + Write a message during progress (such as a warning). 
class StdoutProgressIndicator(BaseProgressIndicator):
    """StdoutProgressIndicator(name)

    Progress indicator that renders to stdout. Updates overwrite the
    previous progress text by emitting backspace characters, so the tty is
    assumed to handle those appropriately.
    """

    def _start(self):
        # Write the "<name> (<action>): " prefix once; progress text follows
        self._chars = ""
        if self._action:
            prefix = "%s (%s): " % (self._name, self._action)
        else:
            prefix = "%s: " % self._name
        self._chars_prefix = prefix
        sys.stdout.write(prefix)
        sys.stdout.flush()

    def _update_progress(self, progressText):
        # If progress is unknown, at least make something move
        if not progressText:
            frames = "-\\|/"
            successor = dict(zip(frames, frames[1:] + frames[:1]))
            progressText = successor.get(self._chars, frames[0])
        # Erase the previous text, then store and write the new one
        erase = "\b" * len(self._chars)
        self._chars = progressText
        sys.stdout.write(erase + progressText)
        sys.stdout.flush()

    def _stop(self):
        self._chars_prefix = ""
        self._chars = ""
        sys.stdout.write("\n")
        sys.stdout.flush()

    def _write(self, message):
        # Replace the current progress line by the message ...
        shown = self._chars_prefix + self._chars
        sys.stdout.write("\b" * len(shown) + "  " + message + "\n")
        # ... and reprint the progress text below it
        sys.stdout.write(self._chars_prefix + self._chars)
        sys.stdout.flush()
For non-user specific + data, consider using common_appdata_dir(). + If appname is given, a subdir is appended (and created if necessary). + If roaming is True, will prefer a roaming directory (Windows Vista/7). + """ + + # Define default user directory + userDir = os.getenv("IMAGEIO_USERDIR", None) + if userDir is None: + userDir = os.path.expanduser("~") + if not os.path.isdir(userDir): # pragma: no cover + userDir = "/var/tmp" # issue #54 + + # Get system app data dir + path = None + if sys.platform.startswith("win"): + path1, path2 = os.getenv("LOCALAPPDATA"), os.getenv("APPDATA") + path = (path2 or path1) if roaming else (path1 or path2) + elif sys.platform.startswith("darwin"): + path = os.path.join(userDir, "Library", "Application Support") + # On Linux and as fallback + if not (path and os.path.isdir(path)): + path = userDir + + # Maybe we should store things local to the executable (in case of a + # portable distro or a frozen application that wants to be portable) + prefix = sys.prefix + if getattr(sys, "frozen", None): + prefix = os.path.abspath(os.path.dirname(sys.executable)) + for reldir in ("settings", "../settings"): + localpath = os.path.abspath(os.path.join(prefix, reldir)) + if os.path.isdir(localpath): # pragma: no cover + try: + open(os.path.join(localpath, "test.write"), "wb").close() + os.remove(os.path.join(localpath, "test.write")) + except IOError: + pass # We cannot write in this directory + else: + path = localpath + break + + # Get path specific for this app + if appname: + if path == userDir: + appname = "." + appname.lstrip(".") # Make it a hidden directory + path = os.path.join(path, appname) + if not os.path.isdir(path): # pragma: no cover + os.makedirs(path, exist_ok=True) + + # Done + return path + + +def resource_dirs(): + """resource_dirs() + + Get a list of directories where imageio resources may be located. + The first directory in this list is the "resources" directory in + the package itself. 
def resource_package_dir():
    """resource_package_dir()

    Return, as a string, the location of the ``resources`` entry inside
    the installed ``imageio`` package.

    Notes
    -----
    The location is resolved through ``importlib.resources.files``, so
    the ``imageio`` package must be importable. No check is made that
    the ``resources`` entry actually exists.
    """
    from importlib import resources as package_resources

    package_root = package_resources.files("imageio")
    resources_entry = package_root / "resources"
    return str(resources_entry)
def has_module(module_name):
    """Check to see if a python module is available.

    Every dotted prefix of ``module_name`` is looked up in turn
    (``a``, ``a.b``, ``a.b.c``, ...), so a missing parent is detected
    before any of its children is queried.

    Parameters
    ----------
    module_name : str
        Absolute, dotted module name, e.g. ``"numpy"`` or ``"os.path"``.

    Returns
    -------
    available : bool
        True if a spec can be found for every prefix of ``module_name``
        (or the prefix is already loaded), False otherwise.

    Notes
    -----
    Looking up a submodule imports its parent package, so any import
    side effects of the parents do happen.
    """
    # ``import importlib`` alone does not guarantee that the ``util``
    # submodule is loaded; import it explicitly.
    import importlib.util

    name_parts = module_name.split(".")
    for end in range(1, len(name_parts) + 1):
        partial_name = ".".join(name_parts[:end])
        try:
            spec = importlib.util.find_spec(partial_name)
        except ImportError:
            # Raised (as ModuleNotFoundError) when the parent exists but
            # is not a package, or when importing the parent fails.
            return False
        except ValueError:
            # Raised when the module is already in ``sys.modules`` but
            # carries no spec: it is loaded, hence available.
            continue
        if spec is None:
            return False
    return True
class PluginV3:
    """An ImageIO Plugin.

    This is an abstract plugin that documents the v3 plugin API interface. A
    plugin is an adapter/wrapper around a backend that converts a request from
    iio.core (e.g., read an image from file) into a sequence of instructions for
    the backend that fulfill the request.

    Plugin authors may choose to subclass this class when implementing a new
    plugin, but aren't obliged to do so. As long as the plugin class implements
    the interface (methods) described below the ImageIO core will treat it just
    like any other plugin.


    Parameters
    ----------
    request : iio.Request
        A request object that represents the users intent. It provides a
        standard interface to access the various ImageResources and serves them
        to the plugin as a file object (or file). Check the docs for details.
    **kwargs : Any
        Additional configuration arguments for the plugin or backend. Usually
        these match the configuration arguments available on the backend and
        are forwarded to it.


    Raises
    ------
    InitializationError
        During ``__init__`` the plugin tests if it can fulfill the request. If
        it can't, e.g., because the request points to a file in the wrong
        format, then it should raise an ``InitializationError`` and provide a
        reason for failure. This reason may be reported to the user.
    ImportError
        Plugins will be imported dynamically when listed in
        ``iio.config.known_plugins`` to fulfill requests. This way, users only
        have to load plugins/backends they actually use. If this plugin's backend
        is not installed, it should raise an ``ImportError`` either during
        module import or during class construction.

    Notes
    -----
    Upon successful construction the plugin takes ownership of the provided
    request. This means that it is the plugin's responsibility to call
    request.finish() to close the resource when it is no longer needed.

    Plugins _must_ implement a context manager that closes and cleans any
    resources held by the plugin upon exit.

    """

    def __init__(self, request: Request) -> None:
        """Initialize a new Plugin Instance.

        See Plugin's docstring for detailed documentation.

        Notes
        -----
        The implementation here stores the request as a local variable that is
        exposed using a @property below. If you inherit from PluginV3, remember
        to call ``super().__init__(request)``.

        """

        self._request = request

    def read(self, *, index: int = 0) -> np.ndarray:
        """Read a ndimage.

        The ``read`` method loads a (single) ndimage, located at ``index`` from
        the requested ImageResource.

        It is at the plugin's discretion to decide (and document) what
        constitutes a single ndimage. A sensible way to make this decision is to
        choose based on the ImageResource's format and on what users will expect
        from such a format. For example, a sensible choice for a TIFF file
        produced by an ImageJ hyperstack is to read it as a volumetric ndimage
        (1 color dimension followed by 3 spatial dimensions). On the other hand,
        a sensible choice for a MP4 file produced by Davinci Resolve is to treat
        each frame as a ndimage (2 spatial dimensions followed by 1 color
        dimension).

        The value ``index=None`` is special. It requests the plugin to load all
        ndimages in the file and stack them along a new first axis. For example,
        if a MP4 file is read with ``index=None`` and the plugin identifies
        single frames as ndimages, then the plugin should read all frames and
        stack them into a new ndimage which now contains a time axis as its
        first axis. If a PNG file (single image format) is read with
        ``index=None`` the plugin does a very similar thing: It loads all
        ndimages in the file (here it's just one) and stacks them along a new
        first axis, effectively prepending an axis with size 1 to the image. If
        a plugin does not wish to support ``index=None`` it should set a more
        sensible default and raise a ``ValueError`` when requested to read using
        ``index=None``.

        Parameters
        ----------
        index : int
            If the ImageResource contains multiple ndimages, and index is an
            integer, select the index-th ndimage from among them and return it.
            If index is an ellipsis (...), read all ndimages in the file and
            stack them along a new batch dimension. If index is None, let the
            plugin decide. If the index is out of bounds a ``ValueError`` is
            raised.
        **kwargs : Any
            The read method may accept any number of plugin-specific keyword
            arguments to further customize the read behavior. Usually these
            match the arguments available on the backend and are forwarded to
            it.

        Returns
        -------
        ndimage : np.ndarray
            A ndimage containing decoded pixel data (sometimes called bitmap).

        Raises
        ------
        NotImplementedError
            Always, in this abstract base implementation.

        Notes
        -----
        The ImageResource from which the plugin should read is managed by the
        provided request object. Directly accessing the managed ImageResource is
        _not_ permitted. Instead, you can get FileLike access to the
        ImageResource via request.get_file().

        If the backend doesn't support reading from FileLike objects, you can
        request a temporary file to pass to the backend via
        ``request.get_local_filename()``. This is, however, not very performant
        (involves copying the Request's content into a temporary file), so you
        should avoid doing this whenever possible. Consider it a fallback method
        in case all else fails.

        """
        raise NotImplementedError()

    def write(self, ndimage: Union[ArrayLike, List[ArrayLike]]) -> Optional[bytes]:
        """Write a ndimage to a ImageResource.

        The ``write`` method encodes the given ndimage into the format handled
        by the backend and writes it to the ImageResource. It overwrites
        any content that may have been previously stored in the file.

        If the backend supports only a single format then it must check if
        the ImageResource matches that format and raise an exception if not.
        Typically, this should be done during initialization in the form of a
        ``InitializationError``.

        If the backend supports more than one format it must determine the
        requested/desired format. Usually this can be done by inspecting the
        ImageResource (e.g., by checking ``request.extension``), or by providing
        a mechanism to explicitly set the format (perhaps with a - sensible -
        default value). If the plugin can not determine the desired format, it
        **must not** write to the ImageResource, but raise an exception instead.

        If the backend supports at least one format that can hold multiple
        ndimages it should be capable of handling ndimage batches and lists of
        ndimages. If the ``ndimage`` input is a list of ndimages, the plugin
        should not assume that the ndimages are not stackable, i.e., ndimages
        may have different shapes. Otherwise, the ``ndimage`` may be a batch of
        multiple ndimages stacked along the first axis of the array. The plugin
        must be able to discover this, either automatically or via additional
        `kwargs`. If there is ambiguity in the process, the plugin must clearly
        document what happens in such cases and, if possible, describe how to
        resolve this ambiguity.

        Parameters
        ----------
        ndimage : ArrayLike
            The ndimage to encode and write to the current ImageResource.
        **kwargs : Any
            The write method may accept any number of plugin-specific keyword
            arguments to customize the writing behavior. Usually these match the
            arguments available on the backend and are forwarded to it.

        Returns
        -------
        encoded_image : bytes or None
            If the chosen ImageResource is the special target ``"<bytes>"`` then
            write should return a byte string containing the encoded image data.
            Otherwise, it returns None.

        Raises
        ------
        NotImplementedError
            Always, in this abstract base implementation.

        Notes
        -----
        The ImageResource to which the plugin should write to is managed by the
        provided request object. Directly accessing the managed ImageResource is
        _not_ permitted. Instead, you can get FileLike access to the
        ImageResource via request.get_file().

        If the backend doesn't support writing to FileLike objects, you can
        request a temporary file to pass to the backend via
        ``request.get_local_filename()``. This is, however, not very performant
        (involves copying the Request's content from a temporary file), so you
        should avoid doing this whenever possible. Consider it a fallback method
        in case all else fails.

        """
        raise NotImplementedError()

    def iter(self) -> Iterator[np.ndarray]:
        """Iterate the ImageResource.

        This method returns a generator that yields ndimages in the order in which
        they appear in the file. This is roughly equivalent to::

            idx = 0
            while True:
                try:
                    yield self.read(index=idx)
                except ValueError:
                    break

        It works very similar to ``read``, and you can consult the documentation
        of that method for additional information on desired behavior.

        Parameters
        ----------
        **kwargs : Any
            The iter method may accept any number of plugin-specific keyword
            arguments to further customize the reading/iteration behavior.
            Usually these match the arguments available on the backend and are
            forwarded to it.

        Yields
        ------
        ndimage : np.ndarray
            A ndimage containing decoded pixel data (sometimes called bitmap).

        Raises
        ------
        NotImplementedError
            Always, in this abstract base implementation (raised on call, not
            on first iteration).

        See Also
        --------
        PluginV3.read

        """
        raise NotImplementedError()

    def properties(self, index: int = 0) -> ImageProperties:
        """Standardized ndimage metadata.

        Parameters
        ----------
        index : int
            If the ImageResource contains multiple ndimages, and index is an
            integer, select the index-th ndimage from among them and return its
            properties. If index is an ellipsis (...), read all ndimages in the file
            and stack them along a new batch dimension and return their properties.
            If index is None, the plugin decides the default.

        Returns
        -------
        properties : ImageProperties
            A dataclass filled with standardized image metadata.

        Raises
        ------
        NotImplementedError
            Always, in this abstract base implementation.

        """
        raise NotImplementedError()

    def metadata(self, index: int = 0, exclude_applied: bool = True) -> Dict[str, Any]:
        """Format-Specific ndimage metadata.

        The method reads metadata stored in the ImageResource and returns it as
        a python dict. The plugin is free to choose which name to give a piece
        of metadata; however, if possible, it should match the name given by the
        format. There is no requirement regarding the fields a plugin must
        expose; however, if a plugin does expose any, ``exclude_applied`` applies
        to these fields.

        If the plugin does return metadata items, it must check the value of
        ``exclude_applied`` before returning them. If ``exclude_applied`` is
        True, then any metadata item that would be applied to an ndimage
        returned by ``read`` (or ``iter``) must not be returned. This is done to
        avoid confusion; for example, if an ImageResource defines the ExIF
        rotation tag, and the plugin applies the rotation to the data before
        returning it, then ``exclude_applied`` prevents confusion on whether the
        tag was already applied or not.

        The `kwarg` ``index`` behaves similar to its counterpart in ``read``
        with one exception: If the ``index`` is None, then global metadata is
        returned instead of returning a combination of all metadata items. If
        there is no global metadata, the Plugin should return an empty dict or
        raise an exception.

        Parameters
        ----------
        index : int
            If the ImageResource contains multiple ndimages, and index is an
            integer, select the index-th ndimage from among them and return its
            metadata. If index is an ellipsis (...), return global metadata. If
            index is None, the plugin decides the default.
        exclude_applied : bool
            If True (default), do not report metadata fields that the plugin
            would apply/consume while reading the image.

        Returns
        -------
        metadata : dict
            A dictionary filled with format-specific metadata fields and their
            values.

        Raises
        ------
        NotImplementedError
            Always, in this abstract base implementation.

        """
        raise NotImplementedError()

    def close(self) -> None:
        """Close the ImageResource.

        This method allows a plugin to behave similar to the python built-in ``open``::

            image_file = my_plugin(Request, "r")
            ...
            image_file.close()

        It is used by the context manager and destructor below to avoid leaking
        ImageResources. If the plugin has no other cleanup to do it doesn't have
        to overwrite this method itself and can rely on the implementation
        below.

        Notes
        -----
        Calling ``close`` on an instance that never got a request attached
        (e.g. because ``__init__`` raised before storing it) is a no-op.

        """

        # ``__del__`` also runs for instances whose ``__init__`` did not
        # complete; in that case there is no request to finish, and raising
        # from a finalizer would only produce an "Exception ignored" warning.
        try:
            request = self.request
        except AttributeError:
            return

        request.finish()

    @property
    def request(self) -> Request:
        """The request object handed to ``__init__``."""
        return self._request

    def __enter__(self) -> "PluginV3":
        """Enter the context manager; returns the plugin itself."""
        return self

    def __exit__(self, type, value, traceback) -> None:
        """Leave the context manager and close the plugin.

        Returns None, so an exception raised inside the ``with`` body
        propagates.
        """
        self.close()

    def __del__(self) -> None:
        """Close the plugin when the instance is garbage collected."""
        self.close()
+ +# flake8: noqa + +""" +Here you can find documentation on how to write your own plugin to allow +ImageIO to access a new backend. Plugins are quite object oriented, and +the relevant classes and their interaction are documented here: + +.. currentmodule:: imageio + +.. autosummary:: + :toctree: ../_autosummary + :template: better_class.rst + + imageio.core.Format + imageio.core.Request + +.. note:: + You can always check existing plugins if you want to see examples. + +What methods to implement +------------------------- + +To implement a new plugin, create a new class that inherits from +:class:`imageio.core.Format` and implement the following functions: + +.. autosummary:: + :toctree: ../_autosummary + + imageio.core.Format.__init__ + imageio.core.Format._can_read + imageio.core.Format._can_write + +Further, each format contains up to two nested classes; one for reading and +one for writing. To support reading and/or writing, the respective classes +need to be defined. + +For reading, create a nested class that inherits from +``imageio.core.Format.Reader`` and that implements the following functions: + + * Implement ``_open(**kwargs)`` to initialize the reader. Deal with the + user-provided keyword arguments here. + * Implement ``_close()`` to clean up. + * Implement ``_get_length()`` to provide a suitable length based on what + the user expects. Can be ``inf`` for streaming data. + * Implement ``_get_data(index)`` to return an array and a meta-data dict. + * Implement ``_get_meta_data(index)`` to return a meta-data dict. If index + is None, it should return the 'global' meta-data. + +For writing, create a nested class that inherits from +``imageio.core.Format.Writer`` and implement the following functions: + + * Implement ``_open(**kwargs)`` to initialize the writer. Deal with the + user-provided keyword arguments here. + * Implement ``_close()`` to clean up. + * Implement ``_append_data(im, meta)`` to add data (and meta-data). 
def __getattr__(name):
    """Lazy-Import Plugins

    Module-level attribute hook: any attribute that is not found on this
    module is resolved by importing the submodule ``imageio.plugins.<name>``
    and returning it. For example, the following snippet will delay
    importing freeimage until the second line:

    >>> import imageio
    >>> imageio.plugins.freeimage.download()

    Raises
    ------
    AttributeError
        If importing ``imageio.plugins.<name>`` raises ``ImportError``. The
        original import error is deliberately not chained.

    """

    qualified_name = f"imageio.plugins.{name}"
    try:
        plugin_module = importlib.import_module(qualified_name)
    except ImportError:
        message = f"module '{__name__}' has no attribute '{name}'"
        raise AttributeError(message) from None
    return plugin_module
0000000000000000000000000000000000000000..e2c333137dbe510d85338346d24d4e768e1d6049 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/_freeimage.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/_swf.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/_swf.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c901171e74679426eb8207855071bceadc78bfad Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/_swf.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/bsdf.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/bsdf.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..683d006106e25bf2cf8cf0f91c4e3183d85465de Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/bsdf.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/dicom.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/dicom.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..13ac8b8b24157867eccbc8e2c44a82d44cc6777a Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/dicom.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/example.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/example.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bfebb2f3bedc147971e0d11859771f90a47645a7 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/example.cpython-312.pyc 
differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/feisem.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/feisem.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..79d50d98177e6c9203fce3a396547e97453a3354 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/feisem.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/ffmpeg.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/ffmpeg.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5a1d4e934d457102a283cbfc55f28e9f8f5bb31a Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/ffmpeg.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/fits.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/fits.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1745179af27b740618fcc9cd76191705d1d8102c Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/fits.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/freeimage.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/freeimage.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..db4d0fa35decc9283cbe074037c62fc3139d385b Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/freeimage.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/freeimagemulti.cpython-312.pyc 
b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/freeimagemulti.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1aa8265c22c1eee548bce9b7ce3e4bf0a6fecd4c Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/freeimagemulti.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/gdal.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/gdal.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d57f7c972772eb1f44c7b41964e13f833424b719 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/gdal.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/grab.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/grab.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5b50264d6dee707f3b8f6462d3d6d70050a03c4b Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/grab.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/lytro.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/lytro.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3b5784c2e92112b6305747d5807f5a21e3054922 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/lytro.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/npz.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/npz.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b6402a48f36bd0be85c6002950699034ce8e6677 
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/npz.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/opencv.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/opencv.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..45f660f0b2ce70b5042e8308d777d1876ab100d7 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/opencv.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/pillow.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/pillow.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..119b4674c62faa4aeb1f69763849c1ddeb25ea3f Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/pillow.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/pillow_info.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/pillow_info.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d0ab760e16b2665dc779c0387596e0dce5c3a21e Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/pillow_info.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/pillow_legacy.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/pillow_legacy.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e48c4207a8010b23c1a4acd387a1ab32ab136b2f Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/pillow_legacy.cpython-312.pyc differ diff --git 
a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/pillowmulti.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/pillowmulti.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2dd73c2b121bec071c1605edb2ff156d37bdd9d9 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/pillowmulti.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/pyav.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/pyav.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a5a3b46e952d61dc526259bab27c5977041f7e78 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/pyav.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/rawpy.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/rawpy.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..74e4ff15c5344e7bed0c8d829cb7931f762d420e Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/rawpy.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/simpleitk.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/simpleitk.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3885fc92457c22da9a5e1d288417a6e5a47acc81 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/simpleitk.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/spe.cpython-312.pyc 
b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/spe.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..aaa687c114f99caadeb22847bb8774b3d6ea1a50 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/spe.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/swf.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/swf.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6b204ef9227bc3660aa8d9ff9d066e71d271a704 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/swf.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/tifffile.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/tifffile.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..809d39eaebf4ca161d57a9ffc669b802d37cd0dc Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/tifffile.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/tifffile_v3.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/tifffile_v3.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..71a33682ab99044580ca6f532c920a7ab237ddf0 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/__pycache__/tifffile_v3.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/_bsdf.py b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/_bsdf.py new file mode 100644 index 0000000000000000000000000000000000000000..d6f56ce0dc471b0c13bc4e10f0ce9a26b7303bce --- /dev/null +++ 
b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/_bsdf.py @@ -0,0 +1,915 @@ +#!/usr/bin/env python +# This file is distributed under the terms of the 2-clause BSD License. +# Copyright (c) 2017-2018, Almar Klein + +""" +Python implementation of the Binary Structured Data Format (BSDF). + +BSDF is a binary format for serializing structured (scientific) data. +See http://bsdf.io for more information. + +This is the reference implementation, which is relatively relatively +sophisticated, providing e.g. lazy loading of blobs and streamed +reading/writing. A simpler Python implementation is available as +``bsdf_lite.py``. + +This module has no dependencies and works on Python 2.7 and 3.4+. + +Note: on Legacy Python (Python 2.7), non-Unicode strings are encoded as bytes. +""" + +# todo: in 2020, remove six stuff, __future__ and _isidentifier +# todo: in 2020, remove 'utf-8' args to encode/decode; it's faster + +from __future__ import absolute_import, division, print_function + +import bz2 +import hashlib +import logging +import os +import re +import struct +import sys +import types +import zlib +from io import BytesIO + +logger = logging.getLogger(__name__) + +# Notes on versioning: the major and minor numbers correspond to the +# BSDF format version. The major number if increased when backward +# incompatible changes are introduced. An implementation must raise an +# exception when the file being read has a higher major version. The +# minor number is increased when new backward compatible features are +# introduced. An implementation must display a warning when the file +# being read has a higher minor version. The patch version is increased +# for subsequent releases of the implementation. 
+VERSION = 2, 1, 2 +__version__ = ".".join(str(i) for i in VERSION) + + +# %% The encoder and decoder implementation + +# From six.py +PY3 = sys.version_info[0] >= 3 +if PY3: + text_type = str + string_types = str + unicode_types = str + integer_types = int + classtypes = type +else: # pragma: no cover + logging.basicConfig() # avoid "no handlers found" error + text_type = unicode # noqa + string_types = basestring # noqa + unicode_types = unicode # noqa + integer_types = (int, long) # noqa + classtypes = type, types.ClassType + +# Shorthands +spack = struct.pack +strunpack = struct.unpack + + +def lencode(x): + """Encode an unsigned integer into a variable sized blob of bytes.""" + # We could support 16 bit and 32 bit as well, but the gain is low, since + # 9 bytes for collections with over 250 elements is marginal anyway. + if x <= 250: + return spack(" extension + self._extensions_by_cls = {} # cls -> (name, extension.encode) + if extensions is None: + extensions = standard_extensions + for extension in extensions: + self.add_extension(extension) + self._parse_options(**options) + + def _parse_options( + self, + compression=0, + use_checksum=False, + float64=True, + load_streaming=False, + lazy_blob=False, + ): + # Validate compression + if isinstance(compression, string_types): + m = {"no": 0, "zlib": 1, "bz2": 2} + compression = m.get(compression.lower(), compression) + if compression not in (0, 1, 2): + raise TypeError("Compression must be 0, 1, 2, " '"no", "zlib", or "bz2"') + self._compression = compression + + # Other encoding args + self._use_checksum = bool(use_checksum) + self._float64 = bool(float64) + + # Decoding args + self._load_streaming = bool(load_streaming) + self._lazy_blob = bool(lazy_blob) + + def add_extension(self, extension_class): + """Add an extension to this serializer instance, which must be + a subclass of Extension. Can be used as a decorator. 
+ """ + # Check class + if not ( + isinstance(extension_class, type) and issubclass(extension_class, Extension) + ): + raise TypeError("add_extension() expects a Extension class.") + extension = extension_class() + + # Get name + name = extension.name + if not isinstance(name, str): + raise TypeError("Extension name must be str.") + if len(name) == 0 or len(name) > 250: + raise NameError( + "Extension names must be nonempty and shorter " "than 251 chars." + ) + if name in self._extensions: + logger.warning( + 'BSDF warning: overwriting extension "%s", ' + "consider removing first" % name + ) + + # Get classes + cls = extension.cls + if not cls: + clss = [] + elif isinstance(cls, (tuple, list)): + clss = cls + else: + clss = [cls] + for cls in clss: + if not isinstance(cls, classtypes): + raise TypeError("Extension classes must be types.") + + # Store + for cls in clss: + self._extensions_by_cls[cls] = name, extension.encode + self._extensions[name] = extension + return extension_class + + def remove_extension(self, name): + """Remove a converted by its unique name.""" + if not isinstance(name, str): + raise TypeError("Extension name must be str.") + if name in self._extensions: + self._extensions.pop(name) + for cls in list(self._extensions_by_cls.keys()): + if self._extensions_by_cls[cls][0] == name: + self._extensions_by_cls.pop(cls) + + def _encode(self, f, value, streams, ext_id): + """Main encoder function.""" + x = encode_type_id + + if value is None: + f.write(x(b"v", ext_id)) # V for void + elif value is True: + f.write(x(b"y", ext_id)) # Y for yes + elif value is False: + f.write(x(b"n", ext_id)) # N for no + elif isinstance(value, integer_types): + if -32768 <= value <= 32767: + f.write(x(b"h", ext_id) + spack("h", value)) # H for ... 
+ else: + f.write(x(b"i", ext_id) + spack(" 0: + raise ValueError("Can only have one stream per file.") + streams.append(value) + value._activate(f, self._encode, self._decode) # noqa + else: + if ext_id is not None: + raise ValueError( + "Extension %s wronfully encodes object to another " + "extension object (though it may encode to a list/dict " + "that contains other extension objects)." % ext_id + ) + # Try if the value is of a type we know + ex = self._extensions_by_cls.get(value.__class__, None) + # Maybe its a subclass of a type we know + if ex is None: + for name, c in self._extensions.items(): + if c.match(self, value): + ex = name, c.encode + break + else: + ex = None + # Success or fail + if ex is not None: + ext_id2, extension_encode = ex + self._encode(f, extension_encode(self, value), streams, ext_id2) + else: + t = ( + "Class %r is not a valid base BSDF type, nor is it " + "handled by an extension." + ) + raise TypeError(t % value.__class__.__name__) + + def _decode(self, f): + """Main decoder function.""" + + # Get value + char = f.read(1) + c = char.lower() + + # Conversion (uppercase value identifiers signify converted values) + if not char: + raise EOFError() + elif char != c: + n = strunpack("= 254: + # Streaming + closed = n == 254 + n = strunpack(" 0 + name = f.read(n_name).decode("UTF-8") + value[name] = self._decode(f) + elif c == b"b": + if self._lazy_blob: + value = Blob((f, True)) + else: + blob = Blob((f, False)) + value = blob.get_bytes() + else: + raise RuntimeError("Parse error %r" % char) + + # Convert value if we have an extension for it + if ext_id is not None: + extension = self._extensions.get(ext_id, None) + if extension is not None: + value = extension.decode(self, value) + else: + logger.warning("BSDF warning: no extension found for %r" % ext_id) + + return value + + def encode(self, ob): + """Save the given object to bytes.""" + f = BytesIO() + self.save(f, ob) + return f.getvalue() + + def save(self, f, ob): + """Write the 
given object to the given file object.""" + f.write(b"BSDF") + f.write(struct.pack(" 0: + stream = streams[0] + if stream._start_pos != f.tell(): + raise ValueError( + "The stream object must be " "the last object to be encoded." + ) + + def decode(self, bb): + """Load the data structure that is BSDF-encoded in the given bytes.""" + f = BytesIO(bb) + return self.load(f) + + def load(self, f): + """Load a BSDF-encoded object from the given file object.""" + # Check magic string + f4 = f.read(4) + if f4 != b"BSDF": + raise RuntimeError("This does not look like a BSDF file: %r" % f4) + # Check version + major_version = strunpack(" VERSION[1]: # minor should be < ours + t = ( + "BSDF warning: reading file with higher minor version (%s) " + "than the implementation (%s)." + ) + logger.warning(t % (__version__, file_version)) + + return self._decode(f) + + +# %% Streaming and blob-files + + +class BaseStream(object): + """Base class for streams.""" + + def __init__(self, mode="w"): + self._i = 0 + self._count = -1 + if isinstance(mode, int): + self._count = mode + mode = "r" + elif mode == "w": + self._count = 0 + assert mode in ("r", "w") + self._mode = mode + self._f = None + self._start_pos = 0 + + def _activate(self, file, encode_func, decode_func): + if self._f is not None: # Associated with another write + raise IOError("Stream object cannot be activated twice?") + self._f = file + self._start_pos = self._f.tell() + self._encode = encode_func + self._decode = decode_func + + @property + def mode(self): + """The mode of this stream: 'r' or 'w'.""" + return self._mode + + +class ListStream(BaseStream): + """A streamable list object used for writing or reading. + In read mode, it can also be iterated over. + """ + + @property + def count(self): + """The number of elements in the stream (can be -1 for unclosed + streams in read-mode). 
+ """ + return self._count + + @property + def index(self): + """The current index of the element to read/write.""" + return self._i + + def append(self, item): + """Append an item to the streaming list. The object is immediately + serialized and written to the underlying file. + """ + # if self._mode != 'w': + # raise IOError('This ListStream is not in write mode.') + if self._count != self._i: + raise IOError("Can only append items to the end of the stream.") + if self._f is None: + raise IOError("List stream is not associated with a file yet.") + if self._f.closed: + raise IOError("Cannot stream to a close file.") + self._encode(self._f, item, [self], None) + self._i += 1 + self._count += 1 + + def close(self, unstream=False): + """Close the stream, marking the number of written elements. New + elements may still be appended, but they won't be read during decoding. + If ``unstream`` is False, the stream is turned into a regular list + (not streaming). + """ + # if self._mode != 'w': + # raise IOError('This ListStream is not in write mode.') + if self._count != self._i: + raise IOError("Can only close when at the end of the stream.") + if self._f is None: + raise IOError("ListStream is not associated with a file yet.") + if self._f.closed: + raise IOError("Cannot close a stream on a close file.") + i = self._f.tell() + self._f.seek(self._start_pos - 8 - 1) + self._f.write(spack("= 0: + if self._i >= self._count: + raise StopIteration() + self._i += 1 + return self._decode(self._f) + else: + # This raises EOFError at some point. + try: + res = self._decode(self._f) + self._i += 1 + return res + except EOFError: + self._count = self._i + raise StopIteration() + + def __iter__(self): + if self._mode != "r": + raise IOError("Cannot iterate: ListStream in not in read mode.") + return self + + def __next__(self): + return self.next() + + +class Blob(object): + """Object to represent a blob of bytes. 
When used to write a BSDF file, + it's a wrapper for bytes plus properties such as what compression to apply. + When used to read a BSDF file, it can be used to read the data lazily, and + also modify the data if reading in 'r+' mode and the blob isn't compressed. + """ + + # For now, this does not allow re-sizing blobs (within the allocated size) + # but this can be added later. + + def __init__(self, bb, compression=0, extra_size=0, use_checksum=False): + if isinstance(bb, bytes): + self._f = None + self.compressed = self._from_bytes(bb, compression) + self.compression = compression + self.allocated_size = self.used_size + extra_size + self.use_checksum = use_checksum + elif isinstance(bb, tuple) and len(bb) == 2 and hasattr(bb[0], "read"): + self._f, allow_seek = bb + self.compressed = None + self._from_file(self._f, allow_seek) + self._modified = False + else: + raise TypeError("Wrong argument to create Blob.") + + def _from_bytes(self, value, compression): + """When used to wrap bytes in a blob.""" + if compression == 0: + compressed = value + elif compression == 1: + compressed = zlib.compress(value, 9) + elif compression == 2: + compressed = bz2.compress(value, 9) + else: # pragma: no cover + assert False, "Unknown compression identifier" + + self.data_size = len(value) + self.used_size = len(compressed) + return compressed + + def _to_file(self, f): + """Private friend method called by encoder to write a blob to a file.""" + # Write sizes - write at least in a size that allows resizing + if self.allocated_size <= 250 and self.compression == 0: + f.write(spack(" self.allocated_size: + raise IOError("Seek beyond blob boundaries.") + self._f.seek(self.start_pos + p) + + def tell(self): + """Get the current file pointer position (relative to the blob start).""" + if self._f is None: + raise RuntimeError( + "Cannot tell in a blob " "that is not created by the BSDF decoder." 
+ ) + return self._f.tell() - self.start_pos + + def write(self, bb): + """Write bytes to the blob.""" + if self._f is None: + raise RuntimeError( + "Cannot write in a blob " "that is not created by the BSDF decoder." + ) + if self.compression: + raise IOError("Cannot arbitrarily write in compressed blob.") + if self._f.tell() + len(bb) > self.end_pos: + raise IOError("Write beyond blob boundaries.") + self._modified = True + return self._f.write(bb) + + def read(self, n): + """Read n bytes from the blob.""" + if self._f is None: + raise RuntimeError( + "Cannot read in a blob " "that is not created by the BSDF decoder." + ) + if self.compression: + raise IOError("Cannot arbitrarily read in compressed blob.") + if self._f.tell() + n > self.end_pos: + raise IOError("Read beyond blob boundaries.") + return self._f.read(n) + + def get_bytes(self): + """Get the contents of the blob as bytes.""" + if self.compressed is not None: + compressed = self.compressed + else: + i = self._f.tell() + self.seek(0) + compressed = self._f.read(self.used_size) + self._f.seek(i) + if self.compression == 0: + value = compressed + elif self.compression == 1: + value = zlib.decompress(compressed) + elif self.compression == 2: + value = bz2.decompress(compressed) + else: # pragma: no cover + raise RuntimeError("Invalid compression %i" % self.compression) + return value + + def update_checksum(self): + """Reset the blob's checksum if present. Call this after modifying + the data. + """ + # or ... should the presence of a checksum mean that data is proteced? + if self.use_checksum and self._modified: + self.seek(0) + compressed = self._f.read(self.used_size) + self._f.seek(self.start_pos - self.alignment - 1 - 16) + self._f.write(hashlib.md5(compressed).digest()) + + +# %% High-level functions + + +def encode(ob, extensions=None, **options): + """Save (BSDF-encode) the given object to bytes. + See `BSDFSerializer` for details on extensions and options. 
+ """ + s = BsdfSerializer(extensions, **options) + return s.encode(ob) + + +def save(f, ob, extensions=None, **options): + """Save (BSDF-encode) the given object to the given filename or + file object. See` BSDFSerializer` for details on extensions and options. + """ + s = BsdfSerializer(extensions, **options) + if isinstance(f, string_types): + with open(f, "wb") as fp: + return s.save(fp, ob) + else: + return s.save(f, ob) + + +def decode(bb, extensions=None, **options): + """Load a (BSDF-encoded) structure from bytes. + See `BSDFSerializer` for details on extensions and options. + """ + s = BsdfSerializer(extensions, **options) + return s.decode(bb) + + +def load(f, extensions=None, **options): + """Load a (BSDF-encoded) structure from the given filename or file object. + See `BSDFSerializer` for details on extensions and options. + """ + s = BsdfSerializer(extensions, **options) + if isinstance(f, string_types): + if f.startswith(("~/", "~\\")): # pragma: no cover + f = os.path.expanduser(f) + with open(f, "rb") as fp: + return s.load(fp) + else: + return s.load(f) + + +# Aliases for json compat +loads = decode +dumps = encode + + +# %% Standard extensions + +# Defining extensions as a dict would be more compact and feel lighter, but +# that would only allow lambdas, which is too limiting, e.g. for ndarray +# extension. + + +class Extension(object): + """Base class to implement BSDF extensions for special data types. + + Extension classes are provided to the BSDF serializer, which + instantiates the class. That way, the extension can be somewhat dynamic: + e.g. the NDArrayExtension exposes the ndarray class only when numpy + is imported. + + A extension instance must have two attributes. These can be attributes of + the class, or of the instance set in ``__init__()``: + + * name (str): the name by which encoded values will be identified. + * cls (type): the type (or list of types) to match values with. 
+ This is optional, but it makes the encoder select extensions faster. + + Further, it needs 3 methods: + + * `match(serializer, value) -> bool`: return whether the extension can + convert the given value. The default is ``isinstance(value, self.cls)``. + * `encode(serializer, value) -> encoded_value`: the function to encode a + value to more basic data types. + * `decode(serializer, encoded_value) -> value`: the function to decode an + encoded value back to its intended representation. + + """ + + name = "" + cls = () + + def __repr__(self): + return "" % (self.name, hex(id(self))) + + def match(self, s, v): + return isinstance(v, self.cls) + + def encode(self, s, v): + raise NotImplementedError() + + def decode(self, s, v): + raise NotImplementedError() + + +class ComplexExtension(Extension): + name = "c" + cls = complex + + def encode(self, s, v): + return (v.real, v.imag) + + def decode(self, s, v): + return complex(v[0], v[1]) + + +class NDArrayExtension(Extension): + name = "ndarray" + + def __init__(self): + if "numpy" in sys.modules: + import numpy as np + + self.cls = np.ndarray + + def match(self, s, v): # pragma: no cover - e.g. 
work for nd arrays in JS + return hasattr(v, "shape") and hasattr(v, "dtype") and hasattr(v, "tobytes") + + def encode(self, s, v): + return dict(shape=v.shape, dtype=text_type(v.dtype), data=v.tobytes()) + + def decode(self, s, v): + try: + import numpy as np + except ImportError: # pragma: no cover + return v + a = np.frombuffer(v["data"], dtype=v["dtype"]) + a.shape = v["shape"] + return a + + +standard_extensions = [ComplexExtension, NDArrayExtension] + + +if __name__ == "__main__": + # Invoke CLI + import bsdf_cli + + bsdf_cli.main() diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/_dicom.py b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/_dicom.py new file mode 100644 index 0000000000000000000000000000000000000000..183c9f7b4877975d509aff775fe6d28224d8b523 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/_dicom.py @@ -0,0 +1,931 @@ +# -*- coding: utf-8 -*- +# imageio is distributed under the terms of the (new) BSD License. + +"""Plugin for reading DICOM files.""" + +# todo: Use pydicom: +# * Note: is not py3k ready yet +# * Allow reading the full meta info +# I think we can more or less replace the SimpleDicomReader with a +# pydicom.Dataset For series, only ned to read the full info from one +# file: speed still high +# * Perhaps allow writing? 
+ +import sys +import os +import struct +import logging + +import numpy as np + + +logger = logging.getLogger(__name__) + +# Determine endianity of system +sys_is_little_endian = sys.byteorder == "little" + +# Define a dictionary that contains the tags that we would like to know +MINIDICT = { + (0x7FE0, 0x0010): ("PixelData", "OB"), + # Date and time + (0x0008, 0x0020): ("StudyDate", "DA"), + (0x0008, 0x0021): ("SeriesDate", "DA"), + (0x0008, 0x0022): ("AcquisitionDate", "DA"), + (0x0008, 0x0023): ("ContentDate", "DA"), + (0x0008, 0x0030): ("StudyTime", "TM"), + (0x0008, 0x0031): ("SeriesTime", "TM"), + (0x0008, 0x0032): ("AcquisitionTime", "TM"), + (0x0008, 0x0033): ("ContentTime", "TM"), + # With what, where, by whom? + (0x0008, 0x0060): ("Modality", "CS"), + (0x0008, 0x0070): ("Manufacturer", "LO"), + (0x0008, 0x0080): ("InstitutionName", "LO"), + # Descriptions + (0x0008, 0x1030): ("StudyDescription", "LO"), + (0x0008, 0x103E): ("SeriesDescription", "LO"), + # UID's + (0x0008, 0x0016): ("SOPClassUID", "UI"), + (0x0008, 0x0018): ("SOPInstanceUID", "UI"), + (0x0020, 0x000D): ("StudyInstanceUID", "UI"), + (0x0020, 0x000E): ("SeriesInstanceUID", "UI"), + (0x0008, 0x0117): ("ContextUID", "UI"), + # Numbers + (0x0020, 0x0011): ("SeriesNumber", "IS"), + (0x0020, 0x0012): ("AcquisitionNumber", "IS"), + (0x0020, 0x0013): ("InstanceNumber", "IS"), + (0x0020, 0x0014): ("IsotopeNumber", "IS"), + (0x0020, 0x0015): ("PhaseNumber", "IS"), + (0x0020, 0x0016): ("IntervalNumber", "IS"), + (0x0020, 0x0017): ("TimeSlotNumber", "IS"), + (0x0020, 0x0018): ("AngleNumber", "IS"), + (0x0020, 0x0019): ("ItemNumber", "IS"), + (0x0020, 0x0020): ("PatientOrientation", "CS"), + (0x0020, 0x0030): ("ImagePosition", "CS"), + (0x0020, 0x0032): ("ImagePositionPatient", "CS"), + (0x0020, 0x0035): ("ImageOrientation", "CS"), + (0x0020, 0x0037): ("ImageOrientationPatient", "CS"), + # Patient information + (0x0010, 0x0010): ("PatientName", "PN"), + (0x0010, 0x0020): ("PatientID", "LO"), + (0x0010, 
0x0030): ("PatientBirthDate", "DA"), + (0x0010, 0x0040): ("PatientSex", "CS"), + (0x0010, 0x1010): ("PatientAge", "AS"), + (0x0010, 0x1020): ("PatientSize", "DS"), + (0x0010, 0x1030): ("PatientWeight", "DS"), + # Image specific (required to construct numpy array) + (0x0028, 0x0002): ("SamplesPerPixel", "US"), + (0x0028, 0x0008): ("NumberOfFrames", "IS"), + (0x0028, 0x0100): ("BitsAllocated", "US"), + (0x0028, 0x0101): ("BitsStored", "US"), + (0x0028, 0x0102): ("HighBit", "US"), + (0x0028, 0x0103): ("PixelRepresentation", "US"), + (0x0028, 0x0010): ("Rows", "US"), + (0x0028, 0x0011): ("Columns", "US"), + (0x0028, 0x1052): ("RescaleIntercept", "DS"), + (0x0028, 0x1053): ("RescaleSlope", "DS"), + # Image specific (for the user) + (0x0028, 0x0030): ("PixelSpacing", "DS"), + (0x0018, 0x0088): ("SliceSpacing", "DS"), +} + +# Define some special tags: +# See PS 3.5-2008 section 7.5 (p.40) +ItemTag = (0xFFFE, 0xE000) # start of Sequence Item +ItemDelimiterTag = (0xFFFE, 0xE00D) # end of Sequence Item +SequenceDelimiterTag = (0xFFFE, 0xE0DD) # end of Sequence of undefined length + +# Define set of groups that we're interested in (so we can quickly skip others) +GROUPS = set([key[0] for key in MINIDICT.keys()]) +VRS = set([val[1] for val in MINIDICT.values()]) + + +class NotADicomFile(Exception): + pass + + +class CompressedDicom(RuntimeError): + pass + + +class SimpleDicomReader(object): + """ + This class provides reading of pixel data from DICOM files. It is + focussed on getting the pixel data, not the meta info. + + To use, first create an instance of this class (giving it + a file object or filename). Next use the info attribute to + get a dict of the meta data. The loading of pixel data is + deferred until get_numpy_array() is called. + + Comparison with Pydicom + ----------------------- + + This code focusses on getting the pixel data out, which allows some + shortcuts, resulting in the code being much smaller. 
+ + Since the processing of data elements is much cheaper (it skips a lot + of tags), this code is about 3x faster than pydicom (except for the + deflated DICOM files). + + This class does borrow some code (and ideas) from the pydicom + project, and (to the best of our knowledge) has the same limitations + as pydicom with regard to the type of files that it can handle. + + Limitations + ----------- + + For more advanced DICOM processing, please check out pydicom. + + * Only a predefined subset of data elements (meta information) is read. + * This is a reader; it can not write DICOM files. + * (just like pydicom) it can handle none of the compressed DICOM + formats except for "Deflated Explicit VR Little Endian" + (1.2.840.10008.1.2.1.99). + + """ + + def __init__(self, file): + # Open file if filename given + if isinstance(file, str): + self._filename = file + self._file = open(file, "rb") + else: + self._filename = "" + self._file = file + # Init variable to store position and size of pixel data + self._pixel_data_loc = None + # The meta header is always explicit and little endian + self.is_implicit_VR = False + self.is_little_endian = True + self._unpackPrefix = "<" + # Dict to store data elements of interest in + self._info = {} + # VR Conversion + self._converters = { + # Numbers + "US": lambda x: self._unpack("H", x), + "UL": lambda x: self._unpack("L", x), + # Numbers encoded as strings + "DS": lambda x: self._splitValues(x, float, "\\"), + "IS": lambda x: self._splitValues(x, int, "\\"), + # strings + "AS": lambda x: x.decode("ascii", "ignore").strip("\x00"), + "DA": lambda x: x.decode("ascii", "ignore").strip("\x00"), + "TM": lambda x: x.decode("ascii", "ignore").strip("\x00"), + "UI": lambda x: x.decode("ascii", "ignore").strip("\x00"), + "LO": lambda x: x.decode("utf-8", "ignore").strip("\x00").rstrip(), + "CS": lambda x: self._splitValues(x, float, "\\"), + "PN": lambda x: x.decode("utf-8", "ignore").strip("\x00").rstrip(), + } + + # Initiate reading + 
self._read() + + @property + def info(self): + return self._info + + def _splitValues(self, x, type, splitter): + s = x.decode("ascii").strip("\x00") + try: + if splitter in s: + return tuple([type(v) for v in s.split(splitter) if v.strip()]) + else: + return type(s) + except ValueError: + return s + + def _unpack(self, fmt, value): + return struct.unpack(self._unpackPrefix + fmt, value)[0] + + # Really only so we need minimal changes to _pixel_data_numpy + def __iter__(self): + return iter(self._info.keys()) + + def __getattr__(self, key): + info = object.__getattribute__(self, "_info") + if key in info: + return info[key] + return object.__getattribute__(self, key) # pragma: no cover + + def _read(self): + f = self._file + # Check prefix after peamble + f.seek(128) + if f.read(4) != b"DICM": + raise NotADicomFile("Not a valid DICOM file.") + # Read + self._read_header() + self._read_data_elements() + self._get_shape_and_sampling() + # Close if done, reopen if necessary to read pixel data + if os.path.isfile(self._filename): + self._file.close() + self._file = None + + def _readDataElement(self): + f = self._file + # Get group and element + group = self._unpack("H", f.read(2)) + element = self._unpack("H", f.read(2)) + # Get value length + if self.is_implicit_VR: + vl = self._unpack("I", f.read(4)) + else: + vr = f.read(2) + if vr in (b"OB", b"OW", b"SQ", b"UN"): + reserved = f.read(2) # noqa + vl = self._unpack("I", f.read(4)) + else: + vl = self._unpack("H", f.read(2)) + # Get value + if group == 0x7FE0 and element == 0x0010: + here = f.tell() + self._pixel_data_loc = here, vl + f.seek(here + vl) + return group, element, b"Deferred loading of pixel data" + else: + if vl == 0xFFFFFFFF: + value = self._read_undefined_length_value() + else: + value = f.read(vl) + return group, element, value + + def _read_undefined_length_value(self, read_size=128): + """Copied (in compacted form) from PyDicom + Copyright Darcy Mason. 
+ """ + fp = self._file + # data_start = fp.tell() + search_rewind = 3 + bytes_to_find = struct.pack( + self._unpackPrefix + "HH", SequenceDelimiterTag[0], SequenceDelimiterTag[1] + ) + + found = False + value_chunks = [] + while not found: + chunk_start = fp.tell() + bytes_read = fp.read(read_size) + if len(bytes_read) < read_size: + # try again, + # if still don't get required amount, this is last block + new_bytes = fp.read(read_size - len(bytes_read)) + bytes_read += new_bytes + if len(bytes_read) < read_size: + raise EOFError( + "End of file reached before sequence " "delimiter found." + ) + index = bytes_read.find(bytes_to_find) + if index != -1: + found = True + value_chunks.append(bytes_read[:index]) + fp.seek(chunk_start + index + 4) # rewind to end of delimiter + length = fp.read(4) + if length != b"\0\0\0\0": + logger.warning( + "Expected 4 zero bytes after undefined length " "delimiter" + ) + else: + fp.seek(fp.tell() - search_rewind) # rewind a bit + # accumulate the bytes read (not including the rewind) + value_chunks.append(bytes_read[:-search_rewind]) + + # if get here then have found the byte string + return b"".join(value_chunks) + + def _read_header(self): + f = self._file + TransferSyntaxUID = None + + # Read all elements, store transferSyntax when we encounter it + try: + while True: + fp_save = f.tell() + # Get element + group, element, value = self._readDataElement() + if group == 0x02: + if group == 0x02 and element == 0x10: + TransferSyntaxUID = value.decode("ascii").strip("\x00") + else: + # No more group 2: rewind and break + # (don't trust group length) + f.seek(fp_save) + break + except (EOFError, struct.error): # pragma: no cover + raise RuntimeError("End of file reached while still in header.") + + # Handle transfer syntax + self._info["TransferSyntaxUID"] = TransferSyntaxUID + # + if TransferSyntaxUID is None: + # Assume ExplicitVRLittleEndian + is_implicit_VR, is_little_endian = False, True + elif TransferSyntaxUID == 
def get_numpy_array(self):
    """Get the numpy array for this DICOM file, with the correct shape,
    and pixel values scaled appropriately.

    If the pixel data was not read when the header was parsed (a
    location is stored in ``_pixel_data_loc`` and the stored
    ``PixelData`` is only a short placeholder), the raw bytes are read
    from the file first. After conversion the raw bytes are replaced by
    a short placeholder again to save memory.

    Raises
    ------
    TypeError
        If this dataset has no ``PixelData`` element.
    """
    # Is there pixel data at all?
    if "PixelData" not in self:
        raise TypeError("No pixel data found in this dataset.")

    # Load it now if it was not already loaded
    if self._pixel_data_loc and len(self.PixelData) < 100:
        # Reopen the file if it is not open; in that case we own the
        # handle and must release it again, also when reading fails.
        close_file = self._file is None
        if close_file:
            self._file = open(self._filename, "rb")
        try:
            offset, length = self._pixel_data_loc[0], self._pixel_data_loc[1]
            self._file.seek(offset)
            if length == 0xFFFFFFFF:
                # Undefined length: read up to the sequence delimiter
                value = self._read_undefined_length_value()
            else:
                value = self._file.read(length)
        finally:
            if close_file:
                self._file.close()
                self._file = None
        # Overwrite the placeholder with the real bytes
        self._info["PixelData"] = value

    # Get data
    data = self._pixel_data_numpy()
    data = self._apply_slope_and_offset(data)

    # Remove data again to preserve memory
    # Note that the data for the original file is loaded twice ...
    self._info["PixelData"] = (
        b"Data converted to numpy array, " b"raw data removed to preserve memory"
    )
    return data
def _pixel_data_numpy(self):
    """Return a NumPy array of the pixel data.

    The dtype is derived from ``PixelRepresentation`` (0 for unsigned,
    1 for signed) and ``BitsAllocated``; the flat buffer is reshaped to
    ``self._info["shape"]``. The bytes are swapped in place when the
    file's byte order differs from the module-level
    ``sys_is_little_endian`` flag.

    Raises
    ------
    TypeError
        If there is no ``PixelData`` element, or if the derived format
        string is not a dtype NumPy understands.
    """
    # Taken from pydicom
    # Copyright (c) 2008-2012 Darcy Mason

    if "PixelData" not in self:
        raise TypeError("No pixel data found in this dataset.")

    # determine the type used for the array
    need_byteswap = self.is_little_endian != sys_is_little_endian

    # Make NumPy format code, e.g. "uint16", "int32" etc
    # from two pieces of info:
    # self.PixelRepresentation -- 0 for unsigned, 1 for signed;
    # self.BitsAllocated -- 8, 16, or 32
    format_str = "%sint%d" % (
        ("u", "")[self.PixelRepresentation],
        self.BitsAllocated,
    )
    try:
        numpy_format = np.dtype(format_str)
    except TypeError as err:  # pragma: no cover
        # Report the format *string*: ``numpy_format`` is not bound when
        # np.dtype() fails, so it must not be used here.
        raise TypeError(
            "Data type not understood by NumPy: format='%s', "
            " PixelRepresentation=%d, BitsAllocated=%d"
            % (format_str, self.PixelRepresentation, self.BitsAllocated)
        ) from err

    # Have correct Numpy format, so create the NumPy array.
    # frombuffer gives a read-only view, hence the copy.
    arr = np.frombuffer(self.PixelData, numpy_format).copy()

    # XXX byte swap - may later handle this in read_file!!?
    if need_byteswap:
        arr.byteswap(inplace=True)  # swap in-place, don't make new copy

    # Note the following reshape operations return a new *view* onto arr,
    # but don't copy the data
    arr = arr.reshape(*self._info["shape"])
    return arr
class DicomSeries(object):
    """A series of dicom files (SimpleDicomReader objects) that belong
    together. If these are multiple files, they represent the slices of
    a volume (like for CT or MRI).

    Entries are added with ``_append``, ordered with ``_sort`` and
    validated with ``_finish``; the latter fills ``info`` (including the
    "shape" and "sampling" entries used by the properties below).
    """

    def __init__(self, suid, progressIndicator):
        # Init dataset list and the callback
        self._entries = []

        # Init props
        self._suid = suid
        self._info = {}
        self._progressIndicator = progressIndicator

    def __len__(self):
        return len(self._entries)

    def __iter__(self):
        return iter(self._entries)

    def __getitem__(self, index):
        return self._entries[index]

    @property
    def suid(self):
        """The series UID that was passed to the constructor."""
        return self._suid

    @property
    def shape(self):
        """The shape of the data (nz, ny, nx)."""
        return self._info["shape"]

    @property
    def sampling(self):
        """The sampling (voxel distances) of the data (dz, dy, dx)."""
        return self._info["sampling"]

    @property
    def info(self):
        """A dictionary containing the information as present in the
        first dicomfile of this serie. Empty until ``_finish`` has run
        on a serie with at least one entry."""
        return self._info

    @property
    def description(self):
        """A description of the dicom series. Used fields are
        PatientName, shape of the data, SeriesDescription, and
        ImageComments.
        """
        info = self.info

        # If no info available, return simple description
        if not info:  # pragma: no cover
            return "DicomSeries containing %i images" % len(self)

        fields = []
        # Give patient name
        if "PatientName" in info:
            fields.append("" + info["PatientName"])
        # Also add dimensions (skipped when no shape has been stored)
        shape = info.get("shape")
        if shape:
            fields.append("x".join(str(d) for d in shape))
        # Try adding more fields
        if "SeriesDescription" in info:
            fields.append("'" + info["SeriesDescription"] + "'")
        if "ImageComments" in info:
            fields.append("'" + info["ImageComments"] + "'")

        # Combine
        return " ".join(fields)

    def __repr__(self):
        adr = hex(id(self)).upper()
        # The format string must contain both placeholders; an empty
        # template makes the % operator raise TypeError.
        return "<DicomSeries with %i images at %s>" % (len(self), adr)

    def get_numpy_array(self):
        """Get (load) the data that this DicomSeries represents, and return
        it as a numpy array. If this serie contains multiple images, the
        per-file arrays are stacked along a new first axis; a single
        file is returned as produced by that file.

        Raises
        ------
        ValueError
            If the serie contains no files.
        RuntimeError
            If the serie has several files but ``_finish`` has not
            filled ``info`` yet.
        """

        # It's easy if no file or if just a single file
        if len(self) == 0:
            raise ValueError("Serie does not contain any files.")
        elif len(self) == 1:
            return self[0].get_numpy_array()

        # Check info (it is an empty dict until _finish() has run)
        if not self.info:
            raise RuntimeError("Cannot return volume if series not finished.")

        # Init data (using what the dicom package produces as a reference)
        first = self[0].get_numpy_array()
        vol = np.zeros(self.shape, dtype=first.dtype)
        vol[0] = first

        # Fill volume
        self._progressIndicator.start("loading data", "", len(self))
        for z in range(1, len(self)):
            vol[z] = self[z].get_numpy_array()
            self._progressIndicator.set_progress(z + 1)
        self._progressIndicator.finish()

        # Done
        import gc

        gc.collect()
        return vol

    def _append(self, dcm):
        self._entries.append(dcm)

    def _sort(self):
        # Order by instance number, then by z-position when available
        self._entries.sort(
            key=lambda k: (
                k.InstanceNumber,
                (
                    k.ImagePositionPatient[2]
                    if hasattr(k, "ImagePositionPatient")
                    else None
                ),
            )
        )

    def _finish(self):
        """
        Evaluate the series of dicom files. Together they should make up
        a volumetric dataset. This means the files should meet certain
        conditions. Also some additional information has to be calculated,
        such as the distance between the slices. This method sets the
        attributes for "shape", "sampling" and "info".

        This method checks:
          * that the dimensions of all images match (ValueError if not)
          * that the pixel spacing of all images match (warning if not)
        """

        # The datasets list should be sorted by instance number
        L = self._entries
        if len(L) == 0:
            return
        elif len(L) == 1:
            self._info = L[0].info
            return

        # Get previous
        ds1 = L[0]
        # Init measures to calculate average of
        distance_sum = 0.0
        # Init measures to check (these are in 2D)
        dimensions = ds1.Rows, ds1.Columns
        sampling = ds1.info["sampling"][:2]  # row, column

        for ds2 in L:
            # The first round ds1 and ds2 will be the same, for the
            # distance calculation this does not matter
            # Get positions
            pos1 = float(ds1.ImagePositionPatient[2])
            pos2 = float(ds2.ImagePositionPatient[2])
            # Update distance_sum to calculate distance later
            distance_sum += abs(pos1 - pos2)
            # Test measures
            dimensions2 = ds2.Rows, ds2.Columns
            sampling2 = ds2.info["sampling"][:2]  # row, column
            if dimensions != dimensions2:
                # We cannot produce a volume if the dimensions do not match
                raise ValueError("Dimensions of slices does not match.")
            if sampling != sampling2:
                # We can still produce a volume, but we should notify the user
                self._progressIndicator.write("Warn: sampling does not match.")
            # Store previous
            ds1 = ds2

        # Finish calculating average distance
        # (Note that there are len(L)-1 distances)
        distance_mean = distance_sum / (len(L) - 1)

        # Set info dict
        self._info = L[0].info.copy()

        # Store information that is specific for the serie
        # (ds2 is the last entry after the loop)
        self._info["shape"] = (len(L),) + ds2.info["shape"]
        self._info["sampling"] = (distance_mean,) + ds2.info["sampling"]
def process_directory(request, progressIndicator, readPixelData=False):
    """
    Reads dicom files and returns a list of DicomSeries objects, which
    contain information about the data, and can be used to load the
    image or volume data.

    The directory examined is ``request.filename`` itself when that is a
    directory, or its parent directory when it is a file; the directory
    is searched recursively. Files are grouped by SeriesInstanceUID,
    the resulting series are sorted by that UID, split where the slice
    positions indicate several datasets, and finally validated; series
    that fail validation are reported through ``progressIndicator`` and
    left out of the result.

    ``readPixelData`` is accepted for backward compatibility but is not
    used by this function: pixel data is loaded when it is requested
    through ``DicomSeries.get_numpy_array()``.

    Raises
    ------
    ValueError
        If ``request.filename`` is neither a directory nor a file.
    """
    # Get directory to examine
    if os.path.isdir(request.filename):
        path = request.filename
    elif os.path.isfile(request.filename):
        path = os.path.dirname(request.filename)
    else:  # pragma: no cover - tested earlier
        raise ValueError("Dicom plugin needs a valid filename to examine the directory")

    # Check files
    files = []
    list_files(files, path)  # Find files recursively

    # Gather file data and put in DicomSeries
    series = {}
    progressIndicator.start("examining files", "files", len(files))
    for count, filename in enumerate(files, start=1):
        # Show progress (counted before any file is skipped)
        progressIndicator.set_progress(count)
        # Skip DICOMDIR files
        if "DICOMDIR" in filename:  # pragma: no cover
            continue
        # Try loading dicom ...
        try:
            dcm = SimpleDicomReader(filename)
        except NotADicomFile:
            continue  # skip non-dicom file
        except Exception as why:  # pragma: no cover
            # Best effort: report the problem and carry on with the rest
            progressIndicator.write(str(why))
            continue
        # Get SUID and register the file with an existing or new series object
        try:
            suid = dcm.SeriesInstanceUID
        except AttributeError:  # pragma: no cover
            continue  # some other kind of dicom file
        if suid not in series:
            series[suid] = DicomSeries(suid, progressIndicator)
        series[suid]._append(dcm)

    # Make a list and sort, so that the order is deterministic
    series = sorted(series.values(), key=lambda x: x.suid)

    # Split series if necessary. Splitting inserts into and removes from
    # ``series``, so iterate over a snapshot taken beforehand.
    for serie in reversed(list(series)):
        splitSerieIfRequired(serie, series, progressIndicator)

    # Finish all series
    series_ = []
    for serie in series:
        try:
            serie._finish()
        except Exception as err:  # pragma: no cover
            # Skip serie (probably report-like file without pixels)
            progressIndicator.write(str(err))
        else:
            series_.append(serie)
    progressIndicator.finish("Found %i correct series." % len(series_))

    # Done
    return series_
+ if distance and newDist > 2.1 * distance: + L2.append([]) + distance = 0 + else: + # Test missing file + if distance and newDist > 1.5 * distance: + progressIndicator.write( + "Warning: missing file after %r" % ds1._filename + ) + distance = newDist + # Add to last list + L2[-1].append(ds2) + # Store previous + ds1 = ds2 + + # Split if we should + if len(L2) > 1: + # At what position are we now? + i = series.index(serie) + # Create new series + series2insert = [] + for L in L2: + newSerie = DicomSeries(serie.suid, progressIndicator) + newSerie._entries = L + series2insert.append(newSerie) + # Insert series and remove self + for newSerie in reversed(series2insert): + series.insert(i, newSerie) + series.remove(serie) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/_freeimage.py b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/_freeimage.py new file mode 100644 index 0000000000000000000000000000000000000000..3b4b88068cde1c9035c43fa993a7627f808a53a1 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/_freeimage.py @@ -0,0 +1,1312 @@ +# -*- coding: utf-8 -*- +# imageio is distributed under the terms of the (new) BSD License. + +# styletest: ignore E261 + +"""Module imageio/freeimage.py + +This module contains the wrapper code for the freeimage library. +The functions defined in this module are relatively thin; just thin +enough so that arguments and results are native Python/numpy data +types. 
def get_freeimage_lib():
    """Ensure we have our version of the binary freeimage lib.

    Returns the value of the ``IMAGEIO_FREEIMAGE_LIB`` environment
    variable when it is set and non-empty. Otherwise, when a binary is
    listed for the current platform, returns whatever
    ``get_remote_file`` yields for it (without automatic download).
    Returns None when no binary is listed for the platform, when
    internet access is not allowed, or when fetching failed with a
    RuntimeError (which is logged as a warning).

    Raises
    ------
    NeedDownloadError
        If the library still has to be downloaded; the message explains
        how to obtain it.
    """
    override = os.getenv("IMAGEIO_FREEIMAGE_LIB", None)
    if override:  # pragma: no cover
        return override

    # If we do not provide a binary, the system may still do ...
    platform_id = get_platform()
    if not platform_id or platform_id not in FNAME_PER_PLATFORM:
        return None

    # Get filename to load
    resource = "freeimage/" + FNAME_PER_PLATFORM[platform_id]
    try:
        return get_remote_file(resource, auto=False)
    except InternetNotAllowedError:
        pass
    except NeedDownloadError:
        raise NeedDownloadError(
            "Need FreeImage library. "
            "You can obtain it with either:\n"
            " - download using the command: "
            "imageio_download_bin freeimage\n"
            " - download by calling (in Python): "
            "imageio.plugins.freeimage.download()\n"
        )
    except RuntimeError as e:  # pragma: no cover
        logger.warning(str(e))
    return None
FIT_FLOAT: [], + FIT_DOUBLE: [], + FIT_COMPLEX: [], + FIT_RGB16: [3], + FIT_RGBA16: [4], + FIT_RGBF: [3], + FIT_RGBAF: [4], + } + + +class IO_FLAGS(object): + FIF_LOAD_NOPIXELS = 0x8000 # loading: load the image header only + # # (not supported by all plugins) + BMP_DEFAULT = 0 + BMP_SAVE_RLE = 1 + CUT_DEFAULT = 0 + DDS_DEFAULT = 0 + EXR_DEFAULT = 0 # save data as half with piz-based wavelet compression + EXR_FLOAT = 0x0001 # save data as float instead of half (not recommended) + EXR_NONE = 0x0002 # save with no compression + EXR_ZIP = 0x0004 # save with zlib compression, in blocks of 16 scan lines + EXR_PIZ = 0x0008 # save with piz-based wavelet compression + EXR_PXR24 = 0x0010 # save with lossy 24-bit float compression + EXR_B44 = 0x0020 # save with lossy 44% float compression + # # - goes to 22% when combined with EXR_LC + EXR_LC = 0x0040 # save images with one luminance and two chroma channels, + # # rather than as RGB (lossy compression) + FAXG3_DEFAULT = 0 + GIF_DEFAULT = 0 + GIF_LOAD256 = 1 # Load the image as a 256 color image with ununsed + # # palette entries, if it's 16 or 2 color + GIF_PLAYBACK = 2 # 'Play' the GIF to generate each frame (as 32bpp) + # # instead of returning raw frame data when loading + HDR_DEFAULT = 0 + ICO_DEFAULT = 0 + ICO_MAKEALPHA = 1 # convert to 32bpp and create an alpha channel from the + # # AND-mask when loading + IFF_DEFAULT = 0 + J2K_DEFAULT = 0 # save with a 16:1 rate + JP2_DEFAULT = 0 # save with a 16:1 rate + JPEG_DEFAULT = 0 # loading (see JPEG_FAST); + # # saving (see JPEG_QUALITYGOOD|JPEG_SUBSAMPLING_420) + JPEG_FAST = 0x0001 # load the file as fast as possible, + # # sacrificing some quality + JPEG_ACCURATE = 0x0002 # load the file with the best quality, + # # sacrificing some speed + JPEG_CMYK = 0x0004 # load separated CMYK "as is" + # # (use | to combine with other load flags) + JPEG_EXIFROTATE = 0x0008 # load and rotate according to + # # Exif 'Orientation' tag if available + JPEG_QUALITYSUPERB = 0x80 # save with 
superb quality (100:1) + JPEG_QUALITYGOOD = 0x0100 # save with good quality (75:1) + JPEG_QUALITYNORMAL = 0x0200 # save with normal quality (50:1) + JPEG_QUALITYAVERAGE = 0x0400 # save with average quality (25:1) + JPEG_QUALITYBAD = 0x0800 # save with bad quality (10:1) + JPEG_PROGRESSIVE = 0x2000 # save as a progressive-JPEG + # # (use | to combine with other save flags) + JPEG_SUBSAMPLING_411 = 0x1000 # save with high 4x1 chroma + # # subsampling (4:1:1) + JPEG_SUBSAMPLING_420 = 0x4000 # save with medium 2x2 medium chroma + # # subsampling (4:2:0) - default value + JPEG_SUBSAMPLING_422 = 0x8000 # save /w low 2x1 chroma subsampling (4:2:2) + JPEG_SUBSAMPLING_444 = 0x10000 # save with no chroma subsampling (4:4:4) + JPEG_OPTIMIZE = 0x20000 # on saving, compute optimal Huffman coding tables + # # (can reduce a few percent of file size) + JPEG_BASELINE = 0x40000 # save basic JPEG, without metadata or any markers + KOALA_DEFAULT = 0 + LBM_DEFAULT = 0 + MNG_DEFAULT = 0 + PCD_DEFAULT = 0 + PCD_BASE = 1 # load the bitmap sized 768 x 512 + PCD_BASEDIV4 = 2 # load the bitmap sized 384 x 256 + PCD_BASEDIV16 = 3 # load the bitmap sized 192 x 128 + PCX_DEFAULT = 0 + PFM_DEFAULT = 0 + PICT_DEFAULT = 0 + PNG_DEFAULT = 0 + PNG_IGNOREGAMMA = 1 # loading: avoid gamma correction + PNG_Z_BEST_SPEED = 0x0001 # save using ZLib level 1 compression flag + # # (default value is 6) + PNG_Z_DEFAULT_COMPRESSION = 0x0006 # save using ZLib level 6 compression + # # flag (default recommended value) + PNG_Z_BEST_COMPRESSION = 0x0009 # save using ZLib level 9 compression flag + # # (default value is 6) + PNG_Z_NO_COMPRESSION = 0x0100 # save without ZLib compression + PNG_INTERLACED = 0x0200 # save using Adam7 interlacing (use | to combine + # # with other save flags) + PNM_DEFAULT = 0 + PNM_SAVE_RAW = 0 # Writer saves in RAW format (i.e. P4, P5 or P6) + PNM_SAVE_ASCII = 1 # Writer saves in ASCII format (i.e. 
class METADATA_DATATYPE(object):
    """Integer codes (``FIDT_*``) for the data type of a metadata tag
    value, plus ``dtypes``, which maps those codes to numpy dtypes.

    ``FIDT_ASCII`` has no entry in ``dtypes``, and no code 15 is
    defined here.
    NOTE(review): presumably these values mirror the tag data type enum
    of the FreeImage C library - confirm against the FreeImage headers.
    """

    FIDT_BYTE = 1  # 8-bit unsigned integer
    FIDT_ASCII = 2  # 8-bit bytes w/ last byte null
    FIDT_SHORT = 3  # 16-bit unsigned integer
    FIDT_LONG = 4  # 32-bit unsigned integer
    FIDT_RATIONAL = 5  # 64-bit unsigned fraction
    FIDT_SBYTE = 6  # 8-bit signed integer
    FIDT_UNDEFINED = 7  # 8-bit untyped data
    FIDT_SSHORT = 8  # 16-bit signed integer
    FIDT_SLONG = 9  # 32-bit signed integer
    FIDT_SRATIONAL = 10  # 64-bit signed fraction
    FIDT_FLOAT = 11  # 32-bit IEEE floating point
    FIDT_DOUBLE = 12  # 64-bit IEEE floating point
    FIDT_IFD = 13  # 32-bit unsigned integer (offset)
    FIDT_PALETTE = 14  # 32-bit RGBQUAD
    FIDT_LONG8 = 16  # 64-bit unsigned integer
    FIDT_SLONG8 = 17  # 64-bit signed integer
    FIDT_IFD8 = 18  # 64-bit unsigned integer (offset)

    # Type code -> numpy dtype. Fractions and palette entries use
    # structured dtypes given as lists of (field name, dtype) pairs.
    dtypes = {
        FIDT_BYTE: numpy.uint8,
        FIDT_SHORT: numpy.uint16,
        FIDT_LONG: numpy.uint32,
        FIDT_RATIONAL: [("numerator", numpy.uint32), ("denominator", numpy.uint32)],
        FIDT_LONG8: numpy.uint64,
        FIDT_SLONG8: numpy.int64,
        FIDT_IFD8: numpy.uint64,
        FIDT_SBYTE: numpy.int8,
        FIDT_UNDEFINED: numpy.uint8,
        FIDT_SSHORT: numpy.int16,
        FIDT_SLONG: numpy.int32,
        FIDT_SRATIONAL: [("numerator", numpy.int32), ("denominator", numpy.int32)],
        FIDT_FLOAT: numpy.float32,
        FIDT_DOUBLE: numpy.float64,
        FIDT_IFD: numpy.uint32,
        FIDT_PALETTE: [
            ("R", numpy.uint8),
            ("G", numpy.uint8),
            ("B", numpy.uint8),
            ("A", numpy.uint8),
        ],
    }
This is really not helpful, + # because then passing it back to another library call will + # cause truncation-to-32-bits on 64-bit systems. Thanks, ctypes! + # So after these calls one must immediately re-wrap the int as + # a c_void_p if it is to be passed back into FreeImage. + "FreeImage_AllocateT": (ctypes.c_void_p, None), + "FreeImage_FindFirstMetadata": (ctypes.c_void_p, None), + "FreeImage_GetBits": (ctypes.c_void_p, None), + "FreeImage_GetPalette": (ctypes.c_void_p, None), + "FreeImage_GetTagKey": (ctypes.c_char_p, None), + "FreeImage_GetTagValue": (ctypes.c_void_p, None), + "FreeImage_CreateTag": (ctypes.c_void_p, None), + "FreeImage_Save": (ctypes.c_void_p, None), + "FreeImage_Load": (ctypes.c_void_p, None), + "FreeImage_LoadFromMemory": (ctypes.c_void_p, None), + "FreeImage_OpenMultiBitmap": (ctypes.c_void_p, None), + "FreeImage_LoadMultiBitmapFromMemory": (ctypes.c_void_p, None), + "FreeImage_LockPage": (ctypes.c_void_p, None), + "FreeImage_OpenMemory": (ctypes.c_void_p, None), + # 'FreeImage_ReadMemory': (ctypes.c_void_p, None), + # 'FreeImage_CloseMemory': (ctypes.c_void_p, None), + "FreeImage_GetVersion": (ctypes.c_char_p, None), + "FreeImage_GetFIFExtensionList": (ctypes.c_char_p, None), + "FreeImage_GetFormatFromFIF": (ctypes.c_char_p, None), + "FreeImage_GetFIFDescription": (ctypes.c_char_p, None), + "FreeImage_ColorQuantizeEx": (ctypes.c_void_p, None), + # Pypy wants some extra definitions, so here we go ... 
+ "FreeImage_IsLittleEndian": (ctypes.c_int, None), + "FreeImage_SetOutputMessage": (ctypes.c_void_p, None), + "FreeImage_GetFIFCount": (ctypes.c_int, None), + "FreeImage_IsPluginEnabled": (ctypes.c_int, None), + "FreeImage_GetFileType": (ctypes.c_int, None), + # + "FreeImage_GetTagType": (ctypes.c_int, None), + "FreeImage_GetTagLength": (ctypes.c_int, None), + "FreeImage_FindNextMetadata": (ctypes.c_int, None), + "FreeImage_FindCloseMetadata": (ctypes.c_void_p, None), + # + "FreeImage_GetFIFFromFilename": (ctypes.c_int, None), + "FreeImage_FIFSupportsReading": (ctypes.c_int, None), + "FreeImage_FIFSupportsWriting": (ctypes.c_int, None), + "FreeImage_FIFSupportsExportType": (ctypes.c_int, None), + "FreeImage_FIFSupportsExportBPP": (ctypes.c_int, None), + "FreeImage_GetHeight": (ctypes.c_int, None), + "FreeImage_GetWidth": (ctypes.c_int, None), + "FreeImage_GetImageType": (ctypes.c_int, None), + "FreeImage_GetBPP": (ctypes.c_int, None), + "FreeImage_GetColorsUsed": (ctypes.c_int, None), + "FreeImage_ConvertTo32Bits": (ctypes.c_void_p, None), + "FreeImage_GetPitch": (ctypes.c_int, None), + "FreeImage_Unload": (ctypes.c_void_p, None), + } + + def __init__(self): + # Initialize freeimage lib as None + self._lib = None + + # A lock to create thread-safety + self._lock = threading.RLock() + + # Init log messages lists + self._messages = [] + + # Select functype for error handler + if sys.platform.startswith("win"): + functype = ctypes.WINFUNCTYPE + else: + functype = ctypes.CFUNCTYPE + + # Create output message handler + @functype(None, ctypes.c_int, ctypes.c_char_p) + def error_handler(fif, message): + message = message.decode("utf-8") + self._messages.append(message) + while (len(self._messages)) > 256: + self._messages.pop(0) + + # Make sure to keep a ref to function + self._error_handler = error_handler + + @property + def lib(self): + if self._lib is None: + try: + self.load_freeimage() + except OSError as err: + self._lib = "The freeimage library could not be 
def load_freeimage(self):
    """Try to load the freeimage lib from the system. If not successful,
    try to download the imageio version and try again.

    A library that loads is only accepted when it reports version 3.15
    or newer; the version is compared numerically on (major, minor), so
    that e.g. "3.9" is correctly treated as older than "3.15". On
    success the output-message handler is installed and the reported
    version is stored in ``self.lib_version``.
    """

    def _major_minor(version):
        # Leading digits of the first two dot-separated fields, as ints;
        # a missing or non-numeric field counts as 0.
        numbers = []
        for piece in (version.split(".") + ["", ""])[:2]:
            digits = ""
            for ch in piece.strip():
                if ch not in "0123456789":
                    break
                digits += ch
            numbers.append(int(digits) if digits else 0)
        return tuple(numbers)

    # Load library and register API
    success = False
    try:
        # Try without forcing a download, but giving preference
        # to the imageio-provided lib (if previously downloaded)
        self._load_freeimage()
        self._register_api()
        found = self.lib.FreeImage_GetVersion().decode("utf-8")
        if _major_minor(found) >= (3, 15):
            success = True
    except OSError:
        pass

    if not success:
        # Ensure we have our own lib, try again
        get_freeimage_lib()
        self._load_freeimage()
        self._register_api()

    # Wrap up
    self.lib.FreeImage_SetOutputMessage(self._error_handler)
    self.lib_version = self.lib.FreeImage_GetVersion().decode("utf-8")
def getFIF(self, filename, mode, bb=None):
    """Get the freeimage Format (FIF) from a given filename.

    If mode is 'r', the format is first determined from the content:
    from ``bb`` when bytes are given, then from the file on disk if it
    exists. If that yields nothing (or mode is 'w'), the filename alone
    is used.

    This function also tests whether the format supports reading/writing.

    Raises:
        ValueError: if ``mode`` is not exactly "r" or "w", if the format
            cannot be determined, or if the format does not support the
            requested mode.
    """
    with self as lib:
        # A tuple test, not a substring test: "" and "rw" must be rejected.
        if mode not in ("r", "w"):
            raise ValueError('Invalid mode (must be "r" or "w").')

        ftype = -1

        # Try getting format from the content. Note that some files
        # do not have a header that allows reading the format from
        # the file.
        if mode == "r":
            if bb is not None:
                fimemory = lib.FreeImage_OpenMemory(ctypes.c_char_p(bb), len(bb))
                try:
                    ftype = lib.FreeImage_GetFileTypeFromMemory(
                        ctypes.c_void_p(fimemory), len(bb)
                    )
                finally:
                    # Release the memory handle even if detection raised.
                    lib.FreeImage_CloseMemory(ctypes.c_void_p(fimemory))
            if (ftype == -1) and os.path.isfile(filename):
                ftype = lib.FreeImage_GetFileType(efn(filename), 0)

        # Try getting the format from the extension
        if ftype == -1:
            ftype = lib.FreeImage_GetFIFFromFilename(efn(filename))

        # Test if ok
        if ftype == -1:
            raise ValueError('Cannot determine format of file "%s"' % filename)
        elif mode == "w" and not lib.FreeImage_FIFSupportsWriting(ftype):
            raise ValueError('Cannot write the format of file "%s"' % filename)
        elif mode == "r" and not lib.FreeImage_FIFSupportsReading(ftype):
            raise ValueError('Cannot read the format of file "%s"' % filename)
        return ftype
def close(self):
    """Run every registered close function and forget the bitmap.

    Does nothing when no bitmap is set or no close functions are
    registered. Each entry is ``(callable, *args)``; it is invoked while
    holding ``self._fi``, and any exception it raises is ignored so the
    remaining entries still run.
    """
    if self._bitmap is None or not self._close_funcs:
        return

    for entry in self._close_funcs:
        try:
            with self._fi:
                release, *release_args = entry
                release(*release_args)
        except Exception:  # pragma: no cover
            pass  # best effort: closing must not raise

    self._close_funcs = []
    self._bitmap = None
def set_meta_data(self, metadata):
    """Write *metadata* onto the wrapped bitmap as FreeImage tags.

    ``metadata`` maps model names to dicts of ``tag_name -> value``.
    Model names are matched against the ``FIMD_*`` attributes of
    ``METADATA_MODELS`` (prefix stripped); unknown models are skipped
    silently. ``str`` values that encode as ASCII are stored as
    ``FIDT_ASCII`` tags. Any other value is stored from its numpy
    representation, and skipped with a warning when its dtype has no
    entry in ``METADATA_DATATYPE.dtypes``. Errors while writing a tag are
    logged as warnings, not raised; the tag object is always deleted.
    """
    # Map model_name (without the FIMD_ prefix) to its number
    models = {
        name[5:]: number
        for name, number in METADATA_MODELS.__dict__.items()
        if name.startswith("FIMD_")
    }

    def get_tag_type_number(dtype):
        """Return the tag type number registered for *dtype*, or None."""
        for number, numpy_dtype in METADATA_DATATYPE.dtypes.items():
            if dtype == numpy_dtype:
                return number
        return None

    with self._fi as lib:
        for model_name, subdict in metadata.items():
            # Get model number
            number = models.get(model_name, None)
            if number is None:
                continue  # Unknown model, silent ignore

            for tag_name, tag_val in subdict.items():
                # Create new tag
                tag = lib.FreeImage_CreateTag()
                tag = ctypes.c_void_p(tag)

                try:
                    # Convert Python value to FI type, val
                    is_ascii = False
                    if isinstance(tag_val, str):
                        try:
                            tag_bytes = tag_val.encode("ascii")
                            is_ascii = True
                        except UnicodeError:
                            pass
                    if is_ascii:
                        tag_type = METADATA_DATATYPE.FIDT_ASCII
                        tag_count = len(tag_bytes)
                    else:
                        if not hasattr(tag_val, "dtype"):
                            tag_val = numpy.array([tag_val])
                        tag_type = get_tag_type_number(tag_val.dtype)
                        if tag_type is None:
                            logger.warning(
                                "imageio.freeimage warning: Could not "
                                "determine tag type of %r." % tag_name
                            )
                            continue  # the finally clause still deletes the tag
                        tag_bytes = tag_val.tobytes()
                        tag_count = tag_val.size
                    # Set properties
                    lib.FreeImage_SetTagKey(tag, tag_name.encode("utf-8"))
                    lib.FreeImage_SetTagType(tag, tag_type)
                    lib.FreeImage_SetTagCount(tag, tag_count)
                    lib.FreeImage_SetTagLength(tag, len(tag_bytes))
                    lib.FreeImage_SetTagValue(tag, tag_bytes)
                    # Store tag
                    tag_key = lib.FreeImage_GetTagKey(tag)
                    lib.FreeImage_SetMetadata(number, self._bitmap, tag_key, tag)

                except Exception as err:  # pragma: no cover
                    # Prefix spelled like the other warnings of this module
                    logger.warning(
                        "imageio.freeimage warning: Could not set tag "
                        "%r: %s, %s"
                        % (tag_name, self._fi._get_error_message(), str(err))
                    )
                finally:
                    lib.FreeImage_DeleteTag(tag)
def save_to_filename(self, filename=None):
    """Save the wrapped bitmap to *filename* (default: ``self._filename``).

    Raises:
        TypeError: if the file format cannot store this bitmap (checked
            by bit depth for ``FIT_BITMAP``, by pixel type otherwise).
        RuntimeError: if ``FreeImage_Save`` returns ``None``.
    """
    target = self._filename if filename is None else filename

    fif = self._ftype
    handle = self._bitmap
    pixel_type = self._fi_type  # element type

    with self._fi as lib:
        # Can this format store the bitmap at all?
        if pixel_type == FI_TYPES.FIT_BITMAP:
            supported = lib.FreeImage_FIFSupportsExportBPP(
                fif, lib.FreeImage_GetBPP(handle)
            )
        else:
            supported = lib.FreeImage_FIFSupportsExportType(fif, pixel_type)
        if not supported:
            raise TypeError("Cannot save image of this format to this file type")

        outcome = lib.FreeImage_Save(fif, handle, efn(target), self._flags)
        # NOTE(review): only a None result counts as failure here; confirm
        # against the restype registered for FreeImage_Save.
        if outcome is None:  # pragma: no cover, we do so many checks, this is rare
            raise RuntimeError(
                f"Could not save file `{self._filename}`: {self._fi._get_error_message()}"
            )
' + # 'to this file type') + # + # # Extract the bytes + # fimemory = lib.FreeImage_OpenMemory(0, 0) + # res = lib.FreeImage_SaveToMemory(ftype, bitmap, + # ctypes.c_void_p(fimemory), + # self._flags) + # if res: + # N = lib.FreeImage_TellMemory(ctypes.c_void_p(fimemory)) + # result = ctypes.create_string_buffer(N) + # lib.FreeImage_SeekMemory(ctypes.c_void_p(fimemory), 0) + # lib.FreeImage_ReadMemory(result, 1, N, ctypes.c_void_p(fimemory)) + # result = result.raw + # lib.FreeImage_CloseMemory(ctypes.c_void_p(fimemory)) + # + # # Check + # if not res: + # raise RuntimeError('Could not save file "%s": %s' + # % (self._filename, self._fi._get_error_message())) + # + # # Done + # return result + + def get_image_data(self): + dtype, shape, bpp = self._get_type_and_shape() + array = self._wrap_bitmap_bits_in_array(shape, dtype, False) + with self._fi as lib: + isle = lib.FreeImage_IsLittleEndian() + + # swizzle the color components and flip the scanlines to go from + # FreeImage's BGR[A] and upside-down internal memory format to + # something more normal + def n(arr): + # return arr[..., ::-1].T # Does not work on numpypy yet + if arr.ndim == 1: # pragma: no cover + return arr[::-1].T + elif arr.ndim == 2: # Always the case here ... + return arr[:, ::-1].T + elif arr.ndim == 3: # pragma: no cover + return arr[:, :, ::-1].T + elif arr.ndim == 4: # pragma: no cover + return arr[:, :, :, ::-1].T + + if len(shape) == 3 and isle and dtype.type == numpy.uint8: + b = n(array[0]) + g = n(array[1]) + r = n(array[2]) + if shape[0] == 3: + return numpy.dstack((r, g, b)) + elif shape[0] == 4: + a = n(array[3]) + return numpy.dstack((r, g, b, a)) + else: # pragma: no cover - we check this earlier + raise ValueError("Cannot handle images of shape %s" % shape) + + # We need to copy because array does *not* own its memory + # after bitmap is freed. 
def set_image_data(self, array):
    """Copy *array* into the bitmap's pixel buffer.

    2-D arrays are written as a single channel, 3-D arrays with the last
    axis as channel. Rows are flipped and axes transposed on the way in.
    For 8-bit data on little-endian machines the first three channels are
    written in reverse order (see the comment below). An 8-bit
    single-channel bitmap additionally gets ``GREY_PALETTE`` copied into
    its palette.

    Raises:
        ValueError: if *array* is not 2- or 3-dimensional.
        RuntimeError: if the palette of an 8-bit single-channel bitmap
            cannot be obtained.
    """
    # Prepare array
    assert isinstance(array, numpy.ndarray)
    shape = array.shape
    dtype = array.dtype
    with self._fi as lib:
        isle = lib.FreeImage_IsLittleEndian()

    # Shape of the wrapped buffer: (columns, rows), with a leading
    # channel axis for multi-channel data.
    r, c = shape[:2]
    if len(shape) == 2:
        w_shape = (c, r)
    elif len(shape) == 3:
        w_shape = (shape[2], c, r)
    else:
        # Previously fell through with w_shape unbound (UnboundLocalError).
        raise ValueError("Cannot handle images of shape %s" % (shape,))

    def n(arr):  # normalise to freeimage's in-memory format
        return arr[::-1].T

    wrapped_array = self._wrap_bitmap_bits_in_array(w_shape, dtype, True)
    # swizzle the color components and flip the scanlines to go to
    # FreeImage's BGR[A] and upside-down internal memory format
    # The BGR[A] order is only used for 8bits per channel images
    # on little endian machines. For everything else RGB[A] is
    # used.
    if len(shape) == 3 and isle and dtype.type == numpy.uint8:
        R = array[:, :, 0]
        G = array[:, :, 1]
        B = array[:, :, 2]
        wrapped_array[0] = n(B)
        wrapped_array[1] = n(G)
        wrapped_array[2] = n(R)
        if shape[2] == 4:
            A = array[:, :, 3]
            wrapped_array[3] = n(A)
    else:
        wrapped_array[:] = n(array)
    if self._need_finish:
        self._finish_wrapped_array(wrapped_array)

    if len(shape) == 2 and dtype.type == numpy.uint8:
        with self._fi as lib:
            palette = lib.FreeImage_GetPalette(self._bitmap)
        palette = ctypes.c_void_p(palette)
        if not palette:
            raise RuntimeError("Could not get image palette")
        try:
            palette_data = GREY_PALETTE.ctypes.data
        except Exception:  # pragma: no cover - IS_PYPY
            palette_data = GREY_PALETTE.__array_interface__["data"][0]
        ctypes.memmove(palette, palette_data, 1024)
def _finish_wrapped_array(self, array):  # IS_PYPY
    """Copy *array* into the bitmap's pixel buffer, scanline by scanline.

    ``array`` is indexed as ``(column, row)`` for one channel, or
    ``(channel, column, row)`` otherwise. Every scanline is written as
    interleaved pixel bytes followed by zero padding up to the bitmap's
    pitch, so rows stay aligned for any item size and any amount of
    padding.

    Raises:
        ValueError: if a row of *array* does not fit the bitmap's pitch,
            or leaves ten or more pixels of padding (the same tolerance
            the previous element-based check used).
    """
    # Get bitmap info
    with self._fi as lib:
        pitch = lib.FreeImage_GetPitch(self._bitmap)
        bits = lib.FreeImage_GetBits(self._bitmap)
        bpp = lib.FreeImage_GetBPP(self._bitmap)

    nchannels = bpp // 8 // array.itemsize
    height = array.shape[-1]
    width = array.shape[-2]
    pixel_bytes = nchannels * array.itemsize
    row_bytes = width * pixel_bytes

    # The pitch is measured in bytes, so the padding is checked in bytes.
    padding = pitch - row_bytes
    if not 0 <= padding < 10 * pixel_bytes:
        raise ValueError(
            "Array rows of %i bytes do not match bitmap pitch of %i bytes"
            % (row_bytes, pitch)
        )

    # Interleave the channels: packed[row, column, channel]
    packed = numpy.zeros((height, width, nchannels), array.dtype)
    if nchannels == 1:
        packed[:, :, 0] = array.T
    else:
        for i in range(nchannels):
            packed[:, :, i] = array[i, :, :].T

    # Lay the rows out at pitch distance, zero padded at the end
    scanlines = numpy.zeros((height, pitch), dtype=numpy.uint8)
    scanlines[:, :row_bytes] = numpy.frombuffer(
        packed.tobytes(), dtype=numpy.uint8
    ).reshape(height, row_bytes)

    # copy data
    data_ptr = scanlines.__array_interface__["data"][0]
    ctypes.memmove(bits, data_ptr, scanlines.nbytes)
def quantize(self, quantizer=0, palettesize=256):
    """Return a new FIBitmap holding a quantized (8-bit, paletted) copy.

    Only for 24 bit images. The new bitmap shares this object's
    filename, file type and flags, and is unloaded with
    ``FreeImage_Unload`` when closed.

    Raises:
        ValueError: if FreeImage returns a null bitmap.
    """
    with self._fi as lib:
        raw_handle = lib.FreeImage_ColorQuantizeEx(
            self._bitmap, quantizer, palettesize, 0, None
        )
        quantized = ctypes.c_void_p(raw_handle)

        if not quantized:
            raise ValueError(
                'Could not quantize bitmap "%s": %s'
                % (self._filename, self._fi._get_error_message())
            )

        result = FIBitmap(self._fi, self._filename, self._ftype, self._flags)
        result._set_bitmap(quantized, (lib.FreeImage_Unload, quantized))
        result._fi_type = self._fi_type
        return result
def load_from_filename(self, filename=None):
    """Open *filename* (default: ``self._filename``) as a multi-page bitmap.

    The file is opened read-only, without creating it and without keeping
    the cache in memory. The resulting handle is stored together with
    ``FreeImage_CloseMultiBitmap`` as its close function.

    Raises:
        ValueError: if FreeImage returns a null handle.
    """
    path = self._filename if filename is None else filename  # pragma: no cover

    with self._fi as lib:
        raw_handle = lib.FreeImage_OpenMultiBitmap(
            self._ftype,
            efn(path),
            False,  # create_new
            True,  # read_only
            False,  # keep_cache_in_memory
            self._flags,
        )
        multibitmap = ctypes.c_void_p(raw_handle)

        if not multibitmap:  # pragma: no cover
            reason = self._fi._get_error_message()
            raise ValueError(
                'Could not open file "%s" as multi-image: %s'
                % (self._filename, reason)
            )

        closer = (lib.FreeImage_CloseMultiBitmap, multibitmap)
        self._set_bitmap(multibitmap, closer)
def get_page(self, index):
    """Lock page *index* and return it wrapped in a new FIBitmap.

    Closing the returned bitmap unlocks the page again
    (``FreeImage_UnlockPage`` with the last argument ``False``), so
    please close it when done.

    Raises:
        ValueError: if the page cannot be locked.
    """
    with self._fi as lib:
        raw_page = lib.FreeImage_LockPage(self._bitmap, index)
        page = ctypes.c_void_p(raw_page)
        if not page:  # pragma: no cover
            raise ValueError(
                "Could not open sub-image %i in %r: %s"
                % (index, self._filename, self._fi._get_error_message())
            )

        # Wrap the low-level page in a bitmap object
        wrapper = FIBitmap(self._fi, self._filename, self._ftype, self._flags)
        unlock = (lib.FreeImage_UnlockPage, self._bitmap, page, False)
        wrapper._set_bitmap(page, unlock)
        return wrapper
/dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/_swf.py @@ -0,0 +1,897 @@ +# -*- coding: utf-8 -*- +# imageio is distributed under the terms of the (new) BSD License. +# This code was taken from https://github.com/almarklein/visvis/blob/master/vvmovie/images2swf.py + +# styletest: ignore E261 + +""" +Provides a function (write_swf) to store a series of numpy arrays in an +SWF movie, that can be played on a wide range of OS's. + +In desperation of wanting to share animated images, and then lacking a good +writer for animated gif or .avi, I decided to look into SWF. This format +is very well documented. + +This is a pure python module to create an SWF file that shows a series +of images. The images are stored using the DEFLATE algorithm (same as +PNG and ZIP and which is included in the standard Python distribution). +As this compression algorithm is much more effective than that used in +GIF images, we obtain better quality (24 bit colors + alpha channel) +while still producing smaller files (a test showed ~75%). Although +SWF also allows for JPEG compression, doing so would probably require +a third party library for the JPEG encoding/decoding, we could +perhaps do this via Pillow or freeimage. + +sources and tools: + +- SWF on wikipedia +- Adobe's "SWF File Format Specification" version 10 + (http://www.adobe.com/devnet/swf/pdf/swf_file_format_spec_v10.pdf) +- swftools (swfdump in specific) for debugging +- iwisoft swf2avi can be used to convert swf to avi/mpg/flv with really + good quality, while file size is reduced with factors 20-100. + A good program in my opinion. The free version has the limitation + of a watermark in the upper left corner. + +""" + +import os +import zlib +import time # noqa +import logging + +import numpy as np + + +logger = logging.getLogger(__name__) + +# todo: use Pillow to support reading JPEG images from SWF? 
class BitArray:
    """Dynamic array of bits that automatically resizes
    with factors of two.

    Bits are stored as the ASCII codes of the characters appended.
    Append bits using .append() or += (``+`` appends in place and returns
    the same object). You can reverse bits using .reverse().
    ``str()`` gives the bits back as a string.
    """

    def __init__(self, initvalue=None):
        self.data = np.zeros((16,), dtype=np.uint8)
        self._len = 0
        if initvalue is not None:
            self.append(initvalue)

    def __len__(self):
        return self._len  # number of bits in use, not the buffer size

    def __repr__(self):
        return self.data[: self._len].tobytes().decode("ascii")

    def _checkSize(self):
        """Double the buffer when it is full."""
        arraylen = self.data.shape[0]
        if self._len >= arraylen:
            tmp = np.zeros((arraylen * 2,), dtype=np.uint8)
            tmp[: self._len] = self.data[: self._len]
            self.data = tmp

    def __add__(self, value):
        # In-place on purpose: this is what makes ``bits += "01"`` cheap.
        self.append(value)
        return self

    def append(self, bits):
        """Append *bits*, given as a str, an int or another BitArray.

        Raises:
            ValueError: if *bits* is of any other type.
        """
        # check input
        if isinstance(bits, BitArray):
            bits = str(bits)
        if isinstance(bits, int):  # pragma: no cover - we dont use it
            bits = str(bits)
        if not isinstance(bits, str):  # pragma: no cover
            raise ValueError("Append bits as strings or integers!")

        # add bits
        for bit in bits:
            self.data[self._len] = ord(bit)
            self._len += 1
            self._checkSize()

    def reverse(self):
        """In-place reverse."""
        tmp = self.data[: self._len].copy()
        self.data[: self._len] = tmp[::-1]

    def tobytes(self):
        """Convert to bytes. If necessary,
        zeros are padded to the end (right side).
        """
        bits = str(self)

        # round up to whole bytes and pad on the right
        nbytes = (len(bits) + 7) // 8
        bits = bits.ljust(nbytes * 8, "0")

        # one pass, one allocation
        return bytes(int(bits[i : i + 8], 2) for i in range(0, len(bits), 8))
def floats2bits(arr):
    """Given a few non-negative numbers, convert them to bits,
    stored as FB (float bit values).

    The result starts with the value 31 in 5 bits, followed per number by
    15 bits for the integer part and 16 bits for the fractional part
    (the fraction scaled by 2**16 and truncated).

    Raises:
        ValueError: if a number is negative (not implemented).
    """
    bits = int2bits(31, 5)  # 32 does not fit in 5 bits!
    for value in arr:
        if value < 0:  # pragma: no cover
            raise ValueError("Dit not implement negative floats!")
        whole = int(value)
        fraction = value - whole
        bits += int2bits(whole, 15)
        # int2bits shifts its argument with >>, which is undefined for
        # floats, so the scaled fraction must be converted to int first.
        bits += int2bits(int(fraction * 2**16), 16)
    return bits
+ if scale_xy is None and rot_xy is None and trans_xy is None: + return "0" * 8 + + # init + bits = BitArray() + + # scale + if scale_xy: + bits += "1" + bits += floats2bits([scale_xy[0], scale_xy[1]]) + else: + bits += "0" + + # rotation + if rot_xy: + bits += "1" + bits += floats2bits([rot_xy[0], rot_xy[1]]) + else: + bits += "0" + + # translation (no flag here) + if trans_xy: + bits += twits2bits([trans_xy[0], trans_xy[1]]) + else: + bits += twits2bits([0, 0]) + + # done + return bits + + +# Control tags + + +class ControlTag(Tag): + def __init__(self): + Tag.__init__(self) + + +class FileAttributesTag(ControlTag): + def __init__(self): + ControlTag.__init__(self) + self.tagtype = 69 + + def process_tag(self): + self.bytes = "\x00".encode("ascii") * (1 + 3) + + +class ShowFrameTag(ControlTag): + def __init__(self): + ControlTag.__init__(self) + self.tagtype = 1 + + def process_tag(self): + self.bytes = bytes() + + +class SetBackgroundTag(ControlTag): + """Set the color in 0-255, or 0-1 (if floats given).""" + + def __init__(self, *rgb): + self.tagtype = 9 + if len(rgb) == 1: + rgb = rgb[0] + self.rgb = rgb + + def process_tag(self): + bb = bytes() + for i in range(3): + clr = self.rgb[i] + if isinstance(clr, float): # pragma: no cover - not used + clr = clr * 255 + bb += int2uint8(clr) + self.bytes = bb + + +class DoActionTag(Tag): + def __init__(self, action="stop"): + Tag.__init__(self) + self.tagtype = 12 + self.actions = [action] + + def append(self, action): # pragma: no cover - not used + self.actions.append(action) + + def process_tag(self): + bb = bytes() + + for action in self.actions: + action = action.lower() + if action == "stop": + bb += "\x07".encode("ascii") + elif action == "play": # pragma: no cover - not used + bb += "\x06".encode("ascii") + else: # pragma: no cover + logger.warning("unknown action: %s" % action) + + bb += int2uint8(0) + self.bytes = bb + + +# Definition tags +class DefinitionTag(Tag): + counter = 0 # to give automatically id's 
class BitmapTag(DefinitionTag):
    """Definition tag (type 36, DefineBitsLossless2) wrapping one image.

    The image is expanded to a 4-channel uint8 ARGB array and compressed
    with zlib when the tag is constructed; process_tag only assembles the
    payload from the stored pieces.
    """

    def __init__(self, im):
        DefinitionTag.__init__(self)
        self.tagtype = 36  # DefineBitsLossless2

        shape = im.shape
        has_channels = len(shape) == 3
        if has_channels:
            if shape[2] not in [3, 4]:  # pragma: no cover
                raise ValueError("Invalid shape to be an image.")
        elif len(shape) != 2:  # pragma: no cover
            raise ValueError("Invalid shape to be an image.")

        # Every image, grayscale included, is stored as ARGB. Start from an
        # all-255 array so the alpha channel is opaque unless the input
        # provides its own.
        argb = np.ones((shape[0], shape[1], 4), dtype=np.uint8) * 255
        if has_channels:
            # Input channels 0..2 go to output channels 1..3.
            argb[:, :, 1:4] = im[:, :, :3]
            if shape[2] == 4:
                # The input alpha is the last channel; ARGB wants it first.
                argb[:, :, 0] = im[:, :, 3]
        else:
            # Grayscale: replicate the single plane into channels 1..3.
            for channel in (1, 2, 3):
                argb[:, :, channel] = im[:, :]

        # NOTE(review): the second positional argument of zlib.compress is
        # the compression level; the constant zlib.DEFLATED is what gets
        # passed here. Kept exactly as it was.
        self._data = zlib.compress(argb.tobytes(), zlib.DEFLATED)
        self.imshape = im.shape

    def process_tag(self):
        """Assemble the tag payload into ``self.bytes``."""
        height, width = self.imshape[0], self.imshape[1]
        self.bytes = b"".join(
            (
                int2uint16(self.id),  # CharacterID
                int2uint8(5),  # BitmapFormat
                int2uint16(width),  # BitmapWidth
                int2uint16(height),  # BitmapHeight
                self._data,  # ZlibBitmapData
            )
        )
with a bitmap fill""" + + bb = bytes() + bb += int2uint16(self.id) + xy, wh = self.xy, self.wh + tmp = self.make_rect_record(xy[0], wh[0], xy[1], wh[1]) # ShapeBounds + bb += tmp.tobytes() + + # make SHAPEWITHSTYLE structure + + # first entry: FILLSTYLEARRAY with in it a single fill style + bb += int2uint8(1) # FillStyleCount + bb += "\x41".encode("ascii") # FillStyleType (0x41 or 0x43 unsmoothed) + bb += int2uint16(self.bitmapId) # BitmapId + # bb += '\x00' # BitmapMatrix (empty matrix with leftover bits filled) + bb += self.make_matrix_record(scale_xy=(20, 20)).tobytes() + + # # first entry: FILLSTYLEARRAY with in it a single fill style + # bb += int2uint8(1) # FillStyleCount + # bb += '\x00' # solid fill + # bb += '\x00\x00\xff' # color + + # second entry: LINESTYLEARRAY with a single line style + bb += int2uint8(0) # LineStyleCount + # bb += int2uint16(0*20) # Width + # bb += '\x00\xff\x00' # Color + + # third and fourth entry: NumFillBits and NumLineBits (4 bits each) + # I each give them four bits, so 16 styles possible. + bb += "\x44".encode("ascii") + + self.bytes = bb + + # last entries: SHAPERECORDs ... (individual shape records not aligned) + # STYLECHANGERECORD + bits = BitArray() + bits += self.make_style_change_record(0, 1, moveTo=(self.wh[0], self.wh[1])) + # STRAIGHTEDGERECORD 4x + bits += self.make_straight_edge_record(-self.wh[0], 0) + bits += self.make_straight_edge_record(0, -self.wh[1]) + bits += self.make_straight_edge_record(self.wh[0], 0) + bits += self.make_straight_edge_record(0, self.wh[1]) + + # ENDSHAPRECORD + bits += self.make_end_shape_record() + + self.bytes += bits.tobytes() + + # done + # self.bytes = bb + + def make_style_change_record(self, lineStyle=None, fillStyle=None, moveTo=None): + # first 6 flags + # Note that we use FillStyle1. If we don't flash (at least 8) does not + # recognize the frames properly when importing to library. 
+ + bits = BitArray() + bits += "0" # TypeFlag (not an edge record) + bits += "0" # StateNewStyles (only for DefineShape2 and Defineshape3) + if lineStyle: + bits += "1" # StateLineStyle + else: + bits += "0" + if fillStyle: + bits += "1" # StateFillStyle1 + else: + bits += "0" + bits += "0" # StateFillStyle0 + if moveTo: + bits += "1" # StateMoveTo + else: + bits += "0" + + # give information + # todo: nbits for fillStyle and lineStyle is hard coded. + + if moveTo: + bits += twits2bits([moveTo[0], moveTo[1]]) + if fillStyle: + bits += int2bits(fillStyle, 4) + if lineStyle: + bits += int2bits(lineStyle, 4) + + return bits + + def make_straight_edge_record(self, *dxdy): + if len(dxdy) == 1: + dxdy = dxdy[0] + + # determine required number of bits + xbits = signedint2bits(dxdy[0] * 20) + ybits = signedint2bits(dxdy[1] * 20) + nbits = max([len(xbits), len(ybits)]) + + bits = BitArray() + bits += "11" # TypeFlag and StraightFlag + bits += int2bits(nbits - 2, 4) + bits += "1" # GeneralLineFlag + bits += signedint2bits(dxdy[0] * 20, nbits) + bits += signedint2bits(dxdy[1] * 20, nbits) + + # note: I do not make use of vertical/horizontal only lines... 
def read_pixels(bb, i, tagType, L1):
    """Read the pixel data of a lossless bitmap tag.

    Parameters
    ----------
    bb : bytes
        Buffer holding the SWF data.
    i : int
        Offset into ``bb`` directly after the tag's record header.
    tagType : int
        Tag code. 20 is treated as RGB data, 36 as ARGB data.
    L1 : int
        Tag length; the 7 bytes of bitmap info are subtracted from it to
        find the end of the compressed data.

    Returns
    -------
    numpy.ndarray or None
        For tag 36 an array of shape (height, width, 4) in RGBA order, for
        tag 20 an array of shape (height, width, 3). None if the bitmap
        format is not 5, or if the tag-20 data cannot be given that shape;
        a warning is logged in both cases.
    """
    # Bitmap info: 2-byte character id (not needed here), 1-byte format,
    # 2-byte width, 2-byte height.
    i += 2
    bitmap_format = ord(bb[i : i + 1])
    i += 1
    width = bits2int(bb[i : i + 2], 16)
    i += 2
    height = bits2int(bb[i : i + 2], 16)
    i += 2

    if bitmap_format != 5:
        logger.warning("Can only read 24bit or 32bit RGB(A) lossless images.")
        return None

    # The compressed data runs to the end of the tag.
    offset = 2 + 1 + 2 + 2  # all the info bits
    data = zlib.decompress(bb[i : i + (L1 - offset)])
    a = np.frombuffer(data, dtype=np.uint8)

    if tagType == 20:
        # DefineBitsLossless - RGB data
        try:
            a = a.reshape(height, width, 3)
        except ValueError:
            # Byte align stuff might cause troubles. Do not hand the
            # unshaped buffer back: the caller keeps everything that is
            # not None as an image.
            logger.warning("Cannot read image due to byte alignment")
            return None
    if tagType == 36:
        # DefineBitsLossless2 - ARGB data
        a = a.reshape(height, width, 4)
        # Move the alpha channel from the front to the back to get RGBA.
        a = np.roll(a, -1, axis=2)

    return a
def build_file(
    fp, taglist, nframes=1, framesize=(500, 500), fps=10, version=8
):  # pragma: no cover
    """Write a complete SWF stream to ``fp``: header, all tags, end tag.

    The FileLength field is written as a placeholder first and patched with
    the real size once everything else is in place, so ``fp`` needs to
    support ``tell`` and ``seek``.
    """
    frame_rect = Tag().make_rect_record(0, framesize[0], 0, framesize[1])
    header = b"".join(
        (
            "F".encode("ascii"),  # uncompressed
            "WS".encode("ascii"),  # signature bytes
            int2uint8(version),  # version
            "0000".encode("ascii"),  # FileLength (leave open for now)
            frame_rect.tobytes(),  # FrameSize
            int2uint8(0) + int2uint8(fps),  # FrameRate
            int2uint16(nframes),  # FrameCount
        )
    )
    fp.write(header)

    # All tags, in order, each serialized with its own header.
    for tag in taglist:
        fp.write(tag.get_tag())

    # The end tag closes the stream.
    fp.write("\x00\x00".encode("ascii"))

    # Go back and fill in the real file length at byte offset 4.
    total_size = fp.tell()
    fp.seek(4)
    fp.write(int2uint32(total_size))
def read_swf(filename):  # pragma: no cover
    """Read all images from an SWF (shockwave flash) file.

    Returns a list of numpy arrays, one per bitmap tag that could be read.

    Limitation: only read the PNG encoded images (not the JPG encoded ones).

    Raises IOError if the file does not exist or does not start with an
    SWF signature (FWS or CWS).
    """
    if not os.path.isfile(filename):
        raise IOError("File not found: " + str(filename))

    # Read the whole file up front. The context manager closes the handle
    # even when reading fails, and nothing below needs the file open.
    with open(filename, "rb") as fp:
        bb = fp.read()

    # Check opening tag
    signature = bb[0:3].decode("ascii", "ignore").upper()
    if signature == "FWS":
        pass  # ok, uncompressed
    elif signature == "CWS":
        # Everything after the first 8 bytes is zlib-compressed.
        bb = bb[:8] + zlib.decompress(bb[8:])
    else:
        raise IOError("Not a valid SWF file: " + str(filename))

    # Position at the first tag: skip the FrameSize RECT (rounded up to
    # whole bytes) and the two uint16's that follow it.
    i = 8
    nbits = bits2int(bb[i : i + 1], 5)
    nbits = 5 + nbits * 4
    i += (nbits + 7) // 8 + 4

    images = []
    while True:
        # Get tag header
        head = bb[i : i + 6]
        if not head:
            break  # Done (we missed end tag)

        # Determine type and length
        T, L1, L2 = get_type_and_len(head)
        if not L2:
            logger.warning("Invalid tag length, could not proceed")
            break

        # Read image if we can
        if T in [20, 36]:
            im = read_pixels(bb, i + 6, T, L1)
            if im is not None:
                images.append(im)
        elif T in [6, 21, 35, 90]:
            logger.warning("Ignoring JPEG image: cannot read JPEG.")

        # Detect end tag
        if T == 0:
            break

        # Next tag!
        i += L2

    return images
/usr/bin/env python3 +# -*- coding: utf-8 -*- +# tifffile.py + +# Copyright (c) 2008-2018, Christoph Gohlke +# Copyright (c) 2008-2018, The Regents of the University of California +# Produced at the Laboratory for Fluorescence Dynamics +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the copyright holders nor the names of any +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +"""Read image and meta data from (bio) TIFF(R) files. Save numpy arrays as TIFF. + +Image and metadata can be read from TIFF, BigTIFF, OME-TIFF, STK, LSM, NIH, +SGI, ImageJ, MicroManager, FluoView, ScanImage, SEQ, GEL, and GeoTIFF files. 
+ +Tifffile is not a general-purpose TIFF library. +Only a subset of the TIFF specification is supported, mainly uncompressed and +losslessly compressed 1, 8, 16, 32 and 64 bit integer, 16, 32 and 64-bit float, +grayscale and RGB(A) images, which are commonly used in scientific imaging. +Specifically, reading slices of image data, image trees defined via SubIFDs, +CCITT and OJPEG compression, chroma subsampling without JPEG compression, +or IPTC and XMP metadata are not implemented. + +TIFF(R), the tagged Image File Format, is a trademark and under control of +Adobe Systems Incorporated. BigTIFF allows for files greater than 4 GB. +STK, LSM, FluoView, SGI, SEQ, GEL, and OME-TIFF, are custom extensions +defined by Molecular Devices (Universal Imaging Corporation), Carl Zeiss +MicroImaging, Olympus, Silicon Graphics International, Media Cybernetics, +Molecular Dynamics, and the Open Microscopy Environment consortium +respectively. + +For command line usage run C{python -m tifffile --help} + +:Author: + `Christoph Gohlke `_ + +:Organization: + Laboratory for Fluorescence Dynamics, University of California, Irvine + +:Version: 2018.06.15 + +Requirements +------------ +* `CPython 3.6 64-bit `_ +* `Numpy 1.14 `_ +* `Matplotlib 2.2 `_ (optional for plotting) +* `Tifffile.c 2018.02.10 `_ + (recommended for faster decoding of PackBits and LZW encoded strings) +* `Tifffile_geodb.py 2018.02.10 `_ + (optional enums for GeoTIFF metadata) +* Python 2 requires 'futures', 'enum34', 'pathlib'. + +Revisions +--------- +2018.06.15 + Pass 2680 tests. + Towards reading JPEG and other compressions via imagecodecs package (WIP). + Add function to validate TIFF using 'jhove -m TIFF-hul'. + Save bool arrays as bilevel TIFF. + Accept pathlib.Path as filenames. + Move 'software' argument from TiffWriter __init__ to save. + Raise DOS limit to 16 TB. + Lazy load lzma and zstd compressors and decompressors. + Add option to save IJMetadata tags. 
+ Return correct number of pages for truncated series (bug fix). + Move EXIF tags to TIFF.TAG as per TIFF/EP standard. +2018.02.18 + Pass 2293 tests. + Always save RowsPerStrip and Resolution tags as required by TIFF standard. + Do not use badly typed ImageDescription. + Coherce bad ASCII string tags to bytes. + Tuning of __str__ functions. + Fix reading 'undefined' tag values (bug fix). + Read and write ZSTD compressed data. + Use hexdump to print byte strings. + Determine TIFF byte order from data dtype in imsave. + Add option to specify RowsPerStrip for compressed strips. + Allow memory map of arrays with non-native byte order. + Attempt to handle ScanImage <= 5.1 files. + Restore TiffPageSeries.pages sequence interface. + Use numpy.frombuffer instead of fromstring to read from binary data. + Parse GeoTIFF metadata. + Add option to apply horizontal differencing before compression. + Towards reading PerkinElmer QPTIFF (no test files). + Do not index out of bounds data in tifffile.c unpackbits and decodelzw. +2017.09.29 (tentative) + Many backwards incompatible changes improving speed and resource usage: + Pass 2268 tests. + Add detail argument to __str__ function. Remove info functions. + Fix potential issue correcting offsets of large LSM files with positions. + Remove TiffFile sequence interface; use TiffFile.pages instead. + Do not make tag values available as TiffPage attributes. + Use str (not bytes) type for tag and metadata strings (WIP). + Use documented standard tag and value names (WIP). + Use enums for some documented TIFF tag values. + Remove 'memmap' and 'tmpfile' options; use out='memmap' instead. + Add option to specify output in asarray functions. + Add option to concurrently decode image strips or tiles using threads. + Add TiffPage.asrgb function (WIP). + Do not apply colormap in asarray. + Remove 'colormapped', 'rgbonly', and 'scale_mdgel' options from asarray. + Consolidate metadata in TiffFile _metadata functions. 
+ Remove non-tag metadata properties from TiffPage. + Add function to convert LSM to tiled BIN files. + Align image data in file. + Make TiffPage.dtype a numpy.dtype. + Add 'ndim' and 'size' properties to TiffPage and TiffPageSeries. + Allow imsave to write non-BigTIFF files up to ~4 GB. + Only read one page for shaped series if possible. + Add memmap function to create memory-mapped array stored in TIFF file. + Add option to save empty arrays to TIFF files. + Add option to save truncated TIFF files. + Allow single tile images to be saved contiguously. + Add optional movie mode for files with uniform pages. + Lazy load pages. + Use lightweight TiffFrame for IFDs sharing properties with key TiffPage. + Move module constants to 'TIFF' namespace (speed up module import). + Remove 'fastij' option from TiffFile. + Remove 'pages' parameter from TiffFile. + Remove TIFFfile alias. + Deprecate Python 2. + Require enum34 and futures packages on Python 2.7. + Remove Record class and return all metadata as dict instead. + Add functions to parse STK, MetaSeries, ScanImage, SVS, Pilatus metadata. + Read tags from EXIF and GPS IFDs. + Use pformat for tag and metadata values. + Fix reading some UIC tags (bug fix). + Do not modify input array in imshow (bug fix). + Fix Python implementation of unpack_ints. +2017.05.23 + Pass 1961 tests. + Write correct number of SampleFormat values (bug fix). + Use Adobe deflate code to write ZIP compressed files. + Add option to pass tag values as packed binary data for writing. + Defer tag validation to attribute access. + Use property instead of lazyattr decorator for simple expressions. +2017.03.17 + Write IFDs and tag values on word boundaries. + Read ScanImage metadata. + Remove is_rgb and is_indexed attributes from TiffFile. + Create files used by doctests. +2017.01.12 + Read Zeiss SEM metadata. + Read OME-TIFF with invalid references to external files. + Rewrite C LZW decoder (5x faster). 
+ Read corrupted LSM files missing EOI code in LZW stream. +2017.01.01 + Add option to append images to existing TIFF files. + Read files without pages. + Read S-FEG and Helios NanoLab tags created by FEI software. + Allow saving Color Filter Array (CFA) images. + Add info functions returning more information about TiffFile and TiffPage. + Add option to read specific pages only. + Remove maxpages argument (backwards incompatible). + Remove test_tifffile function. +2016.10.28 + Pass 1944 tests. + Improve detection of ImageJ hyperstacks. + Read TVIPS metadata created by EM-MENU (by Marco Oster). + Add option to disable using OME-XML metadata. + Allow non-integer range attributes in modulo tags (by Stuart Berg). +2016.06.21 + Do not always memmap contiguous data in page series. +2016.05.13 + Add option to specify resolution unit. + Write grayscale images with extra samples when planarconfig is specified. + Do not write RGB color images with 2 samples. + Reorder TiffWriter.save keyword arguments (backwards incompatible). +2016.04.18 + Pass 1932 tests. + TiffWriter, imread, and imsave accept open binary file streams. +2016.04.13 + Correctly handle reversed fill order in 2 and 4 bps images (bug fix). + Implement reverse_bitorder in C. +2016.03.18 + Fix saving additional ImageJ metadata. +2016.02.22 + Pass 1920 tests. + Write 8 bytes double tag values using offset if necessary (bug fix). + Add option to disable writing second image description tag. + Detect tags with incorrect counts. + Disable color mapping for LSM. +2015.11.13 + Read LSM 6 mosaics. + Add option to specify directory of memory-mapped files. + Add command line options to specify vmin and vmax values for colormapping. +2015.10.06 + New helper function to apply colormaps. + Renamed is_palette attributes to is_indexed (backwards incompatible). + Color-mapped samples are now contiguous (backwards incompatible). + Do not color-map ImageJ hyperstacks (backwards incompatible). + Towards reading Leica SCN. 
+2015.09.25 + Read images with reversed bit order (FillOrder is LSB2MSB). +2015.09.21 + Read RGB OME-TIFF. + Warn about malformed OME-XML. +2015.09.16 + Detect some corrupted ImageJ metadata. + Better axes labels for 'shaped' files. + Do not create TiffTag for default values. + Chroma subsampling is not supported. + Memory-map data in TiffPageSeries if possible (optional). +2015.08.17 + Pass 1906 tests. + Write ImageJ hyperstacks (optional). + Read and write LZMA compressed data. + Specify datetime when saving (optional). + Save tiled and color-mapped images (optional). + Ignore void bytecounts and offsets if possible. + Ignore bogus image_depth tag created by ISS Vista software. + Decode floating point horizontal differencing (not tiled). + Save image data contiguously if possible. + Only read first IFD from ImageJ files if possible. + Read ImageJ 'raw' format (files larger than 4 GB). + TiffPageSeries class for pages with compatible shape and data type. + Try to read incomplete tiles. + Open file dialog if no filename is passed on command line. + Ignore errors when decoding OME-XML. + Rename decoder functions (backwards incompatible). +2014.08.24 + TiffWriter class for incremental writing images. + Simplify examples. +2014.08.19 + Add memmap function to FileHandle. + Add function to determine if image data in TiffPage is memory-mappable. + Do not close files if multifile_close parameter is False. +2014.08.10 + Pass 1730 tests. + Return all extrasamples by default (backwards incompatible). + Read data from series of pages into memory-mapped array (optional). + Squeeze OME dimensions (backwards incompatible). + Workaround missing EOI code in strips. + Support image and tile depth tags (SGI extension). + Better handling of STK/UIC tags (backwards incompatible). + Disable color mapping for STK. + Julian to datetime converter. + TIFF ASCII type may be NULL separated. + Unwrap strip offsets for LSM files greater than 4 GB. 
+ Correct strip byte counts in compressed LSM files. + Skip missing files in OME series. + Read embedded TIFF files. +2014.02.05 + Save rational numbers as type 5 (bug fix). +2013.12.20 + Keep other files in OME multi-file series closed. + FileHandle class to abstract binary file handle. + Disable color mapping for bad OME-TIFF produced by bio-formats. + Read bad OME-XML produced by ImageJ when cropping. +2013.11.03 + Allow zlib compress data in imsave function (optional). + Memory-map contiguous image data (optional). +2013.10.28 + Read MicroManager metadata and little-endian ImageJ tag. + Save extra tags in imsave function. + Save tags in ascending order by code (bug fix). +2012.10.18 + Accept file like objects (read from OIB files). +2012.08.21 + Rename TIFFfile to TiffFile and TIFFpage to TiffPage. + TiffSequence class for reading sequence of TIFF files. + Read UltraQuant tags. + Allow float numbers as resolution in imsave function. +2012.08.03 + Read MD GEL tags and NIH Image header. +2012.07.25 + Read ImageJ tags. + ... + +Notes +----- +The API is not stable yet and might change between revisions. + +Tested on little-endian platforms only. + +Other Python packages and modules for reading (bio) scientific TIFF files: + +* `python-bioformats `_ +* `Imread `_ +* `PyLibTiff `_ +* `ITK `_ +* `PyLSM `_ +* `PyMca.TiffIO.py `_ (same as fabio.TiffIO) +* `BioImageXD.Readers `_ +* `Cellcognition.io `_ +* `pymimage `_ +* `pytiff `_ + +Acknowledgements +---------------- +* Egor Zindy, University of Manchester, for lsm_scan_info specifics. +* Wim Lewis for a bug fix and some LSM functions. +* Hadrien Mary for help on reading MicroManager files. +* Christian Kliche for help writing tiled and color-mapped files. + +References +---------- +1) TIFF 6.0 Specification and Supplements. Adobe Systems Incorporated. + http://partners.adobe.com/public/developer/tiff/ +2) TIFF File Format FAQ. 
http://www.awaresystems.be/imaging/tiff/faq.html +3) MetaMorph Stack (STK) Image File Format. + http://support.meta.moleculardevices.com/docs/t10243.pdf +4) Image File Format Description LSM 5/7 Release 6.0 (ZEN 2010). + Carl Zeiss MicroImaging GmbH. BioSciences. May 10, 2011 +5) The OME-TIFF format. + http://www.openmicroscopy.org/site/support/file-formats/ome-tiff +6) UltraQuant(r) Version 6.0 for Windows Start-Up Guide. + http://www.ultralum.com/images%20ultralum/pdf/UQStart%20Up%20Guide.pdf +7) Micro-Manager File Formats. + http://www.micro-manager.org/wiki/Micro-Manager_File_Formats +8) Tags for TIFF and Related Specifications. Digital Preservation. + http://www.digitalpreservation.gov/formats/content/tiff_tags.shtml +9) ScanImage BigTiff Specification - ScanImage 2016. + http://scanimage.vidriotechnologies.com/display/SI2016/ + ScanImage+BigTiff+Specification +10) CIPA DC-008-2016: Exchangeable image file format for digital still cameras: + Exif Version 2.31. + http://www.cipa.jp/std/documents/e/DC-008-Translation-2016-E.pdf + +Examples +-------- +>>> # write numpy array to TIFF file +>>> data = numpy.random.rand(4, 301, 219) +>>> imsave('temp.tif', data, photometric='minisblack') + +>>> # read numpy array from TIFF file +>>> image = imread('temp.tif') +>>> numpy.testing.assert_array_equal(image, data) + +>>> # iterate over pages and tags in TIFF file +>>> with TiffFile('temp.tif') as tif: +... images = tif.asarray() +... for page in tif.pages: +... for tag in page.tags.values(): +... _ = tag.name, tag.value +... 
image = page.asarray() + +""" + +from __future__ import division, print_function + +import sys +import os +import io +import re +import glob +import math +import zlib +import time +import json +import enum +import struct +import pathlib +import warnings +import binascii +import tempfile +import datetime +import threading +import collections +import multiprocessing +import concurrent.futures + +import numpy + +# delay imports: mmap, pprint, fractions, xml, tkinter, matplotlib, lzma, zstd, +# subprocess + +__version__ = "2018.06.15" +__docformat__ = "restructuredtext en" +__all__ = ( + "imsave", + "imread", + "imshow", + "memmap", + "TiffFile", + "TiffWriter", + "TiffSequence", + # utility functions used by oiffile or czifile + "FileHandle", + "lazyattr", + "natural_sorted", + "decode_lzw", + "stripnull", + "create_output", + "repeat_nd", + "format_size", + "product", + "xml2dict", +) + + +def imread(files, **kwargs): + """Return image data from TIFF file(s) as numpy array. + + Refer to the TiffFile class and member functions for documentation. + + Parameters + ---------- + files : str, binary stream, or sequence + File name, seekable binary stream, glob pattern, or sequence of + file names. + kwargs : dict + Parameters 'multifile' and 'is_ome' are passed to the TiffFile class. + The 'pattern' parameter is passed to the TiffSequence class. + Other parameters are passed to the asarray functions. + The first image series is returned if no arguments are provided. 
+ + Examples + -------- + >>> # get image from first page + >>> imsave('temp.tif', numpy.random.rand(3, 4, 301, 219)) + >>> im = imread('temp.tif', key=0) + >>> im.shape + (4, 301, 219) + + >>> # get images from sequence of files + >>> ims = imread(['temp.tif', 'temp.tif']) + >>> ims.shape + (2, 3, 4, 301, 219) + + """ + kwargs_file = parse_kwargs(kwargs, "multifile", "is_ome") + kwargs_seq = parse_kwargs(kwargs, "pattern") + + if isinstance(files, basestring) and any(i in files for i in "?*"): + files = glob.glob(files) + if not files: + raise ValueError("no files found") + if not hasattr(files, "seek") and len(files) == 1: + files = files[0] + + if isinstance(files, basestring) or hasattr(files, "seek"): + with TiffFile(files, **kwargs_file) as tif: + return tif.asarray(**kwargs) + else: + with TiffSequence(files, **kwargs_seq) as imseq: + return imseq.asarray(**kwargs) + + +def imsave(file, data=None, shape=None, dtype=None, bigsize=2**32 - 2**25, **kwargs): + """Write numpy array to TIFF file. + + Refer to the TiffWriter class and member functions for documentation. + + Parameters + ---------- + file : str or binary stream + File name or writable binary stream, such as an open file or BytesIO. + data : array_like + Input image. The last dimensions are assumed to be image depth, + height, width, and samples. + If None, an empty array of the specified shape and dtype is + saved to file. + Unless 'byteorder' is specified in 'kwargs', the TIFF file byte order + is determined from the data's dtype or the dtype argument. + shape : tuple + If 'data' is None, shape of an empty array to save to the file. + dtype : numpy.dtype + If 'data' is None, data-type of an empty array to save to the file. + bigsize : int + Create a BigTIFF file if the size of data in bytes is larger than + this threshold and 'imagej' or 'truncate' are not enabled. + By default, the threshold is 4 GB minus 32 MB reserved for metadata. 
def memmap(filename, shape=None, dtype=None, page=None, series=0, mode="r+", **kwargs):
    """Return memory-mapped numpy array stored in TIFF file.

    Memory-mapping requires data stored in native byte order, without tiling,
    compression, predictors, etc.
    If 'shape' and 'dtype' are provided, existing files will be overwritten or
    appended to depending on the 'append' parameter.
    Otherwise the image data of a specified page or series in an existing
    file will be memory-mapped. By default, the image data of the first page
    series is memory-mapped.
    Call flush() to write any changes in the array to the file.
    Raise ValueError if the image data in the file is not memory-mappable.

    Parameters
    ----------
    filename : str
        Name of the TIFF file which stores the array.
    shape : tuple
        Shape of the empty array.
    dtype : numpy.dtype
        Data-type of the empty array.
    page : int
        Index of the page which image data to memory-map.
    series : int
        Index of the page series which image data to memory-map.
    mode : {'r+', 'r', 'c'}, optional
        The file open mode. Default is to open existing file for reading and
        writing ('r+').
    kwargs : dict
        Additional parameters passed to imsave() or TiffFile().

    Examples
    --------
    >>> # create an empty TIFF file and write to memory-mapped image
    >>> im = memmap('temp.tif', shape=(256, 256), dtype='float32')
    >>> im[255, 255] = 1.0
    >>> im.flush()
    >>> im.shape, im.dtype
    ((256, 256), dtype('float32'))
    >>> del im

    >>> # memory-map image data in a TIFF file
    >>> im = memmap('temp.tif', page=0)
    >>> im[255, 255]
    1.0

    """
    error = "image data are not memory-mappable"

    if shape is not None and dtype is not None:
        # write a new, empty image and map the region it occupies
        kwargs.update(
            data=None,
            shape=shape,
            dtype=dtype,
            returnoffset=True,
            align=TIFF.ALLOCATIONGRANULARITY,
        )
        written = imsave(filename, **kwargs)
        if written is None:
            # TODO: fail before creating file or writing data
            raise ValueError(error)
        offset = written[0]
        return numpy.memmap(filename, dtype, mode, offset, shape, "C")

    # map the image data of a page or series in an existing file
    with TiffFile(filename, **kwargs) as tif:
        if page is None:
            source = tif.series[series]
            if source.offset is None:
                raise ValueError(error)
            shape = source.shape
            dtype = source.dtype
            offset = source.offset
        else:
            source = tif.pages[page]
            if not source.is_memmappable:
                raise ValueError(error)
            offset, _ = source.is_contiguous
            shape = source.shape
            dtype = source.dtype
        # prefix the type character with the byte order of the file
        dtype = tif.byteorder + dtype.char
    return numpy.memmap(filename, dtype, mode, offset, shape, "C")
class lazyattr(object):
    """Descriptor that computes an attribute once and stores it on the instance.

    The wrapped function is called with the instance on first access. Its
    result is set as an instance attribute under the function's name, so
    on classes whose instances have a ``__dict__`` later lookups find the
    stored value and no longer reach this (non-data) descriptor.

    If the function returns NotImplemented, nothing is stored and the
    attribute is looked up via ``super(owner, instance)`` instead.
    An AttributeError raised by the function is re-raised as RuntimeError.

    """

    # TODO: help() doesn't work
    __slots__ = ("func",)

    def __init__(self, func):
        self.func = func

    def __get__(self, instance, owner):
        # access through the class returns the descriptor itself
        if instance is None:
            return self

        compute = self.func
        try:
            result = compute(instance)
        except AttributeError as exc:
            raise RuntimeError(exc)

        attrname = compute.__name__
        if result is not NotImplemented:
            # store the value on the instance under the function's name
            setattr(instance, attrname, result)
            return result
        # defer to the attribute of the same name further up the MRO
        return getattr(super(owner, instance), attrname)
def __init__(self, file, bigtiff=False, byteorder=None, append=False, imagej=False):
    """Open a TIFF file for writing.

    An empty TIFF file is created if the file does not exist, else the
    file is overwritten with an empty TIFF file unless 'append'
    is true. Use bigtiff=True when creating files larger than 4 GB.

    Parameters
    ----------
    file : str, binary stream, or FileHandle
        File name or writable binary stream, such as an open file
        or BytesIO.
    bigtiff : bool
        If True, the BigTIFF format is used.
        Replaced by the format of the existing file when appending.
    byteorder : {'<', '>', '=', '|'}
        The endianness of the data in the file.
        By default, this is the system's native byte order.
        Replaced by the byte order of the existing file when appending.
    append : bool
        If True and 'file' is an existing standard TIFF file, image data
        and tags are appended to the file.
        Appending data may corrupt specifically formatted TIFF files
        such as LSM, STK, ImageJ, NIH, or FluoView; such files are
        rejected unless 'append' is the string 'force'.
        If the file cannot be opened, a new file is written instead.
    imagej : bool
        If True, write an ImageJ hyperstack compatible file.
        This format can handle data types uint8, uint16, or float32 and
        data shapes up to 6 dimensions in TZCYXS order.
        RGB images (S=3 or S=4) must be uint8.
        ImageJ's default byte order is big-endian but this implementation
        uses the system's native byte order by default.
        ImageJ does not support BigTIFF format or LZMA compression.
        The ImageJ file format is undocumented.

    Raises
    ------
    ValueError
        If 'byteorder' is invalid or the existing file cannot be appended to.

    """
    if append:
        # determine if file is an existing TIFF file that can be extended
        special_formats = ("lsm", "stk", "imagej", "nih", "fluoview", "micromanager")
        try:
            with FileHandle(file, mode="rb", size=0) as handle:
                start = handle.tell()
                try:
                    with TiffFile(handle) as tif:
                        if append != "force":
                            for fmt in special_formats:
                                if getattr(tif, "is_" + fmt):
                                    raise ValueError("file contains metadata")
                        # continue in the format of the existing file
                        byteorder = tif.byteorder
                        bigtiff = tif.is_bigtiff
                        self._ifdoffset = tif.pages.next_page_offset
                except Exception as exc:
                    raise ValueError("cannot append to file: %s" % str(exc))
                finally:
                    # restore the position the handle had before parsing
                    handle.seek(start)
        except (IOError, FileNotFoundError):
            # file cannot be opened for reading: write a new file instead
            append = False

    if byteorder not in (None, "=", "|"):
        if byteorder not in ("<", ">"):
            raise ValueError("invalid byteorder %s" % byteorder)
    elif sys.byteorder == "little":
        byteorder = "<"
    else:
        byteorder = ">"
    if imagej and bigtiff:
        warnings.warn("writing incompatible BigTIFF ImageJ")

    self._byteorder = byteorder
    self._imagej = bool(imagej)
    self._truncate = False
    self._metadata = None
    self._colormap = None

    self._descriptionoffset = 0
    self._descriptionlen = 0
    self._descriptionlenoffset = 0
    self._tags = None
    self._shape = None  # normalized shape of data in consecutive pages
    self._datashape = None  # shape of data in consecutive pages
    self._datadtype = None  # data type
    self._dataoffset = None  # offset to data
    self._databytecounts = None  # byte counts per plane
    self._tagoffsets = None  # strip or tile offset tag code

    # sizes and struct formats of IFD fields differ between the two formats:
    # (offsetsize, tagsize, tagnoformat, offsetformat, valueformat)
    self._bigtiff = bool(bigtiff)
    if self._bigtiff:
        layout = (8, 20, "Q", "Q", "8s")
    else:
        layout = (4, 12, "H", "I", "4s")
    (
        self._offsetsize,
        self._tagsize,
        self._tagnoformat,
        self._offsetformat,
        self._valueformat,
    ) = layout

    if append:
        # new data go to the end of the existing file
        self._fh = FileHandle(file, mode="r+b", size=0)
        self._fh.seek(0, 2)
        return

    # start a new file with the TIFF header
    self._fh = FileHandle(file, mode="wb", size=0)
    self._fh.write({"<": b"II", ">": b"MM"}[byteorder])
    if bigtiff:
        header = struct.pack(byteorder + "HHH", 43, 8, 0)
    else:
        header = struct.pack(byteorder + "H", 42)
    self._fh.write(header)
    # first IFD: remember where its offset is stored and write a placeholder
    self._ifdoffset = self._fh.tell()
    self._fh.write(struct.pack(byteorder + self._offsetformat, 0))
self._datadtype = None # data type + self._dataoffset = None # offset to data + self._databytecounts = None # byte counts per plane + self._tagoffsets = None # strip or tile offset tag code + + if bigtiff: + self._bigtiff = True + self._offsetsize = 8 + self._tagsize = 20 + self._tagnoformat = "Q" + self._offsetformat = "Q" + self._valueformat = "8s" + else: + self._bigtiff = False + self._offsetsize = 4 + self._tagsize = 12 + self._tagnoformat = "H" + self._offsetformat = "I" + self._valueformat = "4s" + + if append: + self._fh = FileHandle(file, mode="r+b", size=0) + self._fh.seek(0, 2) + else: + self._fh = FileHandle(file, mode="wb", size=0) + self._fh.write({"<": b"II", ">": b"MM"}[byteorder]) + if bigtiff: + self._fh.write(struct.pack(byteorder + "HHH", 43, 8, 0)) + else: + self._fh.write(struct.pack(byteorder + "H", 42)) + # first IFD + self._ifdoffset = self._fh.tell() + self._fh.write(struct.pack(byteorder + self._offsetformat, 0)) + + def save( + self, + data=None, + shape=None, + dtype=None, + returnoffset=False, + photometric=None, + planarconfig=None, + tile=None, + contiguous=True, + align=16, + truncate=False, + compress=0, + rowsperstrip=None, + predictor=False, + colormap=None, + description=None, + datetime=None, + resolution=None, + software="tifffile.py", + metadata={}, + ijmetadata=None, + extratags=(), + ): + """Write numpy array and tags to TIFF file. + + The data shape's last dimensions are assumed to be image depth, + height (length), width, and samples. + If a colormap is provided, the data's dtype must be uint8 or uint16 + and the data values are indices into the last dimension of the + colormap. + If 'shape' and 'dtype' are specified, an empty array is saved. + This option cannot be used with compression or multiple tiles. + Image data are written uncompressed in one strip per plane by default. + Dimensions larger than 2 to 4 (depending on photometric mode, planar + configuration, and SGI mode) are flattened and saved as separate pages. 
+ The SampleFormat and BitsPerSample tags are derived from the data type. + + Parameters + ---------- + data : numpy.ndarray or None + Input image array. + shape : tuple or None + Shape of the empty array to save. Used only if 'data' is None. + dtype : numpy.dtype or None + Data-type of the empty array to save. Used only if 'data' is None. + returnoffset : bool + If True and the image data in the file is memory-mappable, return + the offset and number of bytes of the image data in the file. + photometric : {'MINISBLACK', 'MINISWHITE', 'RGB', 'PALETTE', 'CFA'} + The color space of the image data. + By default, this setting is inferred from the data shape and the + value of colormap. + For CFA images, DNG tags must be specified in 'extratags'. + planarconfig : {'CONTIG', 'SEPARATE'} + Specifies if samples are stored contiguous or in separate planes. + By default, this setting is inferred from the data shape. + If this parameter is set, extra samples are used to store grayscale + images. + 'CONTIG': last dimension contains samples. + 'SEPARATE': third last dimension contains samples. + tile : tuple of int + The shape (depth, length, width) of image tiles to write. + If None (default), image data are written in strips. + The tile length and width must be a multiple of 16. + If the tile depth is provided, the SGI ImageDepth and TileDepth + tags are used to save volume data. + Unless a single tile is used, tiles cannot be used to write + contiguous files. + Few software can read the SGI format, e.g. MeVisLab. + contiguous : bool + If True (default) and the data and parameters are compatible with + previous ones, if any, the image data are stored contiguously after + the previous one. Parameters 'photometric' and 'planarconfig' + are ignored. Parameters 'description', datetime', and 'extratags' + are written to the first page of a contiguous series only. + align : int + Byte boundary on which to align the image data in the file. + Default 16. 
Use mmap.ALLOCATIONGRANULARITY for memory-mapped data. + Following contiguous writes are not aligned. + truncate : bool + If True, only write the first page including shape metadata if + possible (uncompressed, contiguous, not tiled). + Other TIFF readers will only be able to read part of the data. + compress : int or 'LZMA', 'ZSTD' + Values from 0 to 9 controlling the level of zlib compression. + If 0 (default), data are written uncompressed. + Compression cannot be used to write contiguous files. + If 'LZMA' or 'ZSTD', LZMA or ZSTD compression is used, which is + not available on all platforms. + rowsperstrip : int + The number of rows per strip used for compression. + Uncompressed data are written in one strip per plane. + predictor : bool + If True, apply horizontal differencing to integer type images + before compression. + colormap : numpy.ndarray + RGB color values for the corresponding data value. + Must be of shape (3, 2**(data.itemsize*8)) and dtype uint16. + description : str + The subject of the image. Must be 7-bit ASCII. Cannot be used with + the ImageJ format. Saved with the first page only. + datetime : datetime + Date and time of image creation in '%Y:%m:%d %H:%M:%S' format. + If None (default), the current date and time is used. + Saved with the first page only. + resolution : (float, float[, str]) or ((int, int), (int, int)[, str]) + X and Y resolutions in pixels per resolution unit as float or + rational numbers. A third, optional parameter specifies the + resolution unit, which must be None (default for ImageJ), + 'INCH' (default), or 'CENTIMETER'. + software : str + Name of the software used to create the file. Must be 7-bit ASCII. + Saved with the first page only. + metadata : dict + Additional meta data to be saved along with shape information + in JSON or ImageJ formats in an ImageDescription tag. + If None, do not write a second ImageDescription tag. + Strings must be 7-bit ASCII. Saved with the first page only. 
+ ijmetadata : dict + Additional meta data to be saved in application specific + IJMetadata and IJMetadataByteCounts tags. Refer to the + imagej_metadata_tags function for valid keys and values. + Saved with the first page only. + extratags : sequence of tuples + Additional tags as [(code, dtype, count, value, writeonce)]. + + code : int + The TIFF tag Id. + dtype : str + Data type of items in 'value' in Python struct format. + One of B, s, H, I, 2I, b, h, i, 2i, f, d, Q, or q. + count : int + Number of data values. Not used for string or byte string + values. + value : sequence + 'Count' values compatible with 'dtype'. + Byte strings must contain count values of dtype packed as + binary data. + writeonce : bool + If True, the tag is written to the first page only. + + """ + # TODO: refactor this function + fh = self._fh + byteorder = self._byteorder + + if data is None: + if compress: + raise ValueError("cannot save compressed empty file") + datashape = shape + datadtype = numpy.dtype(dtype).newbyteorder(byteorder) + datadtypechar = datadtype.char + else: + data = numpy.asarray(data, byteorder + data.dtype.char, "C") + if data.size == 0: + raise ValueError("cannot save empty array") + datashape = data.shape + datadtype = data.dtype + datadtypechar = data.dtype.char + + returnoffset = returnoffset and datadtype.isnative + bilevel = datadtypechar == "?" 
+ if bilevel: + index = -1 if datashape[-1] > 1 else -2 + datasize = product(datashape[:index]) + if datashape[index] % 8: + datasize *= datashape[index] // 8 + 1 + else: + datasize *= datashape[index] // 8 + else: + datasize = product(datashape) * datadtype.itemsize + + # just append contiguous data if possible + self._truncate = bool(truncate) + if self._datashape: + if ( + not contiguous + or self._datashape[1:] != datashape + or self._datadtype != datadtype + or (compress and self._tags) + or tile + or not numpy.array_equal(colormap, self._colormap) + ): + # incompatible shape, dtype, compression mode, or colormap + self._write_remaining_pages() + self._write_image_description() + self._truncate = False + self._descriptionoffset = 0 + self._descriptionlenoffset = 0 + self._datashape = None + self._colormap = None + if self._imagej: + raise ValueError("ImageJ does not support non-contiguous data") + else: + # consecutive mode + self._datashape = (self._datashape[0] + 1,) + datashape + if not compress: + # write contiguous data, write IFDs/tags later + offset = fh.tell() + if data is None: + fh.write_empty(datasize) + else: + fh.write_array(data) + if returnoffset: + return offset, datasize + return + + input_shape = datashape + tagnoformat = self._tagnoformat + valueformat = self._valueformat + offsetformat = self._offsetformat + offsetsize = self._offsetsize + tagsize = self._tagsize + + MINISBLACK = TIFF.PHOTOMETRIC.MINISBLACK + RGB = TIFF.PHOTOMETRIC.RGB + CFA = TIFF.PHOTOMETRIC.CFA + PALETTE = TIFF.PHOTOMETRIC.PALETTE + CONTIG = TIFF.PLANARCONFIG.CONTIG + SEPARATE = TIFF.PLANARCONFIG.SEPARATE + + # parse input + if photometric is not None: + photometric = enumarg(TIFF.PHOTOMETRIC, photometric) + if planarconfig: + planarconfig = enumarg(TIFF.PLANARCONFIG, planarconfig) + if not compress: + compress = False + compresstag = 1 + predictor = False + else: + if isinstance(compress, (tuple, list)): + compress, compresslevel = compress + elif isinstance(compress, 
int): + compress, compresslevel = "ADOBE_DEFLATE", int(compress) + if not 0 <= compresslevel <= 9: + raise ValueError("invalid compression level %s" % compress) + else: + compresslevel = None + compress = compress.upper() + compresstag = enumarg(TIFF.COMPRESSION, compress) + + # prepare ImageJ format + if self._imagej: + if compress in ("LZMA", "ZSTD"): + raise ValueError("ImageJ cannot handle LZMA or ZSTD compression") + if description: + warnings.warn("not writing description to ImageJ file") + description = None + volume = False + if datadtypechar not in "BHhf": + raise ValueError("ImageJ does not support data type %s" % datadtypechar) + ijrgb = photometric == RGB if photometric else None + if datadtypechar not in "B": + ijrgb = False + ijshape = imagej_shape(datashape, ijrgb) + if ijshape[-1] in (3, 4): + photometric = RGB + if datadtypechar not in "B": + raise ValueError( + "ImageJ does not support data type %s " + "for RGB" % datadtypechar + ) + elif photometric is None: + photometric = MINISBLACK + planarconfig = None + if planarconfig == SEPARATE: + raise ValueError("ImageJ does not support planar images") + else: + planarconfig = CONTIG if ijrgb else None + + # define compress function + if compress: + if compresslevel is None: + compressor, compresslevel = TIFF.COMPESSORS[compresstag] + else: + compressor, _ = TIFF.COMPESSORS[compresstag] + compresslevel = int(compresslevel) + if predictor: + if datadtype.kind not in "iu": + raise ValueError("prediction not implemented for %s" % datadtype) + + def compress(data, level=compresslevel): + # horizontal differencing + diff = numpy.diff(data, axis=-2) + data = numpy.insert(diff, 0, data[..., 0, :], axis=-2) + return compressor(data, level) + + else: + + def compress(data, level=compresslevel): + return compressor(data, level) + + # verify colormap and indices + if colormap is not None: + if datadtypechar not in "BH": + raise ValueError("invalid data dtype for palette mode") + colormap = numpy.asarray(colormap, 
dtype=byteorder + "H") + if colormap.shape != (3, 2 ** (datadtype.itemsize * 8)): + raise ValueError("invalid color map shape") + self._colormap = colormap + + # verify tile shape + if tile: + tile = tuple(int(i) for i in tile[:3]) + volume = len(tile) == 3 + if ( + len(tile) < 2 + or tile[-1] % 16 + or tile[-2] % 16 + or any(i < 1 for i in tile) + ): + raise ValueError("invalid tile shape") + else: + tile = () + volume = False + + # normalize data shape to 5D or 6D, depending on volume: + # (pages, planar_samples, [depth,] height, width, contig_samples) + datashape = reshape_nd(datashape, 3 if photometric == RGB else 2) + shape = datashape + ndim = len(datashape) + + samplesperpixel = 1 + extrasamples = 0 + if volume and ndim < 3: + volume = False + if colormap is not None: + photometric = PALETTE + planarconfig = None + if photometric is None: + photometric = MINISBLACK + if bilevel: + photometric = TIFF.PHOTOMETRIC.MINISWHITE + elif planarconfig == CONTIG: + if ndim > 2 and shape[-1] in (3, 4): + photometric = RGB + elif planarconfig == SEPARATE: + if volume and ndim > 3 and shape[-4] in (3, 4): + photometric = RGB + elif ndim > 2 and shape[-3] in (3, 4): + photometric = RGB + elif ndim > 2 and shape[-1] in (3, 4): + photometric = RGB + elif self._imagej: + photometric = MINISBLACK + elif volume and ndim > 3 and shape[-4] in (3, 4): + photometric = RGB + elif ndim > 2 and shape[-3] in (3, 4): + photometric = RGB + if planarconfig and len(shape) <= (3 if volume else 2): + planarconfig = None + photometric = MINISBLACK + if photometric == RGB: + if len(shape) < 3: + raise ValueError("not a RGB(A) image") + if len(shape) < 4: + volume = False + if planarconfig is None: + if shape[-1] in (3, 4): + planarconfig = CONTIG + elif shape[-4 if volume else -3] in (3, 4): + planarconfig = SEPARATE + elif shape[-1] > shape[-4 if volume else -3]: + planarconfig = SEPARATE + else: + planarconfig = CONTIG + if planarconfig == CONTIG: + datashape = (-1, 1) + shape[(-4 if volume 
else -3) :] + samplesperpixel = datashape[-1] + else: + datashape = (-1,) + shape[(-4 if volume else -3) :] + (1,) + samplesperpixel = datashape[1] + if samplesperpixel > 3: + extrasamples = samplesperpixel - 3 + elif photometric == CFA: + if len(shape) != 2: + raise ValueError("invalid CFA image") + volume = False + planarconfig = None + datashape = (-1, 1) + shape[-2:] + (1,) + if 50706 not in (et[0] for et in extratags): + raise ValueError("must specify DNG tags for CFA image") + elif planarconfig and len(shape) > (3 if volume else 2): + if planarconfig == CONTIG: + datashape = (-1, 1) + shape[(-4 if volume else -3) :] + samplesperpixel = datashape[-1] + else: + datashape = (-1,) + shape[(-4 if volume else -3) :] + (1,) + samplesperpixel = datashape[1] + extrasamples = samplesperpixel - 1 + else: + planarconfig = None + # remove trailing 1s + while len(shape) > 2 and shape[-1] == 1: + shape = shape[:-1] + if len(shape) < 3: + volume = False + datashape = (-1, 1) + shape[(-3 if volume else -2) :] + (1,) + + # normalize shape to 6D + assert len(datashape) in (5, 6) + if len(datashape) == 5: + datashape = datashape[:2] + (1,) + datashape[2:] + if datashape[0] == -1: + s0 = product(input_shape) // product(datashape[1:]) + datashape = (s0,) + datashape[1:] + shape = datashape + if data is not None: + data = data.reshape(shape) + + if tile and not volume: + tile = (1, tile[-2], tile[-1]) + + if photometric == PALETTE: + if samplesperpixel != 1 or extrasamples or shape[1] != 1 or shape[-1] != 1: + raise ValueError("invalid data shape for palette mode") + + if photometric == RGB and samplesperpixel == 2: + raise ValueError("not a RGB image (samplesperpixel=2)") + + if bilevel: + if compress: + raise ValueError("cannot save compressed bilevel image") + if tile: + raise ValueError("cannot save tiled bilevel image") + if photometric not in (0, 1): + raise ValueError("cannot save bilevel image as %s" % str(photometric)) + datashape = list(datashape) + if datashape[-2] % 8: 
+ datashape[-2] = datashape[-2] // 8 + 1 + else: + datashape[-2] = datashape[-2] // 8 + datashape = tuple(datashape) + assert datasize == product(datashape) + if data is not None: + data = numpy.packbits(data, axis=-2) + assert datashape[-2] == data.shape[-2] + + bytestr = ( + bytes + if sys.version[0] == "2" + else (lambda x: bytes(x, "ascii") if isinstance(x, str) else x) + ) + tags = [] # list of (code, ifdentry, ifdvalue, writeonce) + + strip_or_tile = "Tile" if tile else "Strip" + tagbytecounts = TIFF.TAG_NAMES[strip_or_tile + "ByteCounts"] + tag_offsets = TIFF.TAG_NAMES[strip_or_tile + "Offsets"] + self._tagoffsets = tag_offsets + + def pack(fmt, *val): + return struct.pack(byteorder + fmt, *val) + + def addtag(code, dtype, count, value, writeonce=False): + # Compute ifdentry & ifdvalue bytes from code, dtype, count, value + # Append (code, ifdentry, ifdvalue, writeonce) to tags list + code = int(TIFF.TAG_NAMES.get(code, code)) + try: + tifftype = TIFF.DATA_DTYPES[dtype] + except KeyError: + raise ValueError("unknown dtype %s" % dtype) + rawcount = count + + if dtype == "s": + # strings + value = bytestr(value) + b"\0" + count = rawcount = len(value) + rawcount = value.find(b"\0\0") + if rawcount < 0: + rawcount = count + else: + rawcount += 1 # length of string without buffer + value = (value,) + elif isinstance(value, bytes): + # packed binary data + dtsize = struct.calcsize(dtype) + if len(value) % dtsize: + raise ValueError("invalid packed binary data") + count = len(value) // dtsize + if len(dtype) > 1: + count *= int(dtype[:-1]) + dtype = dtype[-1] + ifdentry = [pack("HH", code, tifftype), pack(offsetformat, rawcount)] + ifdvalue = None + if struct.calcsize(dtype) * count <= offsetsize: + # value(s) can be written directly + if isinstance(value, bytes): + ifdentry.append(pack(valueformat, value)) + elif count == 1: + if isinstance(value, (tuple, list, numpy.ndarray)): + value = value[0] + ifdentry.append(pack(valueformat, pack(dtype, value))) + else: + 
ifdentry.append(pack(valueformat, pack(str(count) + dtype, *value))) + else: + # use offset to value(s) + ifdentry.append(pack(offsetformat, 0)) + if isinstance(value, bytes): + ifdvalue = value + elif isinstance(value, numpy.ndarray): + assert value.size == count + assert value.dtype.char == dtype + ifdvalue = value.tostring() + elif isinstance(value, (tuple, list)): + ifdvalue = pack(str(count) + dtype, *value) + else: + ifdvalue = pack(dtype, value) + tags.append((code, b"".join(ifdentry), ifdvalue, writeonce)) + + def rational(arg, max_denominator=1000000): + """ "Return nominator and denominator from float or two integers.""" + from fractions import Fraction # delayed import + + try: + f = Fraction.from_float(arg) + except TypeError: + f = Fraction(arg[0], arg[1]) + f = f.limit_denominator(max_denominator) + return f.numerator, f.denominator + + if description: + # user provided description + addtag("ImageDescription", "s", 0, description, writeonce=True) + + # write shape and metadata to ImageDescription + self._metadata = {} if not metadata else metadata.copy() + if self._imagej: + description = imagej_description( + input_shape, + shape[-1] in (3, 4), + self._colormap is not None, + **self._metadata, + ) + elif metadata or metadata == {}: + if self._truncate: + self._metadata.update(truncated=True) + description = json_description(input_shape, **self._metadata) + else: + description = None + if description: + # add 64 bytes buffer + # the image description might be updated later with the final shape + description = str2bytes(description, "ascii") + description += b"\0" * 64 + self._descriptionlen = len(description) + addtag("ImageDescription", "s", 0, description, writeonce=True) + + if software: + addtag("Software", "s", 0, software, writeonce=True) + if datetime is None: + datetime = self._now() + addtag( + "DateTime", "s", 0, datetime.strftime("%Y:%m:%d %H:%M:%S"), writeonce=True + ) + addtag("Compression", "H", 1, compresstag) + if predictor: + 
addtag("Predictor", "H", 1, 2) + addtag("ImageWidth", "I", 1, shape[-2]) + addtag("ImageLength", "I", 1, shape[-3]) + if tile: + addtag("TileWidth", "I", 1, tile[-1]) + addtag("TileLength", "I", 1, tile[-2]) + if tile[0] > 1: + addtag("ImageDepth", "I", 1, shape[-4]) + addtag("TileDepth", "I", 1, tile[0]) + addtag("NewSubfileType", "I", 1, 0) + if not bilevel: + sampleformat = {"u": 1, "i": 2, "f": 3, "c": 6}[datadtype.kind] + addtag( + "SampleFormat", "H", samplesperpixel, (sampleformat,) * samplesperpixel + ) + addtag("PhotometricInterpretation", "H", 1, photometric.value) + if colormap is not None: + addtag("ColorMap", "H", colormap.size, colormap) + addtag("SamplesPerPixel", "H", 1, samplesperpixel) + if bilevel: + pass + elif planarconfig and samplesperpixel > 1: + addtag("PlanarConfiguration", "H", 1, planarconfig.value) + addtag( + "BitsPerSample", + "H", + samplesperpixel, + (datadtype.itemsize * 8,) * samplesperpixel, + ) + else: + addtag("BitsPerSample", "H", 1, datadtype.itemsize * 8) + if extrasamples: + if photometric == RGB and extrasamples == 1: + addtag("ExtraSamples", "H", 1, 1) # associated alpha channel + else: + addtag("ExtraSamples", "H", extrasamples, (0,) * extrasamples) + if resolution is not None: + addtag("XResolution", "2I", 1, rational(resolution[0])) + addtag("YResolution", "2I", 1, rational(resolution[1])) + if len(resolution) > 2: + unit = resolution[2] + unit = 1 if unit is None else enumarg(TIFF.RESUNIT, unit) + elif self._imagej: + unit = 1 + else: + unit = 2 + addtag("ResolutionUnit", "H", 1, unit) + elif not self._imagej: + addtag("XResolution", "2I", 1, (1, 1)) + addtag("YResolution", "2I", 1, (1, 1)) + addtag("ResolutionUnit", "H", 1, 1) + if ijmetadata: + for t in imagej_metadata_tags(ijmetadata, byteorder): + addtag(*t) + + contiguous = not compress + if tile: + # one chunk per tile per plane + tiles = ( + (shape[2] + tile[0] - 1) // tile[0], + (shape[3] + tile[1] - 1) // tile[1], + (shape[4] + tile[2] - 1) // tile[2], + ) + 
numtiles = product(tiles) * shape[1] + stripbytecounts = [ + product(tile) * shape[-1] * datadtype.itemsize + ] * numtiles + addtag(tagbytecounts, offsetformat, numtiles, stripbytecounts) + addtag(tag_offsets, offsetformat, numtiles, [0] * numtiles) + contiguous = contiguous and product(tiles) == 1 + if not contiguous: + # allocate tile buffer + chunk = numpy.empty(tile + (shape[-1],), dtype=datadtype) + elif contiguous: + # one strip per plane + if bilevel: + stripbytecounts = [product(datashape[2:])] * shape[1] + else: + stripbytecounts = [product(datashape[2:]) * datadtype.itemsize] * shape[ + 1 + ] + addtag(tagbytecounts, offsetformat, shape[1], stripbytecounts) + addtag(tag_offsets, offsetformat, shape[1], [0] * shape[1]) + addtag("RowsPerStrip", "I", 1, shape[-3]) + else: + # compress rowsperstrip or ~64 KB chunks + rowsize = product(shape[-2:]) * datadtype.itemsize + if rowsperstrip is None: + rowsperstrip = 65536 // rowsize + if rowsperstrip < 1: + rowsperstrip = 1 + elif rowsperstrip > shape[-3]: + rowsperstrip = shape[-3] + addtag("RowsPerStrip", "I", 1, rowsperstrip) + + numstrips = (shape[-3] + rowsperstrip - 1) // rowsperstrip + numstrips *= shape[1] + stripbytecounts = [0] * numstrips + addtag(tagbytecounts, offsetformat, numstrips, [0] * numstrips) + addtag(tag_offsets, offsetformat, numstrips, [0] * numstrips) + + if data is None and not contiguous: + raise ValueError("cannot write non-contiguous empty file") + + # add extra tags from user + for t in extratags: + addtag(*t) + + # TODO: check TIFFReadDirectoryCheckOrder warning in files containing + # multiple tags of same code + # the entries in an IFD must be sorted in ascending order by tag code + tags = sorted(tags, key=lambda x: x[0]) + + if not (self._bigtiff or self._imagej) and (fh.tell() + datasize > 2**31 - 1): + raise ValueError("data too large for standard TIFF file") + + # if not compressed or multi-tiled, write the first IFD and then + # all data contiguously; else, write all IFDs and 
data interleaved + for pageindex in range(1 if contiguous else shape[0]): + # update pointer at ifd_offset + pos = fh.tell() + if pos % 2: + # location of IFD must begin on a word boundary + fh.write(b"\0") + pos += 1 + fh.seek(self._ifdoffset) + fh.write(pack(offsetformat, pos)) + fh.seek(pos) + + # write ifdentries + fh.write(pack(tagnoformat, len(tags))) + tag_offset = fh.tell() + fh.write(b"".join(t[1] for t in tags)) + self._ifdoffset = fh.tell() + fh.write(pack(offsetformat, 0)) # offset to next IFD + + # write tag values and patch offsets in ifdentries, if necessary + for tagindex, tag in enumerate(tags): + if tag[2]: + pos = fh.tell() + if pos % 2: + # tag value is expected to begin on word boundary + fh.write(b"\0") + pos += 1 + fh.seek(tag_offset + tagindex * tagsize + offsetsize + 4) + fh.write(pack(offsetformat, pos)) + fh.seek(pos) + if tag[0] == tag_offsets: + stripoffsetsoffset = pos + elif tag[0] == tagbytecounts: + strip_bytecounts_offset = pos + elif tag[0] == 270 and tag[2].endswith(b"\0\0\0\0"): + # image description buffer + self._descriptionoffset = pos + self._descriptionlenoffset = tag_offset + tagindex * tagsize + 4 + fh.write(tag[2]) + + # write image data + data_offset = fh.tell() + skip = align - data_offset % align + fh.seek(skip, 1) + data_offset += skip + if contiguous: + if data is None: + fh.write_empty(datasize) + else: + fh.write_array(data) + elif tile: + if data is None: + fh.write_empty(numtiles * stripbytecounts[0]) + else: + stripindex = 0 + for plane in data[pageindex]: + for tz in range(tiles[0]): + for ty in range(tiles[1]): + for tx in range(tiles[2]): + c0 = min(tile[0], shape[2] - tz * tile[0]) + c1 = min(tile[1], shape[3] - ty * tile[1]) + c2 = min(tile[2], shape[4] - tx * tile[2]) + chunk[c0:, c1:, c2:] = 0 + chunk[:c0, :c1, :c2] = plane[ + tz * tile[0] : tz * tile[0] + c0, + ty * tile[1] : ty * tile[1] + c1, + tx * tile[2] : tx * tile[2] + c2, + ] + if compress: + t = compress(chunk) + fh.write(t) + 
def _write_remaining_pages(self):
    """Write outstanding IFDs and tags to file.

    Does nothing when no tags are pending (``self._tags`` is empty or None)
    or when the file has been marked as truncated.

    A single template IFD is assembled in memory from ``self._tags``; for
    each remaining page the template's data offsets are patched and the
    template is appended to the file, while the "next IFD" pointer of the
    previously written IFD is updated to point at it.

    Raises
    ------
    ValueError
        If an offset cannot be packed with the current offset format, or
        if the IFDs would not fit, and the file is not an ImageJ file.
        For ImageJ files a warning is issued, ``self._truncate`` is set,
        and the method returns without writing.

    """
    if not self._tags or self._truncate:
        return

    fh = self._fh
    fhpos = fh.tell()
    if fhpos % 2:
        # pad so that the first IFD written here starts on a word boundary
        fh.write(b"\0")
        fhpos += 1
    byteorder = self._byteorder
    offsetformat = self._offsetformat
    offsetsize = self._offsetsize
    tagnoformat = self._tagnoformat
    tagsize = self._tagsize
    dataoffset = self._dataoffset
    # number of bytes of image data belonging to one page
    pagedatasize = sum(self._databytecounts)
    # number of IFDs still to be written (the first one is already on disk)
    pageno = self._shape[0] * self._datashape[0] - 1

    def pack(fmt, *val):
        # pack values using the byte order of the file
        return struct.pack(byteorder + fmt, *val)

    # construct template IFD in memory
    # need to patch offsets to next IFD and data before writing to disk
    ifd = io.BytesIO()
    ifd.write(pack(tagnoformat, len(self._tags)))
    tagoffset = ifd.tell()
    ifd.write(b"".join(t[1] for t in self._tags))
    # position, relative to the IFD start, of the "next IFD" pointer
    ifdoffset = ifd.tell()
    ifd.write(pack(offsetformat, 0))  # offset to next IFD
    # tag values
    for tagindex, tag in enumerate(self._tags):
        # position of this entry's value/offset field within the template
        offset2value = tagoffset + tagindex * tagsize + offsetsize + 4
        if tag[2]:
            # tag value is stored outside the entry; append it to the
            # template and store its position in the entry
            pos = ifd.tell()
            if pos % 2:  # tag value is expected to begin on word boundary
                ifd.write(b"\0")
                pos += 1
            ifd.seek(offset2value)
            try:
                ifd.write(pack(offsetformat, pos + fhpos))
            except Exception:  # struct.error
                if self._imagej:
                    warnings.warn("truncating ImageJ file")
                    self._truncate = True
                    return
                raise ValueError("data too large for non-BigTIFF file")
            ifd.seek(pos)
            ifd.write(tag[2])
            if tag[0] == self._tagoffsets:
                # save strip/tile offsets for later updates
                stripoffset2offset = offset2value
                stripoffset2value = pos
        elif tag[0] == self._tagoffsets:
            # save strip/tile offsets for later updates
            # (None marks that the single offset lives inside the entry)
            stripoffset2offset = None
            stripoffset2value = offset2value
    # size to word boundary
    if ifd.tell() % 2:
        ifd.write(b"\0")

    # check if all IFDs fit in file
    pos = fh.tell()
    if not self._bigtiff and pos + ifd.tell() * pageno > 2**32 - 256:
        if self._imagej:
            warnings.warn("truncating ImageJ file")
            self._truncate = True
            return
        raise ValueError("data too large for non-BigTIFF file")

    # TODO: assemble IFD chain in memory
    for _ in range(pageno):
        # update pointer at IFD offset
        pos = fh.tell()
        fh.seek(self._ifdoffset)
        fh.write(pack(offsetformat, pos))
        fh.seek(pos)
        # remember where the "next IFD" pointer of the IFD written below is
        self._ifdoffset = pos + ifdoffset
        # update strip/tile offsets in IFD
        dataoffset += pagedatasize  # offset to image data
        if stripoffset2offset is None:
            # single offset stored directly in the tag entry
            ifd.seek(stripoffset2value)
            ifd.write(pack(offsetformat, dataoffset))
        else:
            # entry points to an offset table; patch pointer and table
            ifd.seek(stripoffset2offset)
            ifd.write(pack(offsetformat, pos + stripoffset2value))
            ifd.seek(stripoffset2value)
            stripoffset = dataoffset
            for size in self._databytecounts:
                ifd.write(pack(offsetformat, stripoffset))
                stripoffset += size
        # write IFD entry
        fh.write(ifd.getvalue())

    # mark the pending pages as written
    self._tags = None
    self._datadtype = None
    self._dataoffset = None
    self._databytecounts = None
    # do not reset _shape or _data_shape
self + + def __exit__(self, exc_type, exc_value, traceback): + self.close() + + +class TiffFile(object): + """Read image and metadata from TIFF file. + + TiffFile instances must be closed using the 'close' method, which is + automatically called when using the 'with' context manager. + + Attributes + ---------- + pages : TiffPages + Sequence of TIFF pages in file. + series : list of TiffPageSeries + Sequences of closely related TIFF pages. These are computed + from OME, LSM, ImageJ, etc. metadata or based on similarity + of page properties such as shape, dtype, and compression. + byteorder : '>', '<' + The endianness of data in the file. + '>': big-endian (Motorola). + '>': little-endian (Intel). + is_flag : bool + If True, file is of a certain format. + Flags are: bigtiff, movie, shaped, ome, imagej, stk, lsm, fluoview, + nih, vista, 'micromanager, metaseries, mdgel, mediacy, tvips, fei, + sem, scn, svs, scanimage, andor, epics, pilatus, qptiff. + + All attributes are read-only. + + Examples + -------- + >>> # read image array from TIFF file + >>> imsave('temp.tif', numpy.random.rand(5, 301, 219)) + >>> with TiffFile('temp.tif') as tif: + ... data = tif.asarray() + >>> data.shape + (5, 301, 219) + + """ + + def __init__( + self, + arg, + name=None, + offset=None, + size=None, + multifile=True, + movie=None, + **kwargs, + ): + """Initialize instance from file. + + Parameters + ---------- + arg : str or open file + Name of file or open file object. + The file objects are closed in TiffFile.close(). + name : str + Optional name of file in case 'arg' is a file handle. + offset : int + Optional start position of embedded file. By default, this is + the current file position. + size : int + Optional size of embedded file. By default, this is the number + of bytes from the 'offset' to the end of the file. + multifile : bool + If True (default), series may include pages from multiple files. + Currently applies to OME-TIFF only. 
+ movie : bool + If True, assume that later pages differ from first page only by + data offsets and byte counts. Significantly increases speed and + reduces memory usage when reading movies with thousands of pages. + Enabling this for non-movie files will result in data corruption + or crashes. Python 3 only. + kwargs : bool + 'is_ome': If False, disable processing of OME-XML metadata. + + """ + if "fastij" in kwargs: + del kwargs["fastij"] + raise DeprecationWarning("the fastij option will be removed") + for key, value in kwargs.items(): + if key[:3] == "is_" and key[3:] in TIFF.FILE_FLAGS: + if value is not None and not value: + setattr(self, key, bool(value)) + else: + raise TypeError("unexpected keyword argument: %s" % key) + + fh = FileHandle(arg, mode="rb", name=name, offset=offset, size=size) + self._fh = fh + self._multifile = bool(multifile) + self._files = {fh.name: self} # cache of TiffFiles + try: + fh.seek(0) + try: + byteorder = {b"II": "<", b"MM": ">"}[fh.read(2)] + except KeyError: + raise ValueError("not a TIFF file") + sys_byteorder = {"big": ">", "little": "<"}[sys.byteorder] + self.isnative = byteorder == sys_byteorder + + version = struct.unpack(byteorder + "H", fh.read(2))[0] + if version == 43: + # BigTiff + self.is_bigtiff = True + offsetsize, zero = struct.unpack(byteorder + "HH", fh.read(4)) + if zero or offsetsize != 8: + raise ValueError("invalid BigTIFF file") + self.byteorder = byteorder + self.offsetsize = 8 + self.offsetformat = byteorder + "Q" + self.tagnosize = 8 + self.tagnoformat = byteorder + "Q" + self.tagsize = 20 + self.tagformat1 = byteorder + "HH" + self.tagformat2 = byteorder + "Q8s" + elif version == 42: + self.is_bigtiff = False + self.byteorder = byteorder + self.offsetsize = 4 + self.offsetformat = byteorder + "I" + self.tagnosize = 2 + self.tagnoformat = byteorder + "H" + self.tagsize = 12 + self.tagformat1 = byteorder + "HH" + self.tagformat2 = byteorder + "I4s" + else: + raise ValueError("invalid TIFF file") + + # 
def asarray(self, key=None, series=None, out=None, validate=True, maxworkers=1):
    """Return image data from multiple TIFF pages as numpy array.

    By default, the data from the first series is returned.

    Parameters
    ----------
    key : int, slice, or sequence of page indices
        Defines which pages to return as array.
    series : int or TiffPageSeries
        Defines which series of pages to return as array.
    out : numpy.ndarray, str, or file-like object; optional
        Buffer where image data will be saved.
        If None (default), a new array will be created.
        If numpy.ndarray, a writable array of compatible dtype and shape.
        If 'memmap', directly memory-map the image data in the TIFF file
        if possible; else create a memory-mapped array in a temporary file.
        If str or open file, the file name or file object used to
        create a memory-map to an array stored in a binary file on disk.
    validate : bool
        If True (default), validate various tags.
        Passed to TiffPage.asarray().
    maxworkers : int
        Maximum number of threads to concurrently get data from pages.
        Default is 1. If None, up to half the CPU cores are used.
        Reading data from file is limited to a single thread.
        Using multiple threads can significantly speed up this function
        if the bottleneck is decoding compressed data, e.g. in case of
        large LZW compressed LSM files.
        If the bottleneck is I/O or pure Python code, using multiple
        threads might be detrimental.

    Returns
    -------
    numpy.ndarray or None
        The image data. An empty array is returned if the file has no
        pages; None is returned if the page reader returned None.

    Raises
    ------
    TypeError
        If 'key' is not an int, slice, or iterable of page indices.
    ValueError
        If the selection contains no pages.

    """
    # 'collections.Iterable' was removed in Python 3.10; the ABC lives in
    # 'collections.abc'. Imported locally to keep this method self-contained.
    from collections.abc import Iterable

    if not self.pages:
        return numpy.array([])
    if key is None and series is None:
        series = 0
    if series is not None:
        try:
            series = self.series[series]
        except (KeyError, TypeError):
            # 'series' is presumably already a TiffPageSeries instance
            pass
        pages = series._pages
    else:
        pages = self.pages

    if key is None:
        pass
    elif isinstance(key, inttypes):
        pages = [pages[key]]
    elif isinstance(key, slice):
        pages = pages[key]
    elif isinstance(key, Iterable):
        pages = [pages[k] for k in key]
    else:
        raise TypeError("key must be an int, slice, or sequence")

    if not pages:
        raise ValueError("no pages selected")

    if self.is_nih:
        result = stack_pages(pages, out=out, maxworkers=maxworkers, squeeze=False)
    elif key is None and series and series.offset:
        # whole series is stored contiguously: read it in one go
        typecode = self.byteorder + series.dtype.char
        # only compare strings against 'memmap'; comparing a numpy array
        # would yield an elementwise result with ambiguous truth value
        use_memmap = isinstance(out, str) and out == "memmap"
        if use_memmap and pages[0].is_memmappable:
            result = self.filehandle.memmap_array(
                typecode, series.shape, series.offset
            )
        else:
            if out is not None:
                out = create_output(out, series.shape, series.dtype)
            self.filehandle.seek(series.offset)
            result = self.filehandle.read_array(
                typecode, product(series.shape), out=out, native=True
            )
    elif len(pages) == 1:
        result = pages[0].asarray(out=out, validate=validate)
    else:
        result = stack_pages(pages, out=out, maxworkers=maxworkers)

    if result is None:
        return

    if key is None:
        try:
            result.shape = series.shape
        except ValueError:
            try:
                warnings.warn(
                    "failed to reshape %s to %s" % (result.shape, series.shape)
                )
                # try series of expected shapes
                result.shape = (-1,) + series.shape
            except ValueError:
                # revert to generic shape
                result.shape = (-1,) + pages[0].shape
    elif len(pages) == 1:
        result.shape = pages[0].shape
    else:
        result.shape = (-1,) + pages[0].shape
    return result
lenpages = len(pages) + + def append_series(series, pages, axes, shape, reshape, name, truncated): + page = pages[0] + if not axes: + shape = page.shape + axes = page.axes + if len(pages) > 1: + shape = (len(pages),) + shape + axes = "Q" + axes + size = product(shape) + resize = product(reshape) + if page.is_contiguous and resize > size and resize % size == 0: + if truncated is None: + truncated = True + axes = "Q" + axes + shape = (resize // size,) + shape + try: + axes = reshape_axes(axes, shape, reshape) + shape = reshape + except ValueError as e: + warnings.warn(str(e)) + series.append( + TiffPageSeries( + pages, + shape, + page.dtype, + axes, + name=name, + stype="Shaped", + truncated=truncated, + ) + ) + + keyframe = axes = shape = reshape = name = None + series = [] + index = 0 + while True: + if index >= lenpages: + break + # new keyframe; start of new series + pages.keyframe = index + keyframe = pages[index] + if not keyframe.is_shaped: + warnings.warn("invalid shape metadata or corrupted file") + return + # read metadata + axes = None + shape = None + metadata = json_description_metadata(keyframe.is_shaped) + name = metadata.get("name", "") + reshape = metadata["shape"] + truncated = metadata.get("truncated", None) + if "axes" in metadata: + axes = metadata["axes"] + if len(axes) == len(reshape): + shape = reshape + else: + axes = "" + warnings.warn("axes do not match shape") + # skip pages if possible + spages = [keyframe] + size = product(reshape) + npages, mod = divmod(size, product(keyframe.shape)) + if mod: + warnings.warn("series shape does not match page shape") + return + if 1 < npages <= lenpages - index: + size *= keyframe._dtype.itemsize + if truncated: + npages = 1 + elif ( + keyframe.is_final + and keyframe.offset + size < pages[index + 1].offset + ): + truncated = False + else: + # need to read all pages for series + truncated = False + for j in range(index + 1, index + npages): + page = pages[j] + page.keyframe = keyframe + 
spages.append(page) + append_series(series, spages, axes, shape, reshape, name, truncated) + index += npages + + return series + + def _imagej_series(self): + """Return image series in ImageJ file.""" + # ImageJ's dimension order is always TZCYXS + # TODO: fix loading of color, composite, or palette images + self.pages.useframes = True + self.pages.keyframe = 0 + + ij = self.imagej_metadata + pages = self.pages + page = pages[0] + + def is_hyperstack(): + # ImageJ hyperstack store all image metadata in the first page and + # image data are stored contiguously before the second page, if any + if not page.is_final: + return False + images = ij.get("images", 0) + if images <= 1: + return False + offset, count = page.is_contiguous + if ( + count != product(page.shape) * page.bitspersample // 8 + or offset + count * images > self.filehandle.size + ): + raise ValueError() + # check that next page is stored after data + if len(pages) > 1 and offset + count * images > pages[1].offset: + return False + return True + + try: + hyperstack = is_hyperstack() + except ValueError: + warnings.warn("invalid ImageJ metadata or corrupted file") + return + if hyperstack: + # no need to read other pages + pages = [page] + else: + self.pages.load() + + shape = [] + axes = [] + if "frames" in ij: + shape.append(ij["frames"]) + axes.append("T") + if "slices" in ij: + shape.append(ij["slices"]) + axes.append("Z") + if "channels" in ij and not ( + page.photometric == 2 and not ij.get("hyperstack", False) + ): + shape.append(ij["channels"]) + axes.append("C") + remain = ij.get("images", len(pages)) // (product(shape) if shape else 1) + if remain > 1: + shape.append(remain) + axes.append("I") + if page.axes[0] == "I": + # contiguous multiple images + shape.extend(page.shape[1:]) + axes.extend(page.axes[1:]) + elif page.axes[:2] == "SI": + # color-mapped contiguous multiple images + shape = page.shape[0:1] + tuple(shape) + page.shape[2:] + axes = list(page.axes[0]) + axes + list(page.axes[2:]) 
+ else: + shape.extend(page.shape) + axes.extend(page.axes) + + truncated = ( + hyperstack + and len(self.pages) == 1 + and page.is_contiguous[1] != product(shape) * page.bitspersample // 8 + ) + + return [ + TiffPageSeries( + pages, shape, page.dtype, axes, stype="ImageJ", truncated=truncated + ) + ] + + def _fluoview_series(self): + """Return image series in FluoView file.""" + self.pages.useframes = True + self.pages.keyframe = 0 + self.pages.load() + mm = self.fluoview_metadata + mmhd = list(reversed(mm["Dimensions"])) + axes = "".join( + TIFF.MM_DIMENSIONS.get(i[0].upper(), "Q") for i in mmhd if i[1] > 1 + ) + shape = tuple(int(i[1]) for i in mmhd if i[1] > 1) + return [ + TiffPageSeries( + self.pages, + shape, + self.pages[0].dtype, + axes, + name=mm["ImageName"], + stype="FluoView", + ) + ] + + def _mdgel_series(self): + """Return image series in MD Gel file.""" + # only a single page, scaled according to metadata in second page + self.pages.useframes = False + self.pages.keyframe = 0 + self.pages.load() + md = self.mdgel_metadata + if md["FileTag"] in (2, 128): + dtype = numpy.dtype("float32") + scale = md["ScalePixel"] + scale = scale[0] / scale[1] # rational + if md["FileTag"] == 2: + # squary root data format + def transform(a): + return a.astype("float32") ** 2 * scale + + else: + + def transform(a): + return a.astype("float32") * scale + + else: + transform = None + page = self.pages[0] + return [ + TiffPageSeries( + [page], page.shape, dtype, page.axes, transform=transform, stype="MDGel" + ) + ] + + def _nih_series(self): + """Return image series in NIH file.""" + self.pages.useframes = True + self.pages.keyframe = 0 + self.pages.load() + page0 = self.pages[0] + if len(self.pages) == 1: + shape = page0.shape + axes = page0.axes + else: + shape = (len(self.pages),) + page0.shape + axes = "I" + page0.axes + return [TiffPageSeries(self.pages, shape, page0.dtype, axes, stype="NIH")] + + def _ome_series(self): + """Return image series in OME-TIFF 
file(s).""" + from xml.etree import cElementTree as etree # delayed import + + omexml = self.pages[0].description + try: + root = etree.fromstring(omexml) + except etree.ParseError as e: + # TODO: test badly encoded OME-XML + warnings.warn("ome-xml: %s" % e) + try: + # might work on Python 2 + omexml = omexml.decode("utf-8", "ignore").encode("utf-8") + root = etree.fromstring(omexml) + except Exception: + return + + self.pages.useframes = True + self.pages.keyframe = 0 + self.pages.load() + + uuid = root.attrib.get("UUID", None) + self._files = {uuid: self} + dirname = self._fh.dirname + modulo = {} + series = [] + for element in root: + if element.tag.endswith("BinaryOnly"): + # TODO: load OME-XML from master or companion file + warnings.warn("ome-xml: not an ome-tiff master file") + break + if element.tag.endswith("StructuredAnnotations"): + for annot in element: + if not annot.attrib.get("Namespace", "").endswith("modulo"): + continue + for value in annot: + for modul in value: + for along in modul: + if not along.tag[:-1].endswith("Along"): + continue + axis = along.tag[-1] + newaxis = along.attrib.get("Type", "other") + newaxis = TIFF.AXES_LABELS[newaxis] + if "Start" in along.attrib: + step = float(along.attrib.get("Step", 1)) + start = float(along.attrib["Start"]) + stop = float(along.attrib["End"]) + step + labels = numpy.arange(start, stop, step) + else: + labels = [ + label.text + for label in along + if label.tag.endswith("Label") + ] + modulo[axis] = (newaxis, labels) + + if not element.tag.endswith("Image"): + continue + + attr = element.attrib + name = attr.get("Name", None) + + for pixels in element: + if not pixels.tag.endswith("Pixels"): + continue + attr = pixels.attrib + dtype = attr.get("PixelType", None) + axes = "".join(reversed(attr["DimensionOrder"])) + shape = list(int(attr["Size" + ax]) for ax in axes) + size = product(shape[:-2]) + ifds = None + spp = 1 # samples per pixel + # FIXME: this implementation assumes the last two + # dimensions 
are stored in tiff pages (shape[:-2]). + # Apparently that is not always the case. + for data in pixels: + if data.tag.endswith("Channel"): + attr = data.attrib + if ifds is None: + spp = int(attr.get("SamplesPerPixel", spp)) + ifds = [None] * (size // spp) + elif int(attr.get("SamplesPerPixel", 1)) != spp: + raise ValueError("cannot handle differing SamplesPerPixel") + continue + if ifds is None: + ifds = [None] * (size // spp) + if not data.tag.endswith("TiffData"): + continue + attr = data.attrib + ifd = int(attr.get("IFD", 0)) + num = int(attr.get("NumPlanes", 1 if "IFD" in attr else 0)) + num = int(attr.get("PlaneCount", num)) + idx = [int(attr.get("First" + ax, 0)) for ax in axes[:-2]] + try: + idx = numpy.ravel_multi_index(idx, shape[:-2]) + except ValueError: + # ImageJ produces invalid ome-xml when cropping + warnings.warn("ome-xml: invalid TiffData index") + continue + for uuid in data: + if not uuid.tag.endswith("UUID"): + continue + if uuid.text not in self._files: + if not self._multifile: + # abort reading multifile OME series + # and fall back to generic series + return [] + fname = uuid.attrib["FileName"] + try: + tif = TiffFile(os.path.join(dirname, fname)) + tif.pages.useframes = True + tif.pages.keyframe = 0 + tif.pages.load() + except (IOError, FileNotFoundError, ValueError): + warnings.warn("ome-xml: failed to read '%s'" % fname) + break + self._files[uuid.text] = tif + tif.close() + pages = self._files[uuid.text].pages + try: + for i in range(num if num else len(pages)): + ifds[idx + i] = pages[ifd + i] + except IndexError: + warnings.warn("ome-xml: index out of range") + # only process first UUID + break + else: + pages = self.pages + try: + for i in range(num if num else len(pages)): + ifds[idx + i] = pages[ifd + i] + except IndexError: + warnings.warn("ome-xml: index out of range") + + if all(i is None for i in ifds): + # skip images without data + continue + + # set a keyframe on all IFDs + keyframe = None + for i in ifds: + # try find a 
TiffPage + if i and i == i.keyframe: + keyframe = i + break + if not keyframe: + # reload a TiffPage from file + for i, keyframe in enumerate(ifds): + if keyframe: + keyframe.parent.pages.keyframe = keyframe.index + keyframe = keyframe.parent.pages[keyframe.index] + ifds[i] = keyframe + break + for i in ifds: + if i is not None: + i.keyframe = keyframe + + dtype = keyframe.dtype + series.append( + TiffPageSeries( + ifds, shape, dtype, axes, parent=self, name=name, stype="OME" + ) + ) + for serie in series: + shape = list(serie.shape) + for axis, (newaxis, labels) in modulo.items(): + i = serie.axes.index(axis) + size = len(labels) + if shape[i] == size: + serie.axes = serie.axes.replace(axis, newaxis, 1) + else: + shape[i] //= size + shape.insert(i + 1, size) + serie.axes = serie.axes.replace(axis, axis + newaxis, 1) + serie.shape = tuple(shape) + # squeeze dimensions + for serie in series: + serie.shape, serie.axes = squeeze_axes(serie.shape, serie.axes) + return series + + def _lsm_series(self): + """Return main image series in LSM file. 
Skip thumbnails.""" + lsmi = self.lsm_metadata + axes = TIFF.CZ_LSMINFO_SCANTYPE[lsmi["ScanType"]] + if self.pages[0].photometric == 2: # RGB; more than one channel + axes = axes.replace("C", "").replace("XY", "XYC") + if lsmi.get("DimensionP", 0) > 1: + axes += "P" + if lsmi.get("DimensionM", 0) > 1: + axes += "M" + axes = axes[::-1] + shape = tuple(int(lsmi[TIFF.CZ_LSMINFO_DIMENSIONS[i]]) for i in axes) + name = lsmi.get("Name", "") + self.pages.keyframe = 0 + pages = self.pages[::2] + dtype = pages[0].dtype + series = [TiffPageSeries(pages, shape, dtype, axes, name=name, stype="LSM")] + + if self.pages[1].is_reduced: + self.pages.keyframe = 1 + pages = self.pages[1::2] + dtype = pages[0].dtype + cp, i = 1, 0 + while cp < len(pages) and i < len(shape) - 2: + cp *= shape[i] + i += 1 + shape = shape[:i] + pages[0].shape + axes = axes[:i] + "CYX" + series.append( + TiffPageSeries(pages, shape, dtype, axes, name=name, stype="LSMreduced") + ) + + return series + + def _lsm_load_pages(self): + """Load all pages from LSM file.""" + self.pages.cache = True + self.pages.useframes = True + # second series: thumbnails + self.pages.keyframe = 1 + keyframe = self.pages[1] + for page in self.pages[1::2]: + page.keyframe = keyframe + # first series: data + self.pages.keyframe = 0 + keyframe = self.pages[0] + for page in self.pages[::2]: + page.keyframe = keyframe + + def _lsm_fix_strip_offsets(self): + """Unwrap strip offsets for LSM files greater than 4 GB. + + Each series and position require separate unwrapping (undocumented). 
+ + """ + if self.filehandle.size < 2**32: + return + + pages = self.pages + npages = len(pages) + series = self.series[0] + axes = series.axes + + # find positions + positions = 1 + for i in 0, 1: + if series.axes[i] in "PM": + positions *= series.shape[i] + + # make time axis first + if positions > 1: + ntimes = 0 + for i in 1, 2: + if axes[i] == "T": + ntimes = series.shape[i] + break + if ntimes: + div, mod = divmod(npages, 2 * positions * ntimes) + assert mod == 0 + shape = (positions, ntimes, div, 2) + indices = numpy.arange(product(shape)).reshape(shape) + indices = numpy.moveaxis(indices, 1, 0) + else: + indices = numpy.arange(npages).reshape(-1, 2) + + # images of reduced page might be stored first + if pages[0].dataoffsets[0] > pages[1].dataoffsets[0]: + indices = indices[..., ::-1] + + # unwrap offsets + wrap = 0 + previousoffset = 0 + for i in indices.flat: + page = pages[i] + dataoffsets = [] + for currentoffset in page.dataoffsets: + if currentoffset < previousoffset: + wrap += 2**32 + dataoffsets.append(currentoffset + wrap) + previousoffset = currentoffset + page.dataoffsets = tuple(dataoffsets) + + def _lsm_fix_strip_bytecounts(self): + """Set databytecounts to size of compressed data. + + The StripByteCounts tag in LSM files contains the number of bytes + for the uncompressed data. 
def __getattr__(self, name):
    """Look up a missing 'is_flag' attribute on the first page.

    The boolean result is stored on the instance, so later accesses do
    not reach this method again. False is returned, without storing it,
    if the file has no pages. Any other name raises AttributeError.

    """
    if name[3:] not in TIFF.FILE_FLAGS:
        message = "'%s' object has no attribute '%s'" % (
            self.__class__.__name__,
            name,
        )
        raise AttributeError(message)
    if self.pages:
        flag = bool(getattr(self.pages[0], name))
        setattr(self, name, flag)
        return flag
    return False
@lazyattr
def flags(self):
    """Return set of lower-case names of all file flags that are set."""
    active = set()
    for flag in sorted(TIFF.FILE_FLAGS):
        # flags are exposed as 'is_<flag>' attributes
        if getattr(self, "is_" + flag):
            active.add(flag.lower())
    return active
@lazyattr
def stk_metadata(self):
    """Return STK metadata gathered from the UIC tags of the first page.

    Returns None if the file is not flagged as STK, else a dict.

    """
    if not self.is_stk:
        return
    first = self.pages[0]
    tags = first.tags
    meta = {"NumberPlanes": tags["UIC2tag"].count}
    if first.description:
        meta["PlaneDescriptions"] = first.description.split("\0")
        # result['plane_descriptions'] = stk_description_metadata(
        #     page.image_description)
    # merge in order: UIC1, UIC3 (wavelengths), UIC4 (overrides uic1 tags)
    for tagname in ("UIC1tag", "UIC3tag", "UIC4tag"):
        if tagname in tags:
            meta.update(tags[tagname].value)
    uic2 = tags["UIC2tag"].value
    for key in ("ZDistance", "TimeCreated", "TimeModified"):
        meta[key] = uic2[key]
    try:
        # combine the per-plane date and time values into datetime arrays
        for key, datekey, timekey in (
            ("DatetimeCreated", "DateCreated", "TimeCreated"),
            ("DatetimeModified", "DateModified", "TimeModified"),
        ):
            stamps = [
                julian_datetime(*dt) for dt in zip(uic2[datekey], uic2[timekey])
            ]
            meta[key] = numpy.array(stamps, dtype="datetime64[ns]")
    except ValueError as e:
        warnings.warn("stk_metadata: %s" % e)
    return meta
@lazyattr
def mdgel_metadata(self):
    """Return consolidated metadata from MD GEL tags as dict.

    Only the first two pages are searched for a 'MDFileTag'; None is
    returned if neither page has one.

    """
    tags = next(
        (page.tags for page in self.pages[:2] if "MDFileTag" in page.tags),
        None,
    )
    if tags is None:
        return None
    # tag codes 33445..33452; the first two characters of each tag name
    # are dropped to form the result key
    names = (TIFF.TAGS[code] for code in range(33445, 33453))
    return {name[2:]: tags[name].value for name in names if name in tags}
class TiffPages(object):
    """Sequence of TIFF image file directories.

    Pages are located and read lazily. For every page found so far,
    `self.pages` holds either a page object (TiffPage or TiffFrame) or the
    integer file offset of a page that was located but not read yet.

    """

    def __init__(self, parent):
        """Initialize instance from file. Read first TiffPage from file.

        The file position must be at an offset to an offset to a TiffPage.

        Parameters
        ----------
        parent : object
            Owner of the pages. Must provide `filehandle`, `offsetformat`
            and `offsetsize` here, and the tag layout attributes used by
            `_seek`.

        """
        self.parent = parent
        self.pages = []  # cache of TiffPages, TiffFrames, or their offsets
        self.complete = False  # True if offsets to all pages were read
        self._tiffpage = TiffPage  # class for reading tiff pages
        self._keyframe = None
        self._cache = True

        # read offset to first page
        fh = parent.filehandle
        self._nextpageoffset = fh.tell()
        offset = struct.unpack(parent.offsetformat, fh.read(parent.offsetsize))[0]

        if offset == 0:
            # warnings.warn('file contains no pages')
            self.complete = True
            return
        if offset >= fh.size:
            warnings.warn("invalid page offset (%i)" % offset)
            self.complete = True
            return

        # always read and cache first page
        fh.seek(offset)
        page = TiffPage(parent, index=0)
        self.pages.append(page)
        self._keyframe = page

    @property
    def cache(self):
        """Return if pages/frames are currently being cached."""
        return self._cache

    @cache.setter
    def cache(self, value):
        """Enable or disable caching of pages/frames. Clear cache if False."""
        value = bool(value)
        if self._cache and not value:
            # clear() is a no-op once caching is off, so call it first
            self.clear()
        self._cache = value

    @property
    def useframes(self):
        """Return if currently using TiffFrame (True) or TiffPage (False)."""
        return self._tiffpage == TiffFrame and TiffFrame is not TiffPage

    @useframes.setter
    def useframes(self, value):
        """Set to use TiffFrame (True) or TiffPage (False)."""
        self._tiffpage = TiffFrame if value else TiffPage

    @property
    def keyframe(self):
        """Return index of current keyframe."""
        return self._keyframe.index

    @keyframe.setter
    def keyframe(self, index):
        """Set current keyframe. Load TiffPage from file if necessary."""
        if self._keyframe.index == index:
            return
        if self.complete or 0 <= index < len(self.pages):
            page = self.pages[index]
            if isinstance(page, TiffPage):
                self._keyframe = page
                return
            elif isinstance(page, TiffFrame):
                # remove existing frame
                self.pages[index] = page.offset
        # load TiffPage from file
        useframes = self.useframes
        self._tiffpage = TiffPage
        self._keyframe = self[index]
        self.useframes = useframes

    @property
    def next_page_offset(self):
        """Return offset where offset to a new page can be stored."""
        if not self.complete:
            self._seek(-1)
        return self._nextpageoffset

    def load(self):
        """Read all remaining pages from file."""
        fh = self.parent.filehandle
        keyframe = self._keyframe
        pages = self.pages
        if not self.complete:
            self._seek(-1)
        for i, page in enumerate(pages):
            if isinstance(page, inttypes):
                # entry is still an offset: read the page it points to
                fh.seek(page)
                page = self._tiffpage(self.parent, index=i, keyframe=keyframe)
                pages[i] = page

    def clear(self, fully=True):
        """Delete all but first page from cache. Set keyframe to first page.

        Parameters
        ----------
        fully : bool
            If True (default), replace every cached page except the first
            by its file offset. If False, replace only TiffFrame instances.

        """
        pages = self.pages
        if not self._cache or len(pages) < 1:
            return
        self._keyframe = pages[0]
        if fully:
            # delete all but first TiffPage/TiffFrame
            for i, page in enumerate(pages[1:]):
                if not isinstance(page, inttypes):
                    pages[i + 1] = page.offset
        elif TiffFrame is not TiffPage:
            # delete only TiffFrames
            for i, page in enumerate(pages):
                if isinstance(page, TiffFrame):
                    pages[i] = page.offset

    def _seek(self, index, maxpages=2**22):
        """Seek file to offset of specified page.

        Offsets of pages not located yet are read from the file until
        `index` is reached. An index of -1 therefore reads the offsets of
        all pages.

        Parameters
        ----------
        index : int
            Index of the page to seek to.
        maxpages : int
            Upper limit for the number of entries collected in `self.pages`.

        Raises
        ------
        RuntimeError
            If the file handle is closed.
        IndexError
            If the file contains no page at `index`.

        """
        pages = self.pages
        if not pages:
            return

        fh = self.parent.filehandle
        if fh.closed:
            raise RuntimeError("FileHandle is closed")

        if self.complete or 0 <= index < len(pages):
            page = pages[index]
            offset = page if isinstance(page, inttypes) else page.offset
            fh.seek(offset)
            return

        offsetformat = self.parent.offsetformat
        offsetsize = self.parent.offsetsize
        tagnoformat = self.parent.tagnoformat
        tagnosize = self.parent.tagnosize
        tagsize = self.parent.tagsize
        unpack = struct.unpack

        page = pages[-1]
        offset = page if isinstance(page, inttypes) else page.offset

        while len(pages) < maxpages:
            # read offsets to pages from file until index is reached
            fh.seek(offset)
            # skip tags
            try:
                tagno = unpack(tagnoformat, fh.read(tagnosize))[0]
                if tagno > 4096:
                    raise ValueError("suspicious number of tags")
            except Exception:
                warnings.warn("corrupted tag list at offset %i" % offset)
                del pages[-1]
                self.complete = True
                break
            self._nextpageoffset = offset + tagnosize + tagno * tagsize
            fh.seek(self._nextpageoffset)

            # read offset to next page
            offset = unpack(offsetformat, fh.read(offsetsize))[0]
            if offset == 0:
                self.complete = True
                break
            if offset >= fh.size:
                warnings.warn("invalid page offset (%i)" % offset)
                self.complete = True
                break

            pages.append(offset)
            if 0 <= index < len(pages):
                break

        if index >= len(pages):
            raise IndexError("list index out of range")

        page = pages[index]
        fh.seek(page if isinstance(page, inttypes) else page.offset)

    def __bool__(self):
        """Return True if file contains any pages."""
        return len(self.pages) > 0

    def __len__(self):
        """Return number of pages in file."""
        if not self.complete:
            self._seek(-1)
        return len(self.pages)

    def __getitem__(self, key):
        """Return specified page(s) from cache or file.

        Parameters
        ----------
        key : int or slice
            Index of a page, or slice selecting several pages. Negative
            integers count from the last page in the file.

        Returns
        -------
        page or list of pages
            A single page for an integer key, a list for a slice.

        Raises
        ------
        IndexError
            If there are no pages or the index is out of range.

        """
        pages = self.pages
        if not pages:
            raise IndexError("list index out of range")
        if key == 0:
            return pages[key]

        if isinstance(key, slice):
            start, stop, _ = key.indices(2**31 - 1)
            if not self.complete and max(stop, start) > len(pages):
                self._seek(-1)
            return [self[i] for i in range(*key.indices(len(pages)))]

        if key < 0:
            # A negative index is relative to the last page of the file,
            # which is known only after all page offsets were read. Without
            # this, the partially filled cache would be indexed instead and
            # the wrong page returned.
            if not self.complete:
                self._seek(-1)
            if key < -len(pages):
                raise IndexError("list index out of range")
            # normalize so that newly read pages get their real index
            key += len(pages)
            if key == 0:
                return pages[0]

        if self.complete and key >= len(pages):
            raise IndexError("list index out of range")

        try:
            page = pages[key]
        except IndexError:
            # not located yet; 0 acts as a placeholder offset so that the
            # page is looked up in the file below
            page = 0
        if not isinstance(page, inttypes):
            return page

        self._seek(key)
        page = self._tiffpage(self.parent, index=key, keyframe=self._keyframe)
        if self._cache:
            pages[key] = page
        return page

    def __iter__(self):
        """Return iterator over all pages."""
        i = 0
        while True:
            try:
                yield self[i]
                i += 1
            except IndexError:
                break
def __init__(self, parent, index, keyframe=None):
    """Initialize instance from file.

    The file handle position must be at offset to a valid IFD.

    Parameters
    ----------
    parent : object
        Owner of the page. Its `filehandle`, `tagnoformat`, `tagnosize`,
        `tagsize` and `byteorder` attributes are used to read the IFD.
    index : int
        Index of page in file.
    keyframe : object, optional
        Not used by this method.

    Raises
    ------
    ValueError
        If the tag count cannot be read or exceeds 4096.

    """
    self.parent = parent
    self.index = index
    self.shape = ()
    self._shape = ()
    self.dtype = None
    self._dtype = None
    self.axes = ""
    self.tags = {}

    self.dataoffsets = ()
    self.databytecounts = ()

    # read TIFF IFD structure and its tags from file
    fh = parent.filehandle
    self.offset = fh.tell()  # offset to this IFD
    try:
        tagno = struct.unpack(parent.tagnoformat, fh.read(parent.tagnosize))[0]
        if tagno > 4096:
            raise ValueError("suspicious number of tags")
    except Exception:
        raise ValueError("corrupted tag list at offset %i" % self.offset)

    tagsize = parent.tagsize
    data = fh.read(tagsize * tagno)
    tags = self.tags
    pos = -tagsize  # byte position of the current tag within data
    for _ in range(tagno):
        pos += tagsize
        try:
            tag = TiffTag(self.parent, data[pos : pos + tagsize])
        except TiffTag.Error as e:
            warnings.warn(str(e))
            continue
        tagname = tag.name
        if tagname not in tags:
            name = tagname
        else:
            # some files contain multiple tags with same code
            # e.g. MicroManager files contain two ImageDescription tags
            # Append the first free numeric suffix. The counter must
            # advance, else a third tag of the same name never terminates.
            i = 1
            name = "%s%i" % (tagname, i)
            while name in tags:
                i += 1
                name = "%s%i" % (tagname, i)
        tags[name] = tag
        name = TIFF.TAG_ATTRIBUTES.get(name, "")
        if name:
            if name[:3] in "sof des" and not isinstance(tag.value, str):
                pass  # wrong string type for software, description
            else:
                setattr(self, name, tag.value)

    if not tags:
        return  # found in FIBICS

    # consolidate private tags; remove them from self.tags
    if self.is_andor:
        self.andor_tags
    elif self.is_epics:
        self.epics_tags

    if self.is_lsm or (self.index and self.parent.is_lsm):
        # correct non standard LSM bitspersample tags
        self.tags["BitsPerSample"]._fix_lsm_bitspersample(self)

    if self.is_vista or (self.index and self.parent.is_vista):
        # ISS Vista writes wrong ImageDepth tag
        self.imagedepth = 1

    if self.is_stk and "UIC1tag" in tags and not tags["UIC1tag"].value:
        # read UIC1tag now that plane count is known
        uic1tag = tags["UIC1tag"]
        fh.seek(uic1tag.valueoffset)
        tags["UIC1tag"].value = read_uic1tag(
            fh,
            self.parent.byteorder,
            uic1tag.dtype,
            uic1tag.count,
            None,
            tags["UIC2tag"].count,
        )

    if "IJMetadata" in tags:
        # decode IJMetadata tag
        try:
            tags["IJMetadata"].value = imagej_metadata(
                tags["IJMetadata"].value,
                tags["IJMetadataByteCounts"].value,
                self.parent.byteorder,
            )
        except Exception as e:
            warnings.warn(str(e))

    if "BitsPerSample" in tags:
        tag = tags["BitsPerSample"]
        if tag.count == 1:
            self.bitspersample = tag.value
        else:
            # LSM might list more items than samplesperpixel
            value = tag.value[: self.samplesperpixel]
            # keep the tuple only if the samples differ in size
            if any((v - value[0] for v in value)):
                self.bitspersample = value
            else:
                self.bitspersample = value[0]

    if "SampleFormat" in tags:
        tag = tags["SampleFormat"]
        if tag.count == 1:
            self.sampleformat = tag.value
        else:
            value = tag.value[: self.samplesperpixel]
            if any((v - value[0] for v in value)):
                self.sampleformat = value
            else:
                self.sampleformat = value[0]

    if "ImageLength" in tags:
        if "RowsPerStrip" not in tags or tags["RowsPerStrip"].count > 1:
            self.rowsperstrip = self.imagelength
        # self.stripsperimage = int(math.floor(
        #     float(self.imagelength + self.rowsperstrip - 1) /
        #     self.rowsperstrip))

    # determine dtype
    dtype = self.sampleformat, self.bitspersample
    dtype = TIFF.SAMPLE_DTYPES.get(dtype, None)
    if dtype is not None:
        dtype = numpy.dtype(dtype)
    self.dtype = self._dtype = dtype

    # determine shape of data
    imagelength = self.imagelength
    imagewidth = self.imagewidth
    imagedepth = self.imagedepth
    samplesperpixel = self.samplesperpixel

    if self.is_stk:
        assert self.imagedepth == 1
        uictag = tags["UIC2tag"].value
        planes = tags["UIC2tag"].count
        if self.planarconfig == 1:
            self._shape = (planes, 1, 1, imagelength, imagewidth, samplesperpixel)
            if samplesperpixel == 1:
                self.shape = (planes, imagelength, imagewidth)
                self.axes = "YX"
            else:
                self.shape = (planes, imagelength, imagewidth, samplesperpixel)
                self.axes = "YXS"
        else:
            self._shape = (planes, samplesperpixel, 1, imagelength, imagewidth, 1)
            if samplesperpixel == 1:
                self.shape = (planes, imagelength, imagewidth)
                self.axes = "YX"
            else:
                self.shape = (planes, samplesperpixel, imagelength, imagewidth)
                self.axes = "SYX"
        # detect type of series
        if planes == 1:
            self.shape = self.shape[1:]
        elif numpy.all(uictag["ZDistance"] != 0):
            self.axes = "Z" + self.axes
        elif numpy.all(numpy.diff(uictag["TimeCreated"]) != 0):
            self.axes = "T" + self.axes
        else:
            self.axes = "I" + self.axes
    elif self.photometric == 2 or samplesperpixel > 1:  # PHOTOMETRIC.RGB
        if self.planarconfig == 1:
            self._shape = (
                1,
                1,
                imagedepth,
                imagelength,
                imagewidth,
                samplesperpixel,
            )
            if imagedepth == 1:
                self.shape = (imagelength, imagewidth, samplesperpixel)
                self.axes = "YXS"
            else:
                self.shape = (imagedepth, imagelength, imagewidth, samplesperpixel)
                self.axes = "ZYXS"
        else:
            self._shape = (
                1,
                samplesperpixel,
                imagedepth,
                imagelength,
                imagewidth,
                1,
            )
            if imagedepth == 1:
                self.shape = (samplesperpixel, imagelength, imagewidth)
                self.axes = "SYX"
            else:
                self.shape = (samplesperpixel, imagedepth, imagelength, imagewidth)
                self.axes = "SZYX"
    else:
        self._shape = (1, 1, imagedepth, imagelength, imagewidth, 1)
        if imagedepth == 1:
            self.shape = (imagelength, imagewidth)
            self.axes = "YX"
        else:
            self.shape = (imagedepth, imagelength, imagewidth)
            self.axes = "ZYX"

    # dataoffsets and databytecounts
    if "TileOffsets" in tags:
        self.dataoffsets = tags["TileOffsets"].value
    elif "StripOffsets" in tags:
        self.dataoffsets = tags["StripOffsets"].value
    else:
        self.dataoffsets = (0,)

    if "TileByteCounts" in tags:
        self.databytecounts = tags["TileByteCounts"].value
    elif "StripByteCounts" in tags:
        self.databytecounts = tags["StripByteCounts"].value
    else:
        # no byte counts stored: derive them from shape and sample size
        self.databytecounts = (product(self.shape) * (self.bitspersample // 8),)
        if self.compression != 1:
            warnings.warn("required ByteCounts tag is missing")

    assert len(self.shape) == len(self.axes)
+ If False, the shape of the returned array might be different from + the page.shape. + lock : {RLock, NullContext} + A reentrant lock used to synchronize reads from file. + If None (default), the lock of the parent's filehandle is used. + reopen : bool + If True (default) and the parent file handle is closed, the file + is temporarily re-opened and closed if no exception occurs. + maxsize: int or None + Maximum size of data before a ValueError is raised. + Can be used to catch DOS. Default: 16 TB. + validate : bool + If True (default), validate various parameters. + If None, only validate parameters and return None. + + """ + self_ = self + self = self.keyframe # self or keyframe + + if not self._shape or product(self._shape) == 0: + return + + tags = self.tags + + if validate or validate is None: + if maxsize and product(self._shape) > maxsize: + raise ValueError("data are too large %s" % str(self._shape)) + if self.dtype is None: + raise ValueError( + "data type not supported: %s%i" + % (self.sampleformat, self.bitspersample) + ) + if self.compression not in TIFF.DECOMPESSORS: + raise ValueError("cannot decompress %s" % self.compression.name) + if "SampleFormat" in tags: + tag = tags["SampleFormat"] + if tag.count != 1 and any((i - tag.value[0] for i in tag.value)): + raise ValueError("sample formats do not match %s" % tag.value) + if self.is_chroma_subsampled and ( + self.compression != 7 or self.planarconfig == 2 + ): + raise NotImplementedError("chroma subsampling not supported") + if validate is None: + return + + fh = self_.parent.filehandle + lock = fh.lock if lock is None else lock + with lock: + closed = fh.closed + if closed: + if reopen: + fh.open() + else: + raise IOError("file handle is closed") + + dtype = self._dtype + shape = self._shape + imagewidth = self.imagewidth + imagelength = self.imagelength + imagedepth = self.imagedepth + bitspersample = self.bitspersample + typecode = self.parent.byteorder + dtype.char + lsb2msb = self.fillorder == 2 + 
offsets, bytecounts = self_.offsets_bytecounts + istiled = self.is_tiled + + if istiled: + tilewidth = self.tilewidth + tilelength = self.tilelength + tiledepth = self.tiledepth + tw = (imagewidth + tilewidth - 1) // tilewidth + tl = (imagelength + tilelength - 1) // tilelength + td = (imagedepth + tiledepth - 1) // tiledepth + shape = ( + shape[0], + shape[1], + td * tiledepth, + tl * tilelength, + tw * tilewidth, + shape[-1], + ) + tileshape = (tiledepth, tilelength, tilewidth, shape[-1]) + runlen = tilewidth + else: + runlen = imagewidth + + if self.planarconfig == 1: + runlen *= self.samplesperpixel + + if out == "memmap" and self.is_memmappable: + with lock: + result = fh.memmap_array(typecode, shape, offset=offsets[0]) + elif self.is_contiguous: + if out is not None: + out = create_output(out, shape, dtype) + with lock: + fh.seek(offsets[0]) + result = fh.read_array(typecode, product(shape), out=out) + if out is None and not result.dtype.isnative: + # swap byte order and dtype without copy + result.byteswap(True) + result = result.view(result.dtype.newbyteorder()) + if lsb2msb: + reverse_bitorder(result) + else: + result = create_output(out, shape, dtype) + + decompress = TIFF.DECOMPESSORS[self.compression] + + if self.compression == 7: # COMPRESSION.JPEG + if bitspersample not in (8, 12): + raise ValueError("unsupported JPEG precision %i" % bitspersample) + if "JPEGTables" in tags: + table = tags["JPEGTables"].value + else: + table = b"" + unpack = identityfunc + colorspace = TIFF.PHOTOMETRIC(self.photometric).name + + def decompress( + x, + func=decompress, + table=table, + bitspersample=bitspersample, + colorspace=colorspace, + ): + return func(x, table, bitspersample, colorspace).reshape(-1) + + elif bitspersample in (8, 16, 32, 64, 128): + if (bitspersample * runlen) % 8: + raise ValueError("data and sample size mismatch") + + def unpack(x, typecode=typecode): + if self.predictor == 3: # PREDICTOR.FLOATINGPOINT + # the floating point horizontal 
differencing decoder + # needs the raw byte order + typecode = dtype.char + try: + # read only numpy array + return numpy.frombuffer(x, typecode) + except ValueError: + # strips may be missing EOI + # warnings.warn('unpack: %s' % e) + xlen = (len(x) // (bitspersample // 8)) * (bitspersample // 8) + return numpy.frombuffer(x[:xlen], typecode) + + elif isinstance(bitspersample, tuple): + + def unpack(x, typecode=typecode, bitspersample=bitspersample): + return unpack_rgb(x, typecode, bitspersample) + + else: + + def unpack( + x, typecode=typecode, bitspersample=bitspersample, runlen=runlen + ): + return unpack_ints(x, typecode, bitspersample, runlen) + + if istiled: + writable = None + tw, tl, td, pl = 0, 0, 0, 0 + for tile in buffered_read(fh, lock, offsets, bytecounts): + if lsb2msb: + tile = reverse_bitorder(tile) + tile = decompress(tile) + tile = unpack(tile) + try: + tile.shape = tileshape + except ValueError: + # incomplete tiles; see gdal issue #1179 + warnings.warn("invalid tile data") + t = numpy.zeros(tileshape, dtype).reshape(-1) + s = min(tile.size, t.size) + t[:s] = tile[:s] + tile = t.reshape(tileshape) + if self.predictor == 2: # PREDICTOR.HORIZONTAL + if writable is None: + writable = tile.flags["WRITEABLE"] + if writable: + numpy.cumsum(tile, axis=-2, dtype=dtype, out=tile) + else: + tile = numpy.cumsum(tile, axis=-2, dtype=dtype) + elif self.predictor == 3: # PREDICTOR.FLOATINGPOINT + raise NotImplementedError() + result[ + 0, + pl, + td : td + tiledepth, + tl : tl + tilelength, + tw : tw + tilewidth, + :, + ] = tile + del tile + tw += tilewidth + if tw >= shape[4]: + tw, tl = 0, tl + tilelength + if tl >= shape[3]: + tl, td = 0, td + tiledepth + if td >= shape[2]: + td, pl = 0, pl + 1 + result = result[..., :imagedepth, :imagelength, :imagewidth, :] + else: + strip_size = self.rowsperstrip * self.imagewidth + if self.planarconfig == 1: + strip_size *= self.samplesperpixel + result = result.reshape(-1) + index = 0 + for strip in buffered_read(fh, 
def asrgb(
    self,
    uint8=False,
    alpha=None,
    colormap=None,
    dmin=None,
    dmax=None,
    *args,
    **kwargs
):
    """Return image data as RGB(A).

    Work in progress.

    Parameters
    ----------
    uint8 : bool
        If True, palette images are mapped through a colormap converted
        to uint8.
    alpha : container, optional
        ExtraSamples values accepted as fourth channel of RGB images.
        By default TIFF.EXTRASAMPLE is used.
    colormap, dmin, dmax :
        Currently not used; for palette images the colormap of the page
        is always applied.
    *args, **kwargs :
        Passed on to `asarray`.

    Raises
    ------
    ValueError
        If the colormap of a palette image cannot be applied.
    NotImplementedError
        For photometric interpretations other than PALETTE and RGB.

    """
    data = self.asarray(*args, **kwargs)
    self = self.keyframe  # self or keyframe
    photometric = self.photometric
    PHOTOMETRIC = TIFF.PHOTOMETRIC

    if photometric == PHOTOMETRIC.PALETTE:
        colormap = self.colormap
        if colormap.shape[1] < 2**self.bitspersample or self.dtype.char not in "BH":
            raise ValueError("cannot apply colormap")
        if uint8:
            if colormap.max() > 255:
                # Shift into a new array. An in-place shift would modify
                # self.colormap and corrupt every later use of the page's
                # colour table.
                colormap = colormap >> 8
            colormap = colormap.astype("uint8")
        if "S" in self.axes:
            data = data[..., 0] if self.planarconfig == 1 else data[0]
        data = apply_colormap(data, colormap)

    elif photometric == PHOTOMETRIC.RGB:
        if "ExtraSamples" in self.tags:
            if alpha is None:
                alpha = TIFF.EXTRASAMPLE
            extrasamples = self.extrasamples
            if self.tags["ExtraSamples"].count == 1:
                extrasamples = (extrasamples,)
            # keep RGB plus the first extra sample accepted as alpha
            for i, exs in enumerate(extrasamples):
                if exs in alpha:
                    if self.planarconfig == 1:
                        data = data[..., [0, 1, 2, 3 + i]]
                    else:
                        data = data[:, [0, 1, 2, 3 + i]]
                    break
        else:
            if self.planarconfig == 1:
                data = data[..., :3]
            else:
                data = data[:, :3]
        # TODO: convert to uint8?

    else:
        # MINISBLACK, MINISWHITE, SEPARATED and all other photometric
        # interpretations are not supported yet
        raise NotImplementedError()
    return data
@lazyattr
def is_memmappable(self):
    """Return if page's image data in file can be memory-mapped.

    Requires that the parent's file handle reports `is_file`, that the
    data are stored in final form, and that the offset of the contiguous
    data is a multiple of the dtype's item size.

    """
    is_file = self.parent.filehandle.is_file
    if not is_file:
        return is_file
    final = self.is_final
    if not final:
        return final
    # (self.bitspersample == 8 or self.parent.isnative) and
    offset = self.is_contiguous[0]
    return offset % self.dtype.itemsize == 0  # aligned?
def __str__(self, detail=0, width=79):
    """Return string containing information about page.

    With detail <= 0 only a one-line summary is returned. Higher levels
    add one line per tag (detail > 0), formatted values of tags that do not
    fit on a line (detail > 1), and the image data (detail > 3).

    """
    if self.keyframe != self:
        return TiffFrame.__str__(self, detail)

    # first of memmappable/final/contiguous that holds, if any
    storage = next(
        (
            prop.upper()
            for prop in ("memmappable", "final", "contiguous")
            if getattr(self, "is_" + prop)
        ),
        "",
    )
    dims = "x".join(str(n) for n in self.shape)
    sample = "%s%s" % (TIFF.SAMPLEFORMAT(self.sampleformat).name, self.bitspersample)
    layout = [TIFF.PHOTOMETRIC(self.photometric).name]
    if self.is_tiled:
        layout.append("TILED")
    # settings are only mentioned when they differ from the default of 1
    for prop in ("compression", "planarconfig", "predictor", "fillorder"):
        setting = getattr(self, prop)
        if setting != 1:
            layout.append(setting.name)
    flagnames = "|".join(f.upper() for f in self.flags)
    fields = (
        dims,
        sample,
        "|".join(item for item in layout if item),
        storage,
        flagnames,
    )
    header = "TiffPage %i @%i %s" % (
        self.index,
        self.offset,
        " ".join(field for field in fields if field),
    )
    if detail <= 0:
        return header

    summaries = []  # one truncated line per tag
    verbose = []  # full values of tags that were truncated
    for tag in sorted(self.tags.values(), key=lambda t: t.code):
        text = tag.__str__(width=width + 1)
        summaries.append(text[:width].strip())
        if detail <= 1 or len(text) <= width:
            continue
        upper = tag.name.upper()
        # offset and byte count tables get less room at detail level 2
        terse = detail <= 2 and ("COUNTS" in upper or "OFFSETS" in upper)
        height = detail * (4 if terse else 12)
        verbose.append(
            "%s\n%s" % (tag.name, pformat(tag.value, width=width, height=height))
        )

    sections = [header, "\n".join(summaries)]
    if detail > 1:
        sections.append("\n\n".join(verbose))
    if detail > 3:
        try:
            sections.append(
                "DATA\n%s" % pformat(self.asarray(), width=width, height=detail * 8)
            )
        except Exception:
            pass
    return "\n\n".join(sections)
@property
def size(self):
    """Return number of elements in array."""
    return product(self.shape)


@lazyattr
def andor_tags(self):
    """Return consolidated metadata from Andor tags as dict.

    Tags with codes strictly between 4864 and 5031 are copied into the
    result (with the first five characters of longer tag names dropped)
    and removed from self.tags.  Return None if the page has no AndorId
    tag.

    """
    if not self.is_andor:
        return
    tags = self.tags
    result = {"Id": tags["AndorId"].value}
    # iterate over a copy because tags are deleted inside the loop
    for tag in list(self.tags.values()):
        code = tag.code
        if not 4864 < code < 5031:
            continue
        value = tag.value
        name = tag.name[5:] if len(tag.name) > 5 else tag.name
        result[name] = value
        del tags[tag.name]
    return result


@lazyattr
def epics_tags(self):
    """Return consolidated metadata from EPICS areaDetector tags as dict.

    Tags with codes in [65000, 65500) are decoded and removed from
    self.tags.  Return None if the page is not an EPICS page.

    """
    if not self.is_epics:
        return
    result = {}
    tags = self.tags
    # iterate over a copy because tags are deleted inside the loop
    for tag in list(self.tags.values()):
        code = tag.code
        if not 65000 <= code < 65500:
            continue
        value = tag.value
        if code == 65000:
            result["timeStamp"] = datetime.datetime.fromtimestamp(float(value))
        elif code == 65001:
            result["uniqueID"] = int(value)
        elif code == 65002:
            result["epicsTSSec"] = int(value)
        elif code == 65003:
            result["epicsTSNsec"] = int(value)
        else:
            # remaining tags hold 'key:value' strings
            key, value = value.split(":", 1)
            result[key] = astype(value)
        del tags[tag.name]
    return result


@lazyattr
def geotiff_tags(self):
    """Return consolidated metadata from GeoTIFF tags as dict.

    Return None if the page has no GeoKeyDirectoryTag, and an empty dict
    (after a warning) if the directory version is not 1.

    The 'RPCCoefficient' entry splits the coefficients that follow the
    twelve scalar values into four consecutive groups of twenty.

    """
    if not self.is_geotiff:
        return
    tags = self.tags

    gkd = tags["GeoKeyDirectoryTag"].value
    if gkd[0] != 1:
        warnings.warn("invalid GeoKeyDirectoryTag")
        return {}

    result = {
        "KeyDirectoryVersion": gkd[0],
        "KeyRevision": gkd[1],
        "KeyRevisionMinor": gkd[2],
    }
    geokeys = TIFF.GEO_KEYS
    geocodes = TIFF.GEO_CODES
    # gkd[3] is the number of 4-value key entries following the header
    for index in range(gkd[3]):
        keyid, tagid, count, offset = gkd[4 + index * 4 : index * 4 + 8]
        keyid = geokeys.get(keyid, keyid)
        if tagid == 0:
            # the value is stored directly in the offset field
            value = offset
        else:
            tagname = TIFF.TAGS[tagid]
            value = tags[tagname].value[offset : offset + count]
            if tagid == 34737 and count > 1 and value[-1] == "|":
                # drop the '|' terminator of values stored in tag 34737
                value = value[:-1]
            value = value if count > 1 else value[0]
        if keyid in geocodes:
            try:
                value = geocodes[keyid](value)
            except Exception:
                # keep the raw value if it cannot be converted
                pass
        result[keyid] = value

    if "IntergraphMatrixTag" in tags:
        value = tags["IntergraphMatrixTag"].value
        value = numpy.array(value)
        if len(value) == 16:
            value = value.reshape((4, 4)).tolist()
        result["IntergraphMatrix"] = value
    if "ModelPixelScaleTag" in tags:
        value = numpy.array(tags["ModelPixelScaleTag"].value).tolist()
        result["ModelPixelScale"] = value
    if "ModelTiepointTag" in tags:
        value = tags["ModelTiepointTag"].value
        value = numpy.array(value).reshape((-1, 6)).squeeze().tolist()
        result["ModelTiepoint"] = value
    if "ModelTransformationTag" in tags:
        value = tags["ModelTransformationTag"].value
        value = numpy.array(value).reshape((4, 4)).tolist()
        result["ModelTransformation"] = value

    if "RPCCoefficientTag" in tags:
        rpcc = tags["RPCCoefficientTag"].value
        result["RPCCoefficient"] = {
            "ERR_BIAS": rpcc[0],
            "ERR_RAND": rpcc[1],
            "LINE_OFF": rpcc[2],
            "SAMP_OFF": rpcc[3],
            "LAT_OFF": rpcc[4],
            "LONG_OFF": rpcc[5],
            "HEIGHT_OFF": rpcc[6],
            "LINE_SCALE": rpcc[7],
            "SAMP_SCALE": rpcc[8],
            "LAT_SCALE": rpcc[9],
            "LONG_SCALE": rpcc[10],
            "HEIGHT_SCALE": rpcc[11],
            # four groups of 20 coefficients each
            "LINE_NUM_COEFF": rpcc[12:32],
            "LINE_DEN_COEFF": rpcc[32:52],
            "SAMP_NUM_COEFF": rpcc[52:72],
            "SAMP_DEN_COEFF": rpcc[72:92],
        }

    return result
@property
def is_tiled(self):
    """Page has a TileWidth tag."""
    tags = self.tags
    return "TileWidth" in tags


@property
def is_reduced(self):
    """Page has a NewSubfileType tag with bit 0 set."""
    tags = self.tags
    if "NewSubfileType" not in tags:
        return False
    return tags["NewSubfileType"].value & 1


@property
def is_chroma_subsampled(self):
    """Page has a YCbCrSubSampling tag other than (1, 1)."""
    tags = self.tags
    if "YCbCrSubSampling" not in tags:
        return False
    return tags["YCbCrSubSampling"].value != (1, 1)


@lazyattr
def is_imagej(self):
    """Return the description starting with 'ImageJ=' if any, else None."""
    for text in (self.description, self.description1):
        if not text:
            # an empty description ends the search
            return None
        if text[:7] == "ImageJ=":
            return text
    return None


@lazyattr
def is_shaped(self):
    """Return the description holding an array shape if any, else None."""
    for text in (self.description, self.description1):
        if not text:
            # an empty description ends the search
            return None
        looks_like_json = text[:1] == "{" and '"shape":' in text
        if looks_like_json or text[:6] == "shape=":
            return text
    return None


@property
def is_mdgel(self):
    """Page has an MDFileTag tag."""
    tags = self.tags
    return "MDFileTag" in tags


@property
def is_mediacy(self):
    """Page has an MC_Id tag whose value starts with b'MC TIFF'."""
    tags = self.tags
    if "MC_Id" not in tags:
        return False
    return tags["MC_Id"].value[:7] == b"MC TIFF"


@property
def is_stk(self):
    """Page has a UIC2tag tag."""
    tags = self.tags
    return "UIC2tag" in tags


@property
def is_lsm(self):
    """Page has a CZ_LSMINFO tag."""
    tags = self.tags
    return "CZ_LSMINFO" in tags


@property
def is_fluoview(self):
    """Page has an MM_Stamp tag."""
    tags = self.tags
    return "MM_Stamp" in tags


@property
def is_nih(self):
    """Page has a NIHImageHeader tag."""
    tags = self.tags
    return "NIHImageHeader" in tags


@property
def is_sgi(self):
    """Page has both ImageDepth and TileDepth tags."""
    tags = self.tags
    return "ImageDepth" in tags and "TileDepth" in tags
@property
def is_vista(self):
    """Software tag is 'ISS Vista'."""
    return self.software == "ISS Vista"


@property
def is_metaseries(self):
    """Page contains MDS MetaSeries metadata in ImageDescription tag."""
    if self.index > 1 or self.software != "MetaSeries":
        return False
    d = self.description
    # the description is an XML document wrapped in a MetaData element
    return d.startswith("<MetaData>") and d.endswith("</MetaData>")


@property
def is_ome(self):
    """Page contains OME-XML in ImageDescription tag."""
    if self.index > 1 or not self.description:
        return False
    d = self.description
    # XML declaration at the start, closing OME element at the end
    return d[:14] == "<?xml version=" and d[-6:] == "</OME>"


@property
def is_scn(self):
    """Page contains Leica SCN XML in ImageDescription tag."""
    if self.index > 1 or not self.description:
        return False
    d = self.description
    # XML declaration at the start, closing scn element at the end
    return d[:14] == "<?xml version=" and d[-6:] == "</scn>"


@property
def is_micromanager(self):
    """Page contains Micro-Manager metadata."""
    return "MicroManagerMetadata" in self.tags


@property
def is_andor(self):
    """Page contains Andor Technology tags."""
    return "AndorId" in self.tags


@property
def is_pilatus(self):
    """Page contains Pilatus tags."""
    return self.software[:8] == "TVX TIFF" and self.description[:2] == "# "


@property
def is_epics(self):
    """Page contains EPICS areaDetector tags."""
    return (
        self.description == "EPICS areaDetector"
        or self.software == "EPICS areaDetector"
    )


@property
def is_tvips(self):
    """Page contains TVIPS metadata."""
    return "TVIPS" in self.tags


@property
def is_fei(self):
    """Page contains SFEG or HELIOS metadata."""
    return "FEI_SFEG" in self.tags or "FEI_HELIOS" in self.tags


@property
def is_sem(self):
    """Page contains Zeiss SEM metadata."""
    return "CZ_SEM" in self.tags


@property
def is_svs(self):
    """Page contains Aperio metadata."""
    return self.description[:20] == "Aperio Image Library"


@property
def is_scanimage(self):
    """Page contains ScanImage metadata."""
    return (
        self.description[:12] == "state.config"
        or self.software[:22] == "SI.LINE_FORMAT_VERSION"
        # only the last 256 characters of the description are searched
        or "scanimage.SI." in self.description[-256:]
    )
@property
def is_qptiff(self):
    """Page contains PerkinElmer tissue images metadata."""
    # The ImageDescription tag contains XML with a top-level
    # <PerkinElmer-QPI-ImageDescription> element; only the Software tag
    # prefix is checked here.
    vendor_prefix = self.software[:15]
    return vendor_prefix == "PerkinElmer-QPI"


@property
def is_geotiff(self):
    """Page contains GeoTIFF metadata, i.e. a GeoKeyDirectoryTag tag."""
    tags = self.tags
    return "GeoKeyDirectoryTag" in tags
class TiffFrame(object):
    """Lightweight TIFF image file directory (IFD).

    Only the data offsets and byte counts (tags 273, 279, 324, 325) are
    read from file.  All other attributes are taken from a specified
    TiffPage instance, the keyframe.

    TiffFrame is intended to reduce resource usage and speed up reading data
    from file, not for introspection of metadata.

    Not compatible with Python 2.

    """

    __slots__ = (
        "keyframe",
        "parent",
        "index",
        "offset",
        "dataoffsets",
        "databytecounts",
    )

    is_mdgel = False
    tags = {}

    def __init__(self, parent, index, keyframe):
        """Read data offsets and byte counts from file.

        The file handle position must be at the offset to a valid IFD.
        Raise ValueError if the number of tags cannot be read or is
        larger than 4096.

        """
        self.keyframe = keyframe
        self.parent = parent
        self.index = index
        self.dataoffsets = None
        self.databytecounts = None

        unpack = struct.unpack
        fh = parent.filehandle
        self.offset = fh.tell()
        try:
            tagno = unpack(parent.tagnoformat, fh.read(parent.tagnosize))[0]
            if tagno > 4096:
                raise ValueError("suspicious number of tags")
        except Exception:
            raise ValueError("corrupted page list at offset %i" % self.offset)

        wanted = {273, 279, 324, 325}  # TIFF.FRAME_TAGS
        entry_size = parent.tagsize
        code_format = parent.tagformat1[:2]

        raw = fh.read(entry_size * tagno)
        for number in range(tagno):
            start = number * entry_size
            # the tag code is stored in the first two bytes of an entry
            code = unpack(code_format, raw[start : start + 2])[0]
            if code not in wanted:
                continue
            try:
                tag = TiffTag(parent, raw[start : start + entry_size])
            except TiffTag.Error as exc:
                warnings.warn(str(exc))
                continue
            if code in (273, 324):
                self.dataoffsets = tag.value
            else:
                # 279 or 325
                self.databytecounts = tag.value

    def aspage(self):
        """Return TiffPage from file."""
        handle = self.parent.filehandle
        handle.seek(self.offset)
        return TiffPage(self.parent, index=self.index, keyframe=None)

    def asarray(self, *args, **kwargs):
        """Read image data from file and return as numpy array.

        Delegates to TiffPage.asarray with validate=False.

        """
        # TODO: fix TypeError on Python 2
        #   "TypeError: unbound method asarray() must be called with TiffPage
        #   instance as first argument (got TiffFrame instance instead)"
        kwargs["validate"] = False
        return TiffPage.asarray(self, *args, **kwargs)

    def asrgb(self, *args, **kwargs):
        """Read image data from file and return RGB image as numpy array.

        Delegates to TiffPage.asrgb with validate=False.

        """
        kwargs["validate"] = False
        return TiffPage.asrgb(self, *args, **kwargs)

    @property
    def offsets_bytecounts(self):
        """Return simplified offsets and bytecounts."""
        contiguous = self.keyframe.is_contiguous
        if not contiguous:
            return clean_offsets_counts(self.dataoffsets, self.databytecounts)
        # own first offset, combined with the keyframe's contiguous size
        return self.dataoffsets[:1], contiguous[1:]

    @property
    def is_contiguous(self):
        """Return offset and size of contiguous data, else None."""
        contiguous = self.keyframe.is_contiguous
        if not contiguous:
            return None
        return self.dataoffsets[0], contiguous[1]

    @property
    def is_memmappable(self):
        """Return if page's image data in file can be memory-mapped."""
        keyframe = self.keyframe
        return keyframe.is_memmappable

    def __getattr__(self, name):
        """Return attribute listed in TIFF.FRAME_ATTRS from keyframe."""
        if name not in TIFF.FRAME_ATTRS:
            # this error could be raised because an AttributeError was
            # raised inside a @property function
            raise AttributeError(
                "'%s' object has no attribute '%s'" % (self.__class__.__name__, name)
            )
        return getattr(self.keyframe, name)

    def __str__(self, detail=0):
        """Return string containing information about frame."""
        shape = "x".join(str(i) for i in self.shape)
        info = " ".join([shape, str(self.dtype)])
        return "TiffFrame %i @%i %s" % (self.index, self.offset, info)
def __init__(self, parent, tagheader, **kwargs):
    """Initialize instance from tag header.

    Parameters
    ----------
    parent : object
        Provides filehandle, byteorder, offsetsize, tagformat1,
        tagformat2 and offsetformat.
    tagheader : bytes
        Raw tag entry: code and type in the first four bytes, count and
        value/offset in the remainder.

    Raises
    ------
    TiffTag.Error
        If the data type is unknown or the value offset is outside the
        file.

    """
    fh = parent.filehandle
    byteorder = parent.byteorder
    unpack = struct.unpack
    offsetsize = parent.offsetsize

    # location of an inline value; replaced below if the value is stored
    # elsewhere in the file
    self.valueoffset = fh.tell() + offsetsize + 4
    code, type_ = unpack(parent.tagformat1, tagheader[:4])
    count, value = unpack(parent.tagformat2, tagheader[4:])

    try:
        dtype = TIFF.DATA_FORMATS[type_]
    except KeyError:
        raise TiffTag.Error("unknown tag data type %i" % type_)

    fmt = "%s%i%s" % (byteorder, count * int(dtype[0]), dtype[1])
    size = struct.calcsize(fmt)
    if size > offsetsize or code in TIFF.TAG_READERS:
        # the value field holds an offset to the actual value
        self.valueoffset = offset = unpack(parent.offsetformat, value)[0]
        if offset < 8 or offset > fh.size - size:
            raise TiffTag.Error("invalid tag value offset")
        # if offset % 2:
        #     warnings.warn('tag value does not begin on word boundary')
        fh.seek(offset)
        if code in TIFF.TAG_READERS:
            readfunc = TIFF.TAG_READERS[code]
            value = readfunc(fh, byteorder, dtype, count, offsetsize)
        elif type_ == 7 or (count > 1 and dtype[-1] == "B"):
            value = read_bytes(fh, byteorder, dtype, count, offsetsize)
        elif code in TIFF.TAGS or dtype[-1] == "s":
            value = unpack(fmt, fh.read(size))
        else:
            value = read_numpy(fh, byteorder, dtype, count, offsetsize)
    elif dtype[-1] == "B" or type_ == 7:
        # inline bytes
        value = value[:size]
    else:
        # inline values
        value = unpack(fmt, value[:size])

    process = (
        code not in TIFF.TAG_READERS and code not in TIFF.TAG_TUPLE and type_ != 7
    )
    if process and dtype[-1] == "s" and isinstance(value[0], bytes):
        # TIFF ASCII fields can contain multiple strings,
        # each terminated with a NUL
        value = value[0]
        try:
            value = bytes2str(stripascii(value).strip())
        except UnicodeDecodeError:
            warnings.warn("tag %i: coercing invalid ASCII to bytes" % code)
            dtype = "1B"
    else:
        if code in TIFF.TAG_ENUM:
            t = TIFF.TAG_ENUM[code]
            try:
                value = tuple(t(v) for v in value)
            except ValueError as e:
                warnings.warn(str(e))
        if process:
            # unwrap single values
            if len(value) == 1:
                value = value[0]

    self.code = code
    self.dtype = dtype
    self.count = count
    self.value = value


@property
def name(self):
    """Return name of tag from TIFF.TAGS, else the code as string."""
    return TIFF.TAGS.get(self.code, str(self.code))


def _fix_lsm_bitspersample(self, parent):
    """Correct LSM bitspersample tag.

    Old LSM writers may use a separate region for two 16-bit values,
    although they fit into the tag value element of the tag.

    """
    if self.code == 258 and self.count == 2:
        # TODO: test this case; need example file
        # NOTE(review): '_value' is not listed in the class __slots__ and is
        # never assigned in __init__, and 'offsetformat' is indexed with
        # 'offsetsize' here although __init__ passes it to struct.unpack
        # as a format string -- confirm before relying on this branch.
        warnings.warn("correcting LSM bitspersample tag")
        tof = parent.offsetformat[parent.offsetsize]
        self.valueoffset = struct.unpack(tof, self._value)[0]
        parent.filehandle.seek(self.valueoffset)
        # two little-endian 16-bit values
        self.value = struct.unpack("<HH", parent.filehandle.read(4))
def __init__(self, files, imread=TiffFile, pattern="axes", *args, **kwargs):
    """Initialize instance from multiple files.

    Parameters
    ----------
    files : str, pathlib.Path, or sequence thereof
        Glob pattern or sequence of file names.
        Binary streams are not supported.
    imread : function or class
        Image read function or class with asarray function returning numpy
        array from single file.
    pattern : str
        Regular expression pattern that matches axes names and sequence
        indices in file names, or a key of the '_patterns' dict.
        By default, the pattern matches Olympus OIF and Leica TIFF series.

    Raises
    ------
    ValueError
        If no files are found, or the first item is not a file name.

    """
    if isinstance(files, pathlib.Path):
        files = str(files)
    if isinstance(files, basestring):
        # a single string is a glob pattern
        files = natural_sorted(glob.glob(files))
    files = list(files)
    if not files:
        raise ValueError("no files found")
    if isinstance(files[0], pathlib.Path):
        files = [str(pathlib.Path(f)) for f in files]
    elif not isinstance(files[0], basestring):
        raise ValueError("not a file name")
    self.files = files

    if hasattr(imread, "asarray"):
        # redefine imread: open the file, read the array, close the file
        _imread = imread

        def imread(fname, *args, **kwargs):
            with _imread(fname) as im:
                return im.asarray(*args, **kwargs)

    self.imread = imread

    self.pattern = self._patterns.get(pattern, pattern)
    try:
        self._parse()
        if not self.axes:
            self.axes = "I"
    except self.ParseError:
        # fall back to a flat sequence in file order
        self.axes = "I"
        self.shape = (len(files),)
        self._startindex = (0,)
        self._indices = tuple((i,) for i in range(len(files)))


def __str__(self):
    """Return string with information about image sequence."""
    return "\n".join(
        [
            self.files[0],
            " size: %i" % len(self.files),
            " axes: %s" % self.axes,
            " shape: %s" % str(self.shape),
        ]
    )


def __len__(self):
    """Return number of files in sequence."""
    return len(self.files)


def __enter__(self):
    return self


def __exit__(self, exc_type, exc_value, traceback):
    self.close()


def close(self):
    """Do nothing; no resources are held by the sequence."""
    pass


def asarray(self, out=None, *args, **kwargs):
    """Read image data from all files and return as numpy array.

    The args and kwargs parameters are passed to the imread function.

    Raise IndexError or ValueError if image shapes do not match.

    """
    # the first image determines shape and dtype of all images
    im = self.imread(self.files[0], *args, **kwargs)
    shape = self.shape + im.shape
    result = create_output(out, shape, dtype=im.dtype)
    result = result.reshape(-1, *im.shape)
    for index, fname in zip(self._indices, self.files):
        # convert the file's multi-index to a position in the flat stack
        index = [i - j for i, j in zip(index, self._startindex)]
        index = numpy.ravel_multi_index(index, self.shape)
        im = self.imread(fname, *args, **kwargs)
        result[index] = im
    result.shape = shape
    return result


def _parse(self):
    """Get axes and shape from file names.

    Raises
    ------
    ParseError
        If the pattern is empty or does not match a file name.
    ValueError
        If the axes found differ between file names.

    """
    if not self.pattern:
        raise self.ParseError("invalid pattern")
    pattern = re.compile(self.pattern, re.IGNORECASE | re.VERBOSE)
    matches = pattern.findall(self.files[0])
    if not matches:
        raise self.ParseError("pattern does not match file names")
    matches = matches[-1]
    if len(matches) % 2:
        raise self.ParseError("pattern does not match axis name and index")
    # groups alternate between axis name and index
    axes = "".join(m for m in matches[::2] if m)
    if not axes:
        raise self.ParseError("pattern does not match file names")

    indices = []
    for fname in self.files:
        found = pattern.findall(fname)
        if not found:
            # report a non-matching name like for the first file instead
            # of failing with IndexError
            raise self.ParseError("pattern does not match file names")
        matches = found[-1]
        if axes != "".join(m for m in matches[::2] if m):
            raise ValueError("axes do not match within the image sequence")
        indices.append([int(m) for m in matches[1::2] if m])
    shape = tuple(numpy.max(indices, axis=0))
    startindex = tuple(numpy.min(indices, axis=0))
    shape = tuple(i - j + 1 for i, j in zip(shape, startindex))
    if product(shape) != len(self.files):
        warnings.warn("files are missing. Missing data are zeroed")

    self.axes = axes.upper()
    self.shape = shape
    self._indices = indices
    self._startindex = startindex
class FileHandle(object):
    """Binary file handle.

    A limited, special purpose file handler that can:

    * handle embedded files (for CZI within CZI files)
    * re-open closed files (for multi-file formats, such as OME-TIFF)
    * read and write numpy arrays and records from file like objects

    Only 'rb' and 'wb' modes are supported. Concurrently reading and writing
    of the same stream is untested.

    When initialized from another file handle, do not use it unless this
    FileHandle is closed.

    Attributes
    ----------
    name : str
        Name of the file.
    path : str
        Absolute path to file.
    size : int
        Size of file in bytes.
    is_file : bool
        If True, file has a fileno and can be memory-mapped.

    All attributes are read-only.

    """

    __slots__ = (
        "_fh",
        "_file",
        "_mode",
        "_name",
        "_dir",
        "_lock",
        "_offset",
        "_size",
        "_close",
        "is_file",
    )

    def __init__(self, file, mode="rb", name=None, offset=None, size=None):
        """Initialize file handle from file name or another file handle.

        Parameters
        ----------
        file : str, pathlib.Path, binary stream, or FileHandle
            File name or seekable binary stream, such as an open file
            or BytesIO.
        mode : str
            File open mode in case 'file' is a file name. Must be 'rb' or 'wb'.
        name : str
            Optional name of file in case 'file' is a binary stream.
        offset : int
            Optional start position of embedded file. By default, this is
            the current file position.
        size : int
            Optional size of embedded file. By default, this is the number
            of bytes from the 'offset' to the end of the file.

        """
        self._file = file
        self._fh = None
        self._mode = mode
        self._name = name
        self._dir = ""
        self._offset = offset
        self._size = size
        self._close = True
        self.is_file = False
        self._lock = NullContext()
        self.open()

    def open(self):
        """Open or re-open file.

        Raise ValueError if the file argument is not a file name, seekable
        binary stream, or FileHandle, or if the mode differs from that of
        a wrapped FileHandle.

        """
        if self._fh:
            return  # file is open

        if isinstance(self._file, pathlib.Path):
            self._file = str(self._file)
        if isinstance(self._file, basestring):
            # file name
            self._file = os.path.realpath(self._file)
            self._dir, self._name = os.path.split(self._file)
            self._fh = open(self._file, self._mode)
            self._close = True
            if self._offset is None:
                self._offset = 0
        elif isinstance(self._file, FileHandle):
            # FileHandle: share the underlying stream, never close it
            self._fh = self._file._fh
            if self._offset is None:
                self._offset = 0
            self._offset += self._file._offset
            self._close = False
            if not self._name:
                if self._offset:
                    name, ext = os.path.splitext(self._file._name)
                    self._name = "%s@%i%s" % (name, self._offset, ext)
                else:
                    self._name = self._file._name
            if self._mode and self._mode != self._file._mode:
                raise ValueError("FileHandle has wrong mode")
            self._mode = self._file._mode
            self._dir = self._file._dir
        elif hasattr(self._file, "seek"):
            # binary stream: open file, BytesIO
            try:
                self._file.tell()
            except Exception:
                raise ValueError("binary stream is not seekable")
            self._fh = self._file
            if self._offset is None:
                self._offset = self._file.tell()
            self._close = False
            if not self._name:
                try:
                    self._dir, self._name = os.path.split(self._fh.name)
                except AttributeError:
                    self._name = "Unnamed binary stream"
            try:
                self._mode = self._fh.mode
            except AttributeError:
                pass
        else:
            raise ValueError(
                "The first parameter must be a file name, "
                "seekable binary stream, or FileHandle"
            )

        if self._offset:
            self._fh.seek(self._offset)

        if self._size is None:
            self._size = self._measure_size()

        try:
            self._fh.fileno()
            self.is_file = True
        except Exception:
            self.is_file = False

    def _measure_size(self):
        """Return number of bytes from start offset to end of stream.

        The position of the underlying stream is restored.

        """
        fh = self._fh
        pos = fh.tell()
        fh.seek(0, 2)
        # exclude the bytes in front of an embedded file
        size = fh.tell() - self._offset
        fh.seek(pos)
        return size

    def read(self, size=-1):
        """Read 'size' bytes from file, or until EOF is reached."""
        if size < 0 and self._offset:
            # do not read past the end of an embedded file
            size = self._size
        return self._fh.read(size)

    def write(self, bytestring):
        """Write bytestring to file."""
        return self._fh.write(bytestring)

    def flush(self):
        """Flush write buffers if applicable."""
        return self._fh.flush()

    def memmap_array(self, dtype, shape, offset=0, mode="r", order="C"):
        """Return numpy.memmap of data stored in file.

        Raise ValueError if the file has no fileno.

        """
        if not self.is_file:
            raise ValueError("Cannot memory-map file without fileno")
        return numpy.memmap(
            self._fh,
            dtype=dtype,
            mode=mode,
            offset=self._offset + offset,
            shape=shape,
            order=order,
        )

    def read_array(
        self, dtype, count=-1, sep="", chunksize=2**25, out=None, native=False
    ):
        """Return numpy array from file.

        If 'out' is given, data are read in chunks of 'chunksize' bytes
        and copied into it.  If 'native' is true and 'out' is None, the
        result is converted to native byte order.

        Work around numpy issue #2230, "numpy.fromfile does not accept
        StringIO object" https://github.com/numpy/numpy/issues/2230.

        """
        fh = self._fh
        dtype = numpy.dtype(dtype)
        size = self._size if count < 0 else count * dtype.itemsize

        if out is None:
            try:
                result = numpy.fromfile(fh, dtype, count, sep)
            except IOError:
                # BytesIO
                data = fh.read(size)
                result = numpy.frombuffer(data, dtype, count).copy()
            if native and not result.dtype.isnative:
                # swap byte order and dtype without copy
                result.byteswap(True)
                result = result.view(result.dtype.newbyteorder())
            return result

        # Read data from file in chunks and copy to output array
        shape = out.shape
        size = min(out.nbytes, size)
        out = out.reshape(-1)
        index = 0
        while size > 0:
            data = fh.read(min(chunksize, size))
            datasize = len(data)
            if datasize == 0:
                break
            size -= datasize
            data = numpy.frombuffer(data, dtype)
            out[index : index + data.size] = data
            index += data.size

        if hasattr(out, "flush"):
            out.flush()
        return out.reshape(shape)

    def read_record(self, dtype, shape=1, byteorder=None):
        """Return numpy record from file."""
        rec = numpy.rec
        try:
            record = rec.fromfile(self._fh, dtype, shape, byteorder=byteorder)
        except Exception:
            # fall back to reading the bytes and parsing them in memory
            dtype = numpy.dtype(dtype)
            if shape is None:
                shape = self._size // dtype.itemsize
            size = product(sequence(shape)) * dtype.itemsize
            data = self._fh.read(size)
            record = rec.fromstring(data, dtype, shape, byteorder=byteorder)
        return record[0] if shape == 1 else record

    def write_empty(self, size):
        """Append size bytes to file. Position must be at end of file."""
        if size < 1:
            return
        # seek past the end and write the last byte only
        self._fh.seek(size - 1, 1)
        self._fh.write(b"\x00")

    def write_array(self, data):
        """Write numpy array to binary file."""
        try:
            data.tofile(self._fh)
        except Exception:
            # BytesIO; tobytes replaces tostring, which NumPy 2 removed
            self._fh.write(data.tobytes())

    def tell(self):
        """Return file's current position."""
        return self._fh.tell() - self._offset

    def seek(self, offset, whence=0):
        """Set file's current position."""
        if self._offset:
            # translate positions of an embedded file
            if whence == 0:
                self._fh.seek(self._offset + offset, whence)
                return
            elif whence == 2 and self._size > 0:
                self._fh.seek(self._offset + self._size + offset, 0)
                return
        self._fh.seek(offset, whence)

    def close(self):
        """Close file."""
        if self._close and self._fh:
            self._fh.close()
            self._fh = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def __getattr__(self, name):
        """Return attribute from underlying file object."""
        if self._offset:
            warnings.warn("FileHandle: '%s' not implemented for embedded files" % name)
        return getattr(self._fh, name)

    @property
    def name(self):
        return self._name

    @property
    def dirname(self):
        return self._dir

    @property
    def path(self):
        return os.path.join(self._dir, self._name)

    @property
    def size(self):
        return self._size

    @property
    def closed(self):
        return self._fh is None

    @property
    def lock(self):
        return self._lock

    @lock.setter
    def lock(self, value):
        self._lock = threading.RLock() if value else NullContext()
class NullContext(object):
    """Context manager that does nothing.

    >>> with NullContext():
    ...     pass

    """

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        return None


class OpenFileCache(object):
    """Keep files open.

    Tracks a reference count per file handle and closes unused handles
    once more than 'size' handles are cached.

    """

    __slots__ = ("files", "past", "lock", "size")

    def __init__(self, size, lock=None):
        """Initialize open file cache."""
        self.size = int(size)
        self.lock = lock if lock is not None else NullContext()
        self.files = {}  # refcounts of opened files
        self.past = []  # FIFO of opened files

    def open(self, filehandle):
        """Re-open file if necessary."""
        with self.lock:
            if filehandle in self.files:
                self.files[filehandle] += 1
                return
            if not filehandle.closed:
                # already open and not managed by the cache
                return
            filehandle.open()
            self.files[filehandle] = 1
            self.past.append(filehandle)

    def close(self, filehandle):
        """Close opened file if no longer used."""
        with self.lock:
            if filehandle in self.files:
                self.files[filehandle] -= 1
            # trim the file cache, oldest handles first
            position = 0
            while len(self.past) > self.size and position < len(self.past):
                candidate = self.past[position]
                if self.files[candidate] != 0:
                    # still in use: keep it and look at the next one
                    position += 1
                    continue
                candidate.close()
                del self.files[candidate]
                del self.past[position]

    def clear(self):
        """Close all opened files if not in use."""
        with self.lock:
            unused = [fh for fh, refcount in self.files.items() if refcount == 0]
            for filehandle in unused:
                filehandle.close()
                del self.files[filehandle]
                self.past.remove(filehandle)


class LazyConst(object):
    """Class whose attributes are computed on first access from its methods."""

    def __init__(self, cls):
        self._cls = cls
        self.__doc__ = getattr(cls, "__doc__")

    def __getattr__(self, name):
        member = getattr(self._cls, name)
        if not callable(member):
            return member
        try:
            computed = member()
        except TypeError:
            # Python 2 unbound method
            computed = member.__func__()
        # cache on the instance so __getattr__ is not called again
        setattr(self, name, computed)
        return computed
and other specs + return { + 11: "ProcessingSoftware", + 254: "NewSubfileType", + 255: "SubfileType", + 256: "ImageWidth", + 257: "ImageLength", + 258: "BitsPerSample", + 259: "Compression", + 262: "PhotometricInterpretation", + 263: "Thresholding", + 264: "CellWidth", + 265: "CellLength", + 266: "FillOrder", + 269: "DocumentName", + 270: "ImageDescription", + 271: "Make", + 272: "Model", + 273: "StripOffsets", + 274: "Orientation", + 277: "SamplesPerPixel", + 278: "RowsPerStrip", + 279: "StripByteCounts", + 280: "MinSampleValue", + 281: "MaxSampleValue", + 282: "XResolution", + 283: "YResolution", + 284: "PlanarConfiguration", + 285: "PageName", + 286: "XPosition", + 287: "YPosition", + 288: "FreeOffsets", + 289: "FreeByteCounts", + 290: "GrayResponseUnit", + 291: "GrayResponseCurve", + 292: "T4Options", + 293: "T6Options", + 296: "ResolutionUnit", + 297: "PageNumber", + 300: "ColorResponseUnit", + 301: "TransferFunction", + 305: "Software", + 306: "DateTime", + 315: "Artist", + 316: "HostComputer", + 317: "Predictor", + 318: "WhitePoint", + 319: "PrimaryChromaticities", + 320: "ColorMap", + 321: "HalftoneHints", + 322: "TileWidth", + 323: "TileLength", + 324: "TileOffsets", + 325: "TileByteCounts", + 326: "BadFaxLines", + 327: "CleanFaxData", + 328: "ConsecutiveBadFaxLines", + 330: "SubIFDs", + 332: "InkSet", + 333: "InkNames", + 334: "NumberOfInks", + 336: "DotRange", + 337: "TargetPrinter", + 338: "ExtraSamples", + 339: "SampleFormat", + 340: "SMinSampleValue", + 341: "SMaxSampleValue", + 342: "TransferRange", + 343: "ClipPath", + 344: "XClipPathUnits", + 345: "YClipPathUnits", + 346: "Indexed", + 347: "JPEGTables", + 351: "OPIProxy", + 400: "GlobalParametersIFD", + 401: "ProfileType", + 402: "FaxProfile", + 403: "CodingMethods", + 404: "VersionYear", + 405: "ModeNumber", + 433: "Decode", + 434: "DefaultImageColor", + 435: "T82Options", + 437: "JPEGTables_", # 347 + 512: "JPEGProc", + 513: "JPEGInterchangeFormat", + 514: "JPEGInterchangeFormatLength", + 515: 
"JPEGRestartInterval", + 517: "JPEGLosslessPredictors", + 518: "JPEGPointTransforms", + 519: "JPEGQTables", + 520: "JPEGDCTables", + 521: "JPEGACTables", + 529: "YCbCrCoefficients", + 530: "YCbCrSubSampling", + 531: "YCbCrPositioning", + 532: "ReferenceBlackWhite", + 559: "StripRowCounts", + 700: "XMP", # XMLPacket + 769: "GDIGamma", # GDI+ + 770: "ICCProfileDescriptor", # GDI+ + 771: "SRGBRenderingIntent", # GDI+ + 800: "ImageTitle", # GDI+ + 999: "USPTO_Miscellaneous", + 4864: "AndorId", # TODO: Andor Technology 4864 - 5030 + 4869: "AndorTemperature", + 4876: "AndorExposureTime", + 4878: "AndorKineticCycleTime", + 4879: "AndorAccumulations", + 4881: "AndorAcquisitionCycleTime", + 4882: "AndorReadoutTime", + 4884: "AndorPhotonCounting", + 4885: "AndorEmDacLevel", + 4890: "AndorFrames", + 4896: "AndorHorizontalFlip", + 4897: "AndorVerticalFlip", + 4898: "AndorClockwise", + 4899: "AndorCounterClockwise", + 4904: "AndorVerticalClockVoltage", + 4905: "AndorVerticalShiftSpeed", + 4907: "AndorPreAmpSetting", + 4908: "AndorCameraSerial", + 4911: "AndorActualTemperature", + 4912: "AndorBaselineClamp", + 4913: "AndorPrescans", + 4914: "AndorModel", + 4915: "AndorChipSizeX", + 4916: "AndorChipSizeY", + 4944: "AndorBaselineOffset", + 4966: "AndorSoftwareVersion", + 18246: "Rating", + 18247: "XP_DIP_XML", + 18248: "StitchInfo", + 18249: "RatingPercent", + 20481: "ResolutionXUnit", # GDI+ + 20482: "ResolutionYUnit", # GDI+ + 20483: "ResolutionXLengthUnit", # GDI+ + 20484: "ResolutionYLengthUnit", # GDI+ + 20485: "PrintFlags", # GDI+ + 20486: "PrintFlagsVersion", # GDI+ + 20487: "PrintFlagsCrop", # GDI+ + 20488: "PrintFlagsBleedWidth", # GDI+ + 20489: "PrintFlagsBleedWidthScale", # GDI+ + 20490: "HalftoneLPI", # GDI+ + 20491: "HalftoneLPIUnit", # GDI+ + 20492: "HalftoneDegree", # GDI+ + 20493: "HalftoneShape", # GDI+ + 20494: "HalftoneMisc", # GDI+ + 20495: "HalftoneScreen", # GDI+ + 20496: "JPEGQuality", # GDI+ + 20497: "GridSize", # GDI+ + 20498: "ThumbnailFormat", # GDI+ + 
20499: "ThumbnailWidth", # GDI+ + 20500: "ThumbnailHeight", # GDI+ + 20501: "ThumbnailColorDepth", # GDI+ + 20502: "ThumbnailPlanes", # GDI+ + 20503: "ThumbnailRawBytes", # GDI+ + 20504: "ThumbnailSize", # GDI+ + 20505: "ThumbnailCompressedSize", # GDI+ + 20506: "ColorTransferFunction", # GDI+ + 20507: "ThumbnailData", + 20512: "ThumbnailImageWidth", # GDI+ + 20513: "ThumbnailImageHeight", # GDI+ + 20514: "ThumbnailBitsPerSample", # GDI+ + 20515: "ThumbnailCompression", + 20516: "ThumbnailPhotometricInterp", # GDI+ + 20517: "ThumbnailImageDescription", # GDI+ + 20518: "ThumbnailEquipMake", # GDI+ + 20519: "ThumbnailEquipModel", # GDI+ + 20520: "ThumbnailStripOffsets", # GDI+ + 20521: "ThumbnailOrientation", # GDI+ + 20522: "ThumbnailSamplesPerPixel", # GDI+ + 20523: "ThumbnailRowsPerStrip", # GDI+ + 20524: "ThumbnailStripBytesCount", # GDI+ + 20525: "ThumbnailResolutionX", + 20526: "ThumbnailResolutionY", + 20527: "ThumbnailPlanarConfig", # GDI+ + 20528: "ThumbnailResolutionUnit", + 20529: "ThumbnailTransferFunction", + 20530: "ThumbnailSoftwareUsed", # GDI+ + 20531: "ThumbnailDateTime", # GDI+ + 20532: "ThumbnailArtist", # GDI+ + 20533: "ThumbnailWhitePoint", # GDI+ + 20534: "ThumbnailPrimaryChromaticities", # GDI+ + 20535: "ThumbnailYCbCrCoefficients", # GDI+ + 20536: "ThumbnailYCbCrSubsampling", # GDI+ + 20537: "ThumbnailYCbCrPositioning", + 20538: "ThumbnailRefBlackWhite", # GDI+ + 20539: "ThumbnailCopyRight", # GDI+ + 20545: "InteroperabilityIndex", + 20546: "InteroperabilityVersion", + 20624: "LuminanceTable", + 20625: "ChrominanceTable", + 20736: "FrameDelay", # GDI+ + 20737: "LoopCount", # GDI+ + 20738: "GlobalPalette", # GDI+ + 20739: "IndexBackground", # GDI+ + 20740: "IndexTransparent", # GDI+ + 20752: "PixelUnit", # GDI+ + 20753: "PixelPerUnitX", # GDI+ + 20754: "PixelPerUnitY", # GDI+ + 20755: "PaletteHistogram", # GDI+ + 28672: "SonyRawFileType", # Sony ARW + 28722: "VignettingCorrParams", # Sony ARW + 28725: "ChromaticAberrationCorrParams", # Sony 
ARW + 28727: "DistortionCorrParams", # Sony ARW + # Private tags >= 32768 + 32781: "ImageID", + 32931: "WangTag1", + 32932: "WangAnnotation", + 32933: "WangTag3", + 32934: "WangTag4", + 32953: "ImageReferencePoints", + 32954: "RegionXformTackPoint", + 32955: "WarpQuadrilateral", + 32956: "AffineTransformMat", + 32995: "Matteing", + 32996: "DataType", + 32997: "ImageDepth", + 32998: "TileDepth", + 33300: "ImageFullWidth", + 33301: "ImageFullLength", + 33302: "TextureFormat", + 33303: "TextureWrapModes", + 33304: "FieldOfViewCotangent", + 33305: "MatrixWorldToScreen", + 33306: "MatrixWorldToCamera", + 33405: "Model2", + 33421: "CFARepeatPatternDim", + 33422: "CFAPattern", + 33423: "BatteryLevel", + 33424: "KodakIFD", + 33434: "ExposureTime", + 33437: "FNumber", + 33432: "Copyright", + 33445: "MDFileTag", + 33446: "MDScalePixel", + 33447: "MDColorTable", + 33448: "MDLabName", + 33449: "MDSampleInfo", + 33450: "MDPrepDate", + 33451: "MDPrepTime", + 33452: "MDFileUnits", + 33550: "ModelPixelScaleTag", + 33589: "AdventScale", + 33590: "AdventRevision", + 33628: "UIC1tag", # Metamorph Universal Imaging Corp STK + 33629: "UIC2tag", + 33630: "UIC3tag", + 33631: "UIC4tag", + 33723: "IPTCNAA", + 33858: "ExtendedTagsOffset", # DEFF points IFD with private tags + 33918: "IntergraphPacketData", # INGRPacketDataTag + 33919: "IntergraphFlagRegisters", # INGRFlagRegisters + 33920: "IntergraphMatrixTag", # IrasBTransformationMatrix + 33921: "INGRReserved", + 33922: "ModelTiepointTag", + 33923: "LeicaMagic", + 34016: "Site", + 34017: "ColorSequence", + 34018: "IT8Header", + 34019: "RasterPadding", + 34020: "BitsPerRunLength", + 34021: "BitsPerExtendedRunLength", + 34022: "ColorTable", + 34023: "ImageColorIndicator", + 34024: "BackgroundColorIndicator", + 34025: "ImageColorValue", + 34026: "BackgroundColorValue", + 34027: "PixelIntensityRange", + 34028: "TransparencyIndicator", + 34029: "ColorCharacterization", + 34030: "HCUsage", + 34031: "TrapIndicator", + 34032: "CMYKEquivalent", + 
34118: "CZ_SEM", # Zeiss SEM + 34152: "AFCP_IPTC", + 34232: "PixelMagicJBIGOptions", + 34263: "JPLCartoIFD", + 34122: "IPLAB", # number of images + 34264: "ModelTransformationTag", + 34306: "WB_GRGBLevels", # Leaf MOS + 34310: "LeafData", + 34361: "MM_Header", + 34362: "MM_Stamp", + 34363: "MM_Unknown", + 34377: "ImageResources", # Photoshop + 34386: "MM_UserBlock", + 34412: "CZ_LSMINFO", + 34665: "ExifTag", + 34675: "InterColorProfile", # ICCProfile + 34680: "FEI_SFEG", # + 34682: "FEI_HELIOS", # + 34683: "FEI_TITAN", # + 34687: "FXExtensions", + 34688: "MultiProfiles", + 34689: "SharedData", + 34690: "T88Options", + 34710: "MarCCD", # offset to MarCCD header + 34732: "ImageLayer", + 34735: "GeoKeyDirectoryTag", + 34736: "GeoDoubleParamsTag", + 34737: "GeoAsciiParamsTag", + 34750: "JBIGOptions", + 34821: "PIXTIFF", # ? Pixel Translations Inc + 34850: "ExposureProgram", + 34852: "SpectralSensitivity", + 34853: "GPSTag", # GPSIFD + 34855: "ISOSpeedRatings", + 34856: "OECF", + 34857: "Interlace", + 34858: "TimeZoneOffset", + 34859: "SelfTimerMode", + 34864: "SensitivityType", + 34865: "StandardOutputSensitivity", + 34866: "RecommendedExposureIndex", + 34867: "ISOSpeed", + 34868: "ISOSpeedLatitudeyyy", + 34869: "ISOSpeedLatitudezzz", + 34908: "HylaFAXFaxRecvParams", + 34909: "HylaFAXFaxSubAddress", + 34910: "HylaFAXFaxRecvTime", + 34911: "FaxDcs", + 34929: "FedexEDR", + 34954: "LeafSubIFD", + 34959: "Aphelion1", + 34960: "Aphelion2", + 34961: "AphelionInternal", # ADCIS + 36864: "ExifVersion", + 36867: "DateTimeOriginal", + 36868: "DateTimeDigitized", + 36873: "GooglePlusUploadCode", + 36880: "OffsetTime", + 36881: "OffsetTimeOriginal", + 36882: "OffsetTimeDigitized", + # TODO: Pilatus/CHESS/TV6 36864..37120 conflicting with Exif tags + # 36864: 'TVX ?', + # 36865: 'TVX_NumExposure', + # 36866: 'TVX_NumBackground', + # 36867: 'TVX_ExposureTime', + # 36868: 'TVX_BackgroundTime', + # 36870: 'TVX ?', + # 36873: 'TVX_SubBpp', + # 36874: 'TVX_SubWide', + # 36875: 
'TVX_SubHigh', + # 36876: 'TVX_BlackLevel', + # 36877: 'TVX_DarkCurrent', + # 36878: 'TVX_ReadNoise', + # 36879: 'TVX_DarkCurrentNoise', + # 36880: 'TVX_BeamMonitor', + # 37120: 'TVX_UserVariables', # A/D values + 37121: "ComponentsConfiguration", + 37122: "CompressedBitsPerPixel", + 37377: "ShutterSpeedValue", + 37378: "ApertureValue", + 37379: "BrightnessValue", + 37380: "ExposureBiasValue", + 37381: "MaxApertureValue", + 37382: "SubjectDistance", + 37383: "MeteringMode", + 37384: "LightSource", + 37385: "Flash", + 37386: "FocalLength", + 37387: "FlashEnergy_", # 37387 + 37388: "SpatialFrequencyResponse_", # 37388 + 37389: "Noise", + 37390: "FocalPlaneXResolution", + 37391: "FocalPlaneYResolution", + 37392: "FocalPlaneResolutionUnit", + 37393: "ImageNumber", + 37394: "SecurityClassification", + 37395: "ImageHistory", + 37396: "SubjectLocation", + 37397: "ExposureIndex", + 37398: "TIFFEPStandardID", + 37399: "SensingMethod", + 37434: "CIP3DataFile", + 37435: "CIP3Sheet", + 37436: "CIP3Side", + 37439: "StoNits", + 37500: "MakerNote", + 37510: "UserComment", + 37520: "SubsecTime", + 37521: "SubsecTimeOriginal", + 37522: "SubsecTimeDigitized", + 37679: "MODIText", # Microsoft Office Document Imaging + 37680: "MODIOLEPropertySetStorage", + 37681: "MODIPositioning", + 37706: "TVIPS", # offset to TemData structure + 37707: "TVIPS1", + 37708: "TVIPS2", # same TemData structure as undefined + 37724: "ImageSourceData", # Photoshop + 37888: "Temperature", + 37889: "Humidity", + 37890: "Pressure", + 37891: "WaterDepth", + 37892: "Acceleration", + 37893: "CameraElevationAngle", + 40001: "MC_IpWinScal", # Media Cybernetics + 40100: "MC_IdOld", + 40965: "InteroperabilityTag", # InteropOffset + 40091: "XPTitle", + 40092: "XPComment", + 40093: "XPAuthor", + 40094: "XPKeywords", + 40095: "XPSubject", + 40960: "FlashpixVersion", + 40961: "ColorSpace", + 40962: "PixelXDimension", + 40963: "PixelYDimension", + 40964: "RelatedSoundFile", + 40976: "SamsungRawPointersOffset", + 40977: 
"SamsungRawPointersLength", + 41217: "SamsungRawByteOrder", + 41218: "SamsungRawUnknown", + 41483: "FlashEnergy", + 41484: "SpatialFrequencyResponse", + 41485: "Noise_", # 37389 + 41486: "FocalPlaneXResolution_", # 37390 + 41487: "FocalPlaneYResolution_", # 37391 + 41488: "FocalPlaneResolutionUnit_", # 37392 + 41489: "ImageNumber_", # 37393 + 41490: "SecurityClassification_", # 37394 + 41491: "ImageHistory_", # 37395 + 41492: "SubjectLocation_", # 37395 + 41493: "ExposureIndex_ ", # 37397 + 41494: "TIFF-EPStandardID", + 41495: "SensingMethod_", # 37399 + 41728: "FileSource", + 41729: "SceneType", + 41730: "CFAPattern_", # 33422 + 41985: "CustomRendered", + 41986: "ExposureMode", + 41987: "WhiteBalance", + 41988: "DigitalZoomRatio", + 41989: "FocalLengthIn35mmFilm", + 41990: "SceneCaptureType", + 41991: "GainControl", + 41992: "Contrast", + 41993: "Saturation", + 41994: "Sharpness", + 41995: "DeviceSettingDescription", + 41996: "SubjectDistanceRange", + 42016: "ImageUniqueID", + 42032: "CameraOwnerName", + 42033: "BodySerialNumber", + 42034: "LensSpecification", + 42035: "LensMake", + 42036: "LensModel", + 42037: "LensSerialNumber", + 42112: "GDAL_METADATA", + 42113: "GDAL_NODATA", + 42240: "Gamma", + 43314: "NIHImageHeader", + 44992: "ExpandSoftware", + 44993: "ExpandLens", + 44994: "ExpandFilm", + 44995: "ExpandFilterLens", + 44996: "ExpandScanner", + 44997: "ExpandFlashLamp", + 48129: "PixelFormat", # HDP and WDP + 48130: "Transformation", + 48131: "Uncompressed", + 48132: "ImageType", + 48256: "ImageWidth_", # 256 + 48257: "ImageHeight_", + 48258: "WidthResolution", + 48259: "HeightResolution", + 48320: "ImageOffset", + 48321: "ImageByteCount", + 48322: "AlphaOffset", + 48323: "AlphaByteCount", + 48324: "ImageDataDiscard", + 48325: "AlphaDataDiscard", + 50215: "OceScanjobDescription", + 50216: "OceApplicationSelector", + 50217: "OceIdentificationNumber", + 50218: "OceImageLogicCharacteristics", + 50255: "Annotations", + 50288: "MC_Id", # Media Cybernetics + 
50289: "MC_XYPosition", + 50290: "MC_ZPosition", + 50291: "MC_XYCalibration", + 50292: "MC_LensCharacteristics", + 50293: "MC_ChannelName", + 50294: "MC_ExcitationWavelength", + 50295: "MC_TimeStamp", + 50296: "MC_FrameProperties", + 50341: "PrintImageMatching", + 50495: "PCO_RAW", # TODO: PCO CamWare + 50547: "OriginalFileName", + 50560: "USPTO_OriginalContentType", # US Patent Office + 50561: "USPTO_RotationCode", + 50656: "CR2CFAPattern", + 50706: "DNGVersion", # DNG 50706 .. 51112 + 50707: "DNGBackwardVersion", + 50708: "UniqueCameraModel", + 50709: "LocalizedCameraModel", + 50710: "CFAPlaneColor", + 50711: "CFALayout", + 50712: "LinearizationTable", + 50713: "BlackLevelRepeatDim", + 50714: "BlackLevel", + 50715: "BlackLevelDeltaH", + 50716: "BlackLevelDeltaV", + 50717: "WhiteLevel", + 50718: "DefaultScale", + 50719: "DefaultCropOrigin", + 50720: "DefaultCropSize", + 50721: "ColorMatrix1", + 50722: "ColorMatrix2", + 50723: "CameraCalibration1", + 50724: "CameraCalibration2", + 50725: "ReductionMatrix1", + 50726: "ReductionMatrix2", + 50727: "AnalogBalance", + 50728: "AsShotNeutral", + 50729: "AsShotWhiteXY", + 50730: "BaselineExposure", + 50731: "BaselineNoise", + 50732: "BaselineSharpness", + 50733: "BayerGreenSplit", + 50734: "LinearResponseLimit", + 50735: "CameraSerialNumber", + 50736: "LensInfo", + 50737: "ChromaBlurRadius", + 50738: "AntiAliasStrength", + 50739: "ShadowScale", + 50740: "DNGPrivateData", + 50741: "MakerNoteSafety", + 50752: "RawImageSegmentation", + 50778: "CalibrationIlluminant1", + 50779: "CalibrationIlluminant2", + 50780: "BestQualityScale", + 50781: "RawDataUniqueID", + 50784: "AliasLayerMetadata", + 50827: "OriginalRawFileName", + 50828: "OriginalRawFileData", + 50829: "ActiveArea", + 50830: "MaskedAreas", + 50831: "AsShotICCProfile", + 50832: "AsShotPreProfileMatrix", + 50833: "CurrentICCProfile", + 50834: "CurrentPreProfileMatrix", + 50838: "IJMetadataByteCounts", + 50839: "IJMetadata", + 50844: "RPCCoefficientTag", + 50879: 
"ColorimetricReference", + 50885: "SRawType", + 50898: "PanasonicTitle", + 50899: "PanasonicTitle2", + 50931: "CameraCalibrationSignature", + 50932: "ProfileCalibrationSignature", + 50933: "ProfileIFD", + 50934: "AsShotProfileName", + 50935: "NoiseReductionApplied", + 50936: "ProfileName", + 50937: "ProfileHueSatMapDims", + 50938: "ProfileHueSatMapData1", + 50939: "ProfileHueSatMapData2", + 50940: "ProfileToneCurve", + 50941: "ProfileEmbedPolicy", + 50942: "ProfileCopyright", + 50964: "ForwardMatrix1", + 50965: "ForwardMatrix2", + 50966: "PreviewApplicationName", + 50967: "PreviewApplicationVersion", + 50968: "PreviewSettingsName", + 50969: "PreviewSettingsDigest", + 50970: "PreviewColorSpace", + 50971: "PreviewDateTime", + 50972: "RawImageDigest", + 50973: "OriginalRawFileDigest", + 50974: "SubTileBlockSize", + 50975: "RowInterleaveFactor", + 50981: "ProfileLookTableDims", + 50982: "ProfileLookTableData", + 51008: "OpcodeList1", + 51009: "OpcodeList2", + 51022: "OpcodeList3", + 51023: "FibicsXML", # + 51041: "NoiseProfile", + 51043: "TimeCodes", + 51044: "FrameRate", + 51058: "TStop", + 51081: "ReelName", + 51089: "OriginalDefaultFinalSize", + 51090: "OriginalBestQualitySize", + 51091: "OriginalDefaultCropSize", + 51105: "CameraLabel", + 51107: "ProfileHueSatMapEncoding", + 51108: "ProfileLookTableEncoding", + 51109: "BaselineExposureOffset", + 51110: "DefaultBlackRender", + 51111: "NewRawImageDigest", + 51112: "RawToPreviewGain", + 51125: "DefaultUserCrop", + 51123: "MicroManagerMetadata", + 59932: "Padding", + 59933: "OffsetSchema", + # Reusable Tags 65000-65535 + # 65000: Dimap_Document XML + # 65000-65112: Photoshop Camera RAW EXIF tags + # 65000: 'OwnerName', + # 65001: 'SerialNumber', + # 65002: 'Lens', + # 65024: 'KDC_IFD', + # 65100: 'RawFile', + # 65101: 'Converter', + # 65102: 'WhiteBalance', + # 65105: 'Exposure', + # 65106: 'Shadows', + # 65107: 'Brightness', + # 65108: 'Contrast', + # 65109: 'Saturation', + # 65110: 'Sharpness', + # 65111: 
def TAG_ATTRIBUTES():
    """Return mapping of TIFF tag names to TiffPage attribute names.

    The keys are tag *names* (strings such as "ImageWidth"), not numeric
    tag codes; the values are the lower-case attribute names used for
    them.
    """
    name_to_attribute = (
        ("ImageWidth", "imagewidth"),
        ("ImageLength", "imagelength"),
        ("BitsPerSample", "bitspersample"),
        ("Compression", "compression"),
        ("PlanarConfiguration", "planarconfig"),
        ("FillOrder", "fillorder"),
        ("PhotometricInterpretation", "photometric"),
        ("ColorMap", "colormap"),
        ("ImageDescription", "description"),
        ("ImageDescription1", "description1"),
        ("SamplesPerPixel", "samplesperpixel"),
        ("RowsPerStrip", "rowsperstrip"),
        ("Software", "software"),
        ("Predictor", "predictor"),
        ("TileWidth", "tilewidth"),
        ("TileLength", "tilelength"),
        ("ExtraSamples", "extrasamples"),
        ("SampleFormat", "sampleformat"),
        ("ImageDepth", "imagedepth"),
        ("TileDepth", "tiledepth"),
    )
    return dict(name_to_attribute)
def DATATYPES():
    # Return an IntEnum class naming the numeric TIFF field data types.
    # The class is created inside this function, so it does not exist until
    # the function is called. Member descriptions below follow the comments
    # of the DATA_FORMATS table in this file, which is keyed by these codes.
    class DATATYPES(enum.IntEnum):
        NOTYPE = 0
        BYTE = 1  # 8-bit unsigned integer
        ASCII = 2  # 8-bit byte holding a 7-bit ASCII code, NULL terminated
        SHORT = 3  # 16-bit unsigned integer
        LONG = 4  # 32-bit unsigned integer
        RATIONAL = 5  # two LONGs: numerator, denominator
        SBYTE = 6  # 8-bit signed integer
        UNDEFINED = 7  # 8-bit byte that may contain anything
        SSHORT = 8  # 16-bit signed integer
        SLONG = 9  # 32-bit signed integer
        SRATIONAL = 10  # two SLONGs: numerator, denominator
        FLOAT = 11  # single precision IEEE
        DOUBLE = 12  # double precision IEEE
        IFD = 13  # unsigned 4 byte IFD offset
        UNICODE = 14  # no struct format is defined in DATA_FORMATS
        COMPLEX = 15  # no struct format is defined in DATA_FORMATS
        LONG8 = 16  # unsigned 8 byte integer (BigTiff)
        SLONG8 = 17  # signed 8 byte integer (BigTiff)
        IFD8 = 18  # unsigned 8 byte IFD offset (BigTiff)

    return DATATYPES
def COMPESSORS():
    """Return a lazy lookup from COMPRESSION codes to compress functions.

    Indexing the returned object with a compression code yields a
    ``(compress_function, default_level)`` tuple. Codes 8 and 32946 map to
    ``zlib.compress``; 34925 (LZMA) and 34926 (ZSTD) import their codec on
    first use and cache the result.

    Indexing with a code that is unknown, or whose codec module cannot be
    imported, raises ``KeyError`` carrying that code; ``in`` reports the
    same condition as ``False``.
    """

    class Compressors(object):
        """Delay import compressor functions."""

        def __init__(self):
            self._compressors = {8: (zlib.compress, 6), 32946: (zlib.compress, 6)}

        def __getitem__(self, key):
            if key in self._compressors:
                return self._compressors[key]

            if key == 34925:
                try:
                    import lzma  # delayed import
                except ImportError:
                    try:
                        import backports.lzma as lzma  # delayed import
                    except ImportError:
                        # include the code so the failed lookup is diagnosable
                        raise KeyError(key)

                def lzma_compress(x, level):
                    # level is accepted for a uniform call signature but is
                    # not forwarded to lzma.compress
                    return lzma.compress(x)

                entry = lzma_compress, 0
            elif key == 34926:
                try:
                    import zstd  # delayed import
                except ImportError:
                    raise KeyError(key)
                entry = zstd.compress, 9
            else:
                raise KeyError(key)

            # cache so the import is attempted only once per successful codec
            self._compressors[key] = entry
            return entry

        def __contains__(self, key):
            try:
                self[key]
                return True
            except KeyError:
                return False

    return Compressors()
def AXES_LABELS():
    """Return a two-way lookup between axis codes and axis names.

    The mapping contains every single-character code mapped to its name
    and, in addition, every name mapped back to its code.
    """
    # TODO: is there a standard for character axes labels?
    code_name_pairs = (
        ("X", "width"),
        ("Y", "height"),
        ("Z", "depth"),
        ("S", "sample"),  # rgb(a)
        ("I", "series"),  # general sequence, plane, page, IFD
        ("T", "time"),
        ("C", "channel"),  # color, emission wavelength
        ("A", "angle"),
        ("P", "phase"),  # formerly F    # P is Position in LSM!
        ("R", "tile"),  # region, point, mosaic
        ("H", "lifetime"),  # histogram
        ("E", "lambda"),  # excitation wavelength
        ("L", "exposure"),  # lux
        ("V", "event"),
        ("Q", "other"),
        ("M", "mosaic"),  # LSM 6
    )
    labels = dict(code_name_pairs)
    # append the reverse direction after all forward entries
    for code, name in code_name_pairs:
        labels[name] = code
    return labels
"GPSDestLongitudeRef", + 22: "GPSDestLongitude", + 23: "GPSDestBearingRef", + 24: "GPSDestBearing", + 25: "GPSDestDistanceRef", + 26: "GPSDestDistance", + 27: "GPSProcessingMethod", + 28: "GPSAreaInformation", + 29: "GPSDateStamp", + 30: "GPSDifferential", + 31: "GPSHPositioningError", + } + + def IOP_TAGS(): + return { + 1: "InteroperabilityIndex", + 2: "InteroperabilityVersion", + 4096: "RelatedImageFileFormat", + 4097: "RelatedImageWidth", + 4098: "RelatedImageLength", + } + + def GEO_KEYS(): + return { + 1024: "GTModelTypeGeoKey", + 1025: "GTRasterTypeGeoKey", + 1026: "GTCitationGeoKey", + 2048: "GeographicTypeGeoKey", + 2049: "GeogCitationGeoKey", + 2050: "GeogGeodeticDatumGeoKey", + 2051: "GeogPrimeMeridianGeoKey", + 2052: "GeogLinearUnitsGeoKey", + 2053: "GeogLinearUnitSizeGeoKey", + 2054: "GeogAngularUnitsGeoKey", + 2055: "GeogAngularUnitsSizeGeoKey", + 2056: "GeogEllipsoidGeoKey", + 2057: "GeogSemiMajorAxisGeoKey", + 2058: "GeogSemiMinorAxisGeoKey", + 2059: "GeogInvFlatteningGeoKey", + 2060: "GeogAzimuthUnitsGeoKey", + 2061: "GeogPrimeMeridianLongGeoKey", + 2062: "GeogTOWGS84GeoKey", + 3059: "ProjLinearUnitsInterpCorrectGeoKey", # GDAL + 3072: "ProjectedCSTypeGeoKey", + 3073: "PCSCitationGeoKey", + 3074: "ProjectionGeoKey", + 3075: "ProjCoordTransGeoKey", + 3076: "ProjLinearUnitsGeoKey", + 3077: "ProjLinearUnitSizeGeoKey", + 3078: "ProjStdParallel1GeoKey", + 3079: "ProjStdParallel2GeoKey", + 3080: "ProjNatOriginLongGeoKey", + 3081: "ProjNatOriginLatGeoKey", + 3082: "ProjFalseEastingGeoKey", + 3083: "ProjFalseNorthingGeoKey", + 3084: "ProjFalseOriginLongGeoKey", + 3085: "ProjFalseOriginLatGeoKey", + 3086: "ProjFalseOriginEastingGeoKey", + 3087: "ProjFalseOriginNorthingGeoKey", + 3088: "ProjCenterLongGeoKey", + 3089: "ProjCenterLatGeoKey", + 3090: "ProjCenterEastingGeoKey", + 3091: "ProjFalseOriginNorthingGeoKey", + 3092: "ProjScaleAtNatOriginGeoKey", + 3093: "ProjScaleAtCenterGeoKey", + 3094: "ProjAzimuthAngleGeoKey", + 3095: 
"ProjStraightVertPoleLongGeoKey", + 3096: "ProjRectifiedGridAngleGeoKey", + 4096: "VerticalCSTypeGeoKey", + 4097: "VerticalCitationGeoKey", + 4098: "VerticalDatumGeoKey", + 4099: "VerticalUnitsGeoKey", + } + + def GEO_CODES(): + try: + from .tifffile_geodb import GEO_CODES # delayed import + except (ImportError, ValueError): + try: + from tifffile_geodb import GEO_CODES # delayed import + except (ImportError, ValueError): + GEO_CODES = {} + return GEO_CODES + + def CZ_LSMINFO(): + return [ + ("MagicNumber", "u4"), + ("StructureSize", "i4"), + ("DimensionX", "i4"), + ("DimensionY", "i4"), + ("DimensionZ", "i4"), + ("DimensionChannels", "i4"), + ("DimensionTime", "i4"), + ("DataType", "i4"), # DATATYPES + ("ThumbnailX", "i4"), + ("ThumbnailY", "i4"), + ("VoxelSizeX", "f8"), + ("VoxelSizeY", "f8"), + ("VoxelSizeZ", "f8"), + ("OriginX", "f8"), + ("OriginY", "f8"), + ("OriginZ", "f8"), + ("ScanType", "u2"), + ("SpectralScan", "u2"), + ("TypeOfData", "u4"), # TYPEOFDATA + ("OffsetVectorOverlay", "u4"), + ("OffsetInputLut", "u4"), + ("OffsetOutputLut", "u4"), + ("OffsetChannelColors", "u4"), + ("TimeIntervall", "f8"), + ("OffsetChannelDataTypes", "u4"), + ("OffsetScanInformation", "u4"), # SCANINFO + ("OffsetKsData", "u4"), + ("OffsetTimeStamps", "u4"), + ("OffsetEventList", "u4"), + ("OffsetRoi", "u4"), + ("OffsetBleachRoi", "u4"), + ("OffsetNextRecording", "u4"), + # LSM 2.0 ends here + ("DisplayAspectX", "f8"), + ("DisplayAspectY", "f8"), + ("DisplayAspectZ", "f8"), + ("DisplayAspectTime", "f8"), + ("OffsetMeanOfRoisOverlay", "u4"), + ("OffsetTopoIsolineOverlay", "u4"), + ("OffsetTopoProfileOverlay", "u4"), + ("OffsetLinescanOverlay", "u4"), + ("ToolbarFlags", "u4"), + ("OffsetChannelWavelength", "u4"), + ("OffsetChannelFactors", "u4"), + ("ObjectiveSphereCorrection", "f8"), + ("OffsetUnmixParameters", "u4"), + # LSM 3.2, 4.0 end here + ("OffsetAcquisitionParameters", "u4"), + ("OffsetCharacteristics", "u4"), + ("OffsetPalette", "u4"), + ("TimeDifferenceX", "f8"), + 
def CZ_LSMINFO_READERS():
    """Return import functions for CZ_LSMINFO sub-records, keyed by name.

    Sub-records that have no import function assigned are present in the
    mapping with the value None.
    """
    # TODO: read more CZ_LSMINFO sub-records
    readers = {
        "ScanInformation": read_lsm_scaninfo,
        "TimeStamps": read_lsm_timestamps,
        "EventList": read_lsm_eventlist,
        "ChannelColors": read_lsm_channelcolors,
        "Positions": read_lsm_floatpairs,
        "TilePositions": read_lsm_floatpairs,
    }
    without_reader = (
        "VectorOverlay",
        "InputLut",
        "OutputLut",
        "TimeIntervall",
        "ChannelDataTypes",
        "KsData",
        "Roi",
        "BleachRoi",
        "NextRecording",
        "MeanOfRoisOverlay",
        "TopoIsolineOverlay",
        "TopoProfileOverlay",
        "ChannelWavelength",
        "SphereCorrection",
        "ChannelFactors",
        "UnmixParameters",
        "AcquisitionParameters",
        "Characteristics",
    )
    for record_name in without_reader:
        readers[record_name] = None
    return readers
def CZ_LSMINFO_DIMENSIONS():
    # Map single-letter dimension codes to the name of the CZ_LSMINFO
    # attribute ('Dimension' + suffix) associated with that code
    codes = "XYZCTPM"
    suffixes = ("X", "Y", "Z", "Channels", "Time", "P", "M")
    return {code: "Dimension" + suffix for code, suffix in zip(codes, suffixes)}
0x1000000E: "ImagesHeight", + 0x1000000F: "ImagesNumberPlanes", + 0x10000010: "ImagesNumberStacks", + 0x10000011: "ImagesNumberChannels", + 0x10000012: "LinscanXySize", + 0x10000013: "ScanDirection", + 0x10000014: "TimeSeries", + 0x10000015: "OriginalScanData", + 0x10000016: "ZoomX", + 0x10000017: "ZoomY", + 0x10000018: "ZoomZ", + 0x10000019: "Sample0X", + 0x1000001A: "Sample0Y", + 0x1000001B: "Sample0Z", + 0x1000001C: "SampleSpacing", + 0x1000001D: "LineSpacing", + 0x1000001E: "PlaneSpacing", + 0x1000001F: "PlaneWidth", + 0x10000020: "PlaneHeight", + 0x10000021: "VolumeDepth", + 0x10000023: "Nutation", + 0x10000034: "Rotation", + 0x10000035: "Precession", + 0x10000036: "Sample0time", + 0x10000037: "StartScanTriggerIn", + 0x10000038: "StartScanTriggerOut", + 0x10000039: "StartScanEvent", + 0x10000040: "StartScanTime", + 0x10000041: "StopScanTriggerIn", + 0x10000042: "StopScanTriggerOut", + 0x10000043: "StopScanEvent", + 0x10000044: "StopScanTime", + 0x10000045: "UseRois", + 0x10000046: "UseReducedMemoryRois", + 0x10000047: "User", + 0x10000048: "UseBcCorrection", + 0x10000049: "PositionBcCorrection1", + 0x10000050: "PositionBcCorrection2", + 0x10000051: "InterpolationY", + 0x10000052: "CameraBinning", + 0x10000053: "CameraSupersampling", + 0x10000054: "CameraFrameWidth", + 0x10000055: "CameraFrameHeight", + 0x10000056: "CameraOffsetX", + 0x10000057: "CameraOffsetY", + 0x10000059: "RtBinning", + 0x1000005A: "RtFrameWidth", + 0x1000005B: "RtFrameHeight", + 0x1000005C: "RtRegionWidth", + 0x1000005D: "RtRegionHeight", + 0x1000005E: "RtOffsetX", + 0x1000005F: "RtOffsetY", + 0x10000060: "RtZoom", + 0x10000061: "RtLinePeriod", + 0x10000062: "Prescan", + 0x10000063: "ScanDirectionZ", + # Track + 0x40000001: "MultiplexType", # 0 After Line; 1 After Frame + 0x40000002: "MultiplexOrder", + 0x40000003: "SamplingMode", # 0 Sample; 1 Line Avg; 2 Frame Avg + 0x40000004: "SamplingMethod", # 1 Mean; 2 Sum + 0x40000005: "SamplingNumber", + 0x40000006: "Acquire", + 0x40000007: 
"SampleObservationTime", + 0x4000000B: "TimeBetweenStacks", + 0x4000000C: "Name", + 0x4000000D: "Collimator1Name", + 0x4000000E: "Collimator1Position", + 0x4000000F: "Collimator2Name", + 0x40000010: "Collimator2Position", + 0x40000011: "IsBleachTrack", + 0x40000012: "IsBleachAfterScanNumber", + 0x40000013: "BleachScanNumber", + 0x40000014: "TriggerIn", + 0x40000015: "TriggerOut", + 0x40000016: "IsRatioTrack", + 0x40000017: "BleachCount", + 0x40000018: "SpiCenterWavelength", + 0x40000019: "PixelTime", + 0x40000021: "CondensorFrontlens", + 0x40000023: "FieldStopValue", + 0x40000024: "IdCondensorAperture", + 0x40000025: "CondensorAperture", + 0x40000026: "IdCondensorRevolver", + 0x40000027: "CondensorFilter", + 0x40000028: "IdTransmissionFilter1", + 0x40000029: "IdTransmission1", + 0x40000030: "IdTransmissionFilter2", + 0x40000031: "IdTransmission2", + 0x40000032: "RepeatBleach", + 0x40000033: "EnableSpotBleachPos", + 0x40000034: "SpotBleachPosx", + 0x40000035: "SpotBleachPosy", + 0x40000036: "SpotBleachPosz", + 0x40000037: "IdTubelens", + 0x40000038: "IdTubelensPosition", + 0x40000039: "TransmittedLight", + 0x4000003A: "ReflectedLight", + 0x4000003B: "SimultanGrabAndBleach", + 0x4000003C: "BleachPixelTime", + # Laser + 0x50000001: "Name", + 0x50000002: "Acquire", + 0x50000003: "Power", + # DetectionChannel + 0x70000001: "IntegrationMode", + 0x70000002: "SpecialMode", + 0x70000003: "DetectorGainFirst", + 0x70000004: "DetectorGainLast", + 0x70000005: "AmplifierGainFirst", + 0x70000006: "AmplifierGainLast", + 0x70000007: "AmplifierOffsFirst", + 0x70000008: "AmplifierOffsLast", + 0x70000009: "PinholeDiameter", + 0x7000000A: "CountingTrigger", + 0x7000000B: "Acquire", + 0x7000000C: "PointDetectorName", + 0x7000000D: "AmplifierName", + 0x7000000E: "PinholeName", + 0x7000000F: "FilterSetName", + 0x70000010: "FilterName", + 0x70000013: "IntegratorName", + 0x70000014: "ChannelName", + 0x70000015: "DetectorGainBc1", + 0x70000016: "DetectorGainBc2", + 0x70000017: 
"AmplifierGainBc1", + 0x70000018: "AmplifierGainBc2", + 0x70000019: "AmplifierOffsetBc1", + 0x70000020: "AmplifierOffsetBc2", + 0x70000021: "SpectralScanChannels", + 0x70000022: "SpiWavelengthStart", + 0x70000023: "SpiWavelengthStop", + 0x70000026: "DyeName", + 0x70000027: "DyeFolder", + # IlluminationChannel + 0x90000001: "Name", + 0x90000002: "Power", + 0x90000003: "Wavelength", + 0x90000004: "Aquire", + 0x90000005: "DetchannelName", + 0x90000006: "PowerBc1", + 0x90000007: "PowerBc2", + # BeamSplitter + 0xB0000001: "FilterSet", + 0xB0000002: "Filter", + 0xB0000003: "Name", + # DataChannel + 0xD0000001: "Name", + 0xD0000003: "Acquire", + 0xD0000004: "Color", + 0xD0000005: "SampleType", + 0xD0000006: "BitsPerSample", + 0xD0000007: "RatioType", + 0xD0000008: "RatioTrack1", + 0xD0000009: "RatioTrack2", + 0xD000000A: "RatioChannel1", + 0xD000000B: "RatioChannel2", + 0xD000000C: "RatioConst1", + 0xD000000D: "RatioConst2", + 0xD000000E: "RatioConst3", + 0xD000000F: "RatioConst4", + 0xD0000010: "RatioConst5", + 0xD0000011: "RatioConst6", + 0xD0000012: "RatioFirstImages1", + 0xD0000013: "RatioFirstImages2", + 0xD0000014: "DyeName", + 0xD0000015: "DyeFolder", + 0xD0000016: "Spectrum", + 0xD0000017: "Acquire", + # Timer + 0x12000001: "Name", + 0x12000002: "Description", + 0x12000003: "Interval", + 0x12000004: "TriggerIn", + 0x12000005: "TriggerOut", + 0x12000006: "ActivationTime", + 0x12000007: "ActivationNumber", + # Marker + 0x14000001: "Name", + 0x14000002: "Description", + 0x14000003: "TriggerIn", + 0x14000004: "TriggerOut", + } + + def NIH_IMAGE_HEADER(): + return [ + ("FileID", "a8"), + ("nLines", "i2"), + ("PixelsPerLine", "i2"), + ("Version", "i2"), + ("OldLutMode", "i2"), + ("OldnColors", "i2"), + ("Colors", "u1", (3, 32)), + ("OldColorStart", "i2"), + ("ColorWidth", "i2"), + ("ExtraColors", "u2", (6, 3)), + ("nExtraColors", "i2"), + ("ForegroundIndex", "i2"), + ("BackgroundIndex", "i2"), + ("XScale", "f8"), + ("Unused2", "i2"), + ("Unused3", "i2"), + ("UnitsID", 
"i2"), # NIH_UNITS_TYPE + ("p1", [("x", "i2"), ("y", "i2")]), + ("p2", [("x", "i2"), ("y", "i2")]), + ("CurveFitType", "i2"), # NIH_CURVEFIT_TYPE + ("nCoefficients", "i2"), + ("Coeff", "f8", 6), + ("UMsize", "u1"), + ("UM", "a15"), + ("UnusedBoolean", "u1"), + ("BinaryPic", "b1"), + ("SliceStart", "i2"), + ("SliceEnd", "i2"), + ("ScaleMagnification", "f4"), + ("nSlices", "i2"), + ("SliceSpacing", "f4"), + ("CurrentSlice", "i2"), + ("FrameInterval", "f4"), + ("PixelAspectRatio", "f4"), + ("ColorStart", "i2"), + ("ColorEnd", "i2"), + ("nColors", "i2"), + ("Fill1", "3u2"), + ("Fill2", "3u2"), + ("Table", "u1"), # NIH_COLORTABLE_TYPE + ("LutMode", "u1"), # NIH_LUTMODE_TYPE + ("InvertedTable", "b1"), + ("ZeroClip", "b1"), + ("XUnitSize", "u1"), + ("XUnit", "a11"), + ("StackType", "i2"), # NIH_STACKTYPE_TYPE + # ('UnusedBytes', 'u1', 200) + ] + + def NIH_COLORTABLE_TYPE(): + return ( + "CustomTable", + "AppleDefault", + "Pseudo20", + "Pseudo32", + "Rainbow", + "Fire1", + "Fire2", + "Ice", + "Grays", + "Spectrum", + ) + + def NIH_LUTMODE_TYPE(): + return ( + "PseudoColor", + "OldAppleDefault", + "OldSpectrum", + "GrayScale", + "ColorLut", + "CustomGrayscale", + ) + + def NIH_CURVEFIT_TYPE(): + return ( + "StraightLine", + "Poly2", + "Poly3", + "Poly4", + "Poly5", + "ExpoFit", + "PowerFit", + "LogFit", + "RodbardFit", + "SpareFit1", + "Uncalibrated", + "UncalibratedOD", + ) + + def NIH_UNITS_TYPE(): + return ( + "Nanometers", + "Micrometers", + "Millimeters", + "Centimeters", + "Meters", + "Kilometers", + "Inches", + "Feet", + "Miles", + "Pixels", + "OtherUnits", + ) + + def NIH_STACKTYPE_TYPE(): + return ("VolumeStack", "RGBStack", "MovieStack", "HSVStack") + + def TVIPS_HEADER_V1(): + # TVIPS TemData structure from EMMENU Help file + return [ + ("Version", "i4"), + ("CommentV1", "a80"), + ("HighTension", "i4"), + ("SphericalAberration", "i4"), + ("IlluminationAperture", "i4"), + ("Magnification", "i4"), + ("PostMagnification", "i4"), + ("FocalLength", "i4"), + 
("Defocus", "i4"), + ("Astigmatism", "i4"), + ("AstigmatismDirection", "i4"), + ("BiprismVoltage", "i4"), + ("SpecimenTiltAngle", "i4"), + ("SpecimenTiltDirection", "i4"), + ("IlluminationTiltDirection", "i4"), + ("IlluminationTiltAngle", "i4"), + ("ImageMode", "i4"), + ("EnergySpread", "i4"), + ("ChromaticAberration", "i4"), + ("ShutterType", "i4"), + ("DefocusSpread", "i4"), + ("CcdNumber", "i4"), + ("CcdSize", "i4"), + ("OffsetXV1", "i4"), + ("OffsetYV1", "i4"), + ("PhysicalPixelSize", "i4"), + ("Binning", "i4"), + ("ReadoutSpeed", "i4"), + ("GainV1", "i4"), + ("SensitivityV1", "i4"), + ("ExposureTimeV1", "i4"), + ("FlatCorrected", "i4"), + ("DeadPxCorrected", "i4"), + ("ImageMean", "i4"), + ("ImageStd", "i4"), + ("DisplacementX", "i4"), + ("DisplacementY", "i4"), + ("DateV1", "i4"), + ("TimeV1", "i4"), + ("ImageMin", "i4"), + ("ImageMax", "i4"), + ("ImageStatisticsQuality", "i4"), + ] + + def TVIPS_HEADER_V2(): + return [ + ("ImageName", "V160"), # utf16 + ("ImageFolder", "V160"), + ("ImageSizeX", "i4"), + ("ImageSizeY", "i4"), + ("ImageSizeZ", "i4"), + ("ImageSizeE", "i4"), + ("ImageDataType", "i4"), + ("Date", "i4"), + ("Time", "i4"), + ("Comment", "V1024"), + ("ImageHistory", "V1024"), + ("Scaling", "16f4"), + ("ImageStatistics", "16c16"), + ("ImageType", "i4"), + ("ImageDisplaType", "i4"), + ("PixelSizeX", "f4"), # distance between two px in x, [nm] + ("PixelSizeY", "f4"), # distance between two px in y, [nm] + ("ImageDistanceZ", "f4"), + ("ImageDistanceE", "f4"), + ("ImageMisc", "32f4"), + ("TemType", "V160"), + ("TemHighTension", "f4"), + ("TemAberrations", "32f4"), + ("TemEnergy", "32f4"), + ("TemMode", "i4"), + ("TemMagnification", "f4"), + ("TemMagnificationCorrection", "f4"), + ("PostMagnification", "f4"), + ("TemStageType", "i4"), + ("TemStagePosition", "5f4"), # x, y, z, a, b + ("TemImageShift", "2f4"), + ("TemBeamShift", "2f4"), + ("TemBeamTilt", "2f4"), + ("TilingParameters", "7f4"), # 0: tiling? 
def MM_HEADER():
    # Olympus FluoView MM_Header record layout.
    # The sub-record below (Name, Size, Origin, Resolution, Unit) is used
    # for each of the 10 'Dimensions' entries and for 'GrayChannel'; both
    # fields refer to the same list object.
    axis_record = [
        ("Name", "a16"),
        ("Size", "i4"),
        ("Origin", "f8"),
        ("Resolution", "f8"),
        ("Unit", "a64"),
    ]
    fields = [
        ("HeaderFlag", "i2"),
        ("ImageType", "u1"),
        ("ImageName", "a257"),
        ("OffsetData", "u4"),
        ("PaletteSize", "i4"),
        ("OffsetPalette0", "u4"),
        ("OffsetPalette1", "u4"),
        ("CommentSize", "i4"),
        ("OffsetComment", "u4"),
    ]
    fields.append(("Dimensions", axis_record, 10))
    fields.extend(
        [
            ("OffsetPosition", "u4"),
            ("MapType", "i2"),
            ("MapMin", "f8"),
            ("MapMax", "f8"),
            ("MinValue", "f8"),
            ("MaxValue", "f8"),
            ("OffsetMap", "u4"),
            ("Gamma", "f8"),
            ("Offset", "f8"),
        ]
    )
    fields.append(("GrayChannel", axis_record))
    fields.extend(
        [
            ("OffsetThumbnail", "u4"),
            ("VoiceField", "i4"),
            ("OffsetVoiceField", "u4"),
        ]
    )
    return fields
("ThreshState", int), + ("ThreshStateRed", int), + ("tagid_10", None), # undefined + ("ThreshStateGreen", int), + ("ThreshStateBlue", int), + ("ThreshStateLo", int), + ("ThreshStateHi", int), + ("Zoom", int), + ("CreateTime", julian_datetime), + ("LastSavedTime", julian_datetime), + ("currentBuffer", int), + ("grayFit", None), + ("grayPointCount", None), + ("grayX", Fraction), + ("grayY", Fraction), + ("grayMin", Fraction), + ("grayMax", Fraction), + ("grayUnitName", str), + ("StandardLUT", int), + ("wavelength", int), + ("StagePosition", "(%i,2,2)u4"), # N xy positions as fract + ("CameraChipOffset", "(%i,2,2)u4"), # N xy offsets as fract + ("OverlayMask", None), + ("OverlayCompress", None), + ("Overlay", None), + ("SpecialOverlayMask", None), + ("SpecialOverlayCompress", None), + ("SpecialOverlay", None), + ("ImageProperty", read_uic_image_property), + ("StageLabel", "%ip"), # N str + ("AutoScaleLoInfo", Fraction), + ("AutoScaleHiInfo", Fraction), + ("AbsoluteZ", "(%i,2)u4"), # N fractions + ("AbsoluteZValid", "(%i,)u4"), # N long + ("Gamma", "I"), # 'I' uses offset + ("GammaRed", "I"), + ("GammaGreen", "I"), + ("GammaBlue", "I"), + ("CameraBin", "2I"), + ("NewLUT", int), + ("ImagePropertyEx", None), + ("PlaneProperty", int), + ("UserLutTable", "(256,3)u1"), + ("RedAutoScaleInfo", int), + ("RedAutoScaleLoInfo", Fraction), + ("RedAutoScaleHiInfo", Fraction), + ("RedMinScaleInfo", int), + ("RedMaxScaleInfo", int), + ("GreenAutoScaleInfo", int), + ("GreenAutoScaleLoInfo", Fraction), + ("GreenAutoScaleHiInfo", Fraction), + ("GreenMinScaleInfo", int), + ("GreenMaxScaleInfo", int), + ("BlueAutoScaleInfo", int), + ("BlueAutoScaleLoInfo", Fraction), + ("BlueAutoScaleHiInfo", Fraction), + ("BlueMinScaleInfo", int), + ("BlueMaxScaleInfo", int), + # ('OverlayPlaneColor', read_uic_overlay_plane_color), + ] + + def PILATUS_HEADER(): + # PILATUS CBF Header Specification, Version 1.4 + # Map key to [value_indices], type + return { + "Detector": ([slice(1, None)], str), + 
"Pixel_size": ([1, 4], float), + "Silicon": ([3], float), + "Exposure_time": ([1], float), + "Exposure_period": ([1], float), + "Tau": ([1], float), + "Count_cutoff": ([1], int), + "Threshold_setting": ([1], float), + "Gain_setting": ([1, 2], str), + "N_excluded_pixels": ([1], int), + "Excluded_pixels": ([1], str), + "Flat_field": ([1], str), + "Trim_file": ([1], str), + "Image_path": ([1], str), + # optional + "Wavelength": ([1], float), + "Energy_range": ([1, 2], float), + "Detector_distance": ([1], float), + "Detector_Voffset": ([1], float), + "Beam_xy": ([1, 2], float), + "Flux": ([1], str), + "Filter_transmission": ([1], float), + "Start_angle": ([1], float), + "Angle_increment": ([1], float), + "Detector_2theta": ([1], float), + "Polarization": ([1], float), + "Alpha": ([1], float), + "Kappa": ([1], float), + "Phi": ([1], float), + "Phi_increment": ([1], float), + "Chi": ([1], float), + "Chi_increment": ([1], float), + "Oscillation_axis": ([slice(1, None)], str), + "N_oscillations": ([1], int), + "Start_position": ([1], float), + "Position_increment": ([1], float), + "Shutter_time": ([1], float), + "Omega": ([1], float), + "Omega_increment": ([1], float), + } + + def REVERSE_BITORDER_BYTES(): + # Bytes with reversed bitorder + return ( + b"\x00\x80@\xc0 \xa0`\xe0\x10\x90P\xd00\xb0p\xf0\x08\x88H\xc8(" + b"\xa8h\xe8\x18\x98X\xd88\xb8x\xf8\x04\x84D\xc4$\xa4d\xe4\x14" + b"\x94T\xd44\xb4t\xf4\x0c\x8cL\xcc,\xacl\xec\x1c\x9c\\\xdc<\xbc|" + b'\xfc\x02\x82B\xc2"\xa2b\xe2\x12\x92R\xd22\xb2r\xf2\n\x8aJ\xca*' + b"\xaaj\xea\x1a\x9aZ\xda:\xbaz\xfa\x06\x86F\xc6&\xa6f\xe6\x16" + b"\x96V\xd66\xb6v\xf6\x0e\x8eN\xce.\xaen\xee\x1e\x9e^\xde>\xbe~" + b"\xfe\x01\x81A\xc1!\xa1a\xe1\x11\x91Q\xd11\xb1q\xf1\t\x89I\xc9)" + b"\xa9i\xe9\x19\x99Y\xd99\xb9y\xf9\x05\x85E\xc5%\xa5e\xe5\x15" + b"\x95U\xd55\xb5u\xf5\r\x8dM\xcd-\xadm\xed\x1d\x9d]\xdd=\xbd}" + b"\xfd\x03\x83C\xc3#\xa3c\xe3\x13\x93S\xd33\xb3s\xf3\x0b\x8bK" + b"\xcb+\xabk\xeb\x1b\x9b[\xdb;\xbb{\xfb\x07\x87G\xc7'\xa7g\xe7" + 
def read_tags(fh, byteorder, offsetsize, tagnames, customtags=None, maxifds=None):
    """Read tags from chain of IFDs and return as list of dicts.

    The file handle position must be at a valid IFD header.

    Parameters
    ----------
    fh : file handle
        Must provide read(), seek(), tell() and a 'size' attribute; it is
        also passed on to the value readers.
    byteorder : str
        Byte order character prepended to all struct formats.
    offsetsize : int
        Size of offsets in bytes. Must be 4 or 8.
    tagnames : dict
        Map of tag codes to names. Unknown codes are named str(code).
    customtags : dict or None
        Map of tag codes to sequences whose item 1 is a function
        (fh, byteorder, dtype, count, offsetsize) used to read the value.
    maxifds : int or None
        Maximum number of IFDs to read. By default 2**32.

    Returns
    -------
    list of dict, or dict
        One dict of {name: value} per IFD. If maxifds is 1 and an IFD
        was read, the single dict is returned instead of a list.

    Raise ValueError if offsetsize is not 4 or 8.
    Raise TiffTag.Error if a tag has an unknown data type or its value
    offset lies outside the file.

    """
    # struct formats and sizes of the IFD fields for the given offset size:
    # number of tags, tag entry (code, type | count, value-or-offset), and
    # offset to next IFD
    if offsetsize == 4:
        offsetformat = byteorder + "I"
        tagnosize = 2
        tagnoformat = byteorder + "H"
        tagsize = 12
        tagformat1 = byteorder + "HH"
        tagformat2 = byteorder + "I4s"
    elif offsetsize == 8:
        offsetformat = byteorder + "Q"
        tagnosize = 8
        tagnoformat = byteorder + "Q"
        tagsize = 20
        tagformat1 = byteorder + "HH"
        tagformat2 = byteorder + "Q8s"
    else:
        raise ValueError("invalid offset size")

    if customtags is None:
        customtags = {}
    if maxifds is None:
        maxifds = 2**32

    result = []
    unpack = struct.unpack
    offset = fh.tell()
    # NOTE(review): the loop ends on a zero or out-of-file next offset, a
    # corrupted tag count, or after maxifds IFDs; there appears to be no
    # check for an IFD chain that links back to an earlier IFD - confirm
    while len(result) < maxifds:
        # loop over IFDs
        try:
            tagno = unpack(tagnoformat, fh.read(tagnosize))[0]
            if tagno > 4096:
                raise ValueError("suspicious number of tags")
        except Exception:
            # unreadable or implausible tag count: warn and stop reading
            warnings.warn("corrupted tag list at offset %i" % offset)
            break

        tags = {}
        # read all tag entries of this IFD at once
        data = fh.read(tagsize * tagno)
        # position of the next-IFD offset; reading tag values below moves
        # the file pointer
        pos = fh.tell()
        index = 0
        for _ in range(tagno):
            code, type_ = unpack(tagformat1, data[index : index + 4])
            count, value = unpack(tagformat2, data[index + 4 : index + tagsize])
            index += tagsize
            name = tagnames.get(code, str(code))
            try:
                dtype = TIFF.DATA_FORMATS[type_]
            except KeyError:
                raise TiffTag.Error("unknown tag data type %i" % type_)

            # presumably dtype is a two character string: item count per
            # value followed by struct format character - verify against
            # TIFF.DATA_FORMATS
            fmt = "%s%i%s" % (byteorder, count * int(dtype[0]), dtype[1])
            size = struct.calcsize(fmt)
            if size > offsetsize or code in customtags:
                # the value field holds an offset to the actual value
                offset = unpack(offsetformat, value)[0]
                if offset < 8 or offset > fh.size - size:
                    raise TiffTag.Error("invalid tag value offset %i" % offset)
                fh.seek(offset)
                if code in customtags:
                    readfunc = customtags[code][1]
                    value = readfunc(fh, byteorder, dtype, count, offsetsize)
                elif type_ == 7 or (count > 1 and dtype[-1] == "B"):
                    value = read_bytes(fh, byteorder, dtype, count, offsetsize)
                elif code in tagnames or dtype[-1] == "s":
                    value = unpack(fmt, fh.read(size))
                else:
                    value = read_numpy(fh, byteorder, dtype, count, offsetsize)
            elif dtype[-1] == "B" or type_ == 7:
                # value is stored inline; keep as bytes
                value = value[:size]
            else:
                # value is stored inline; unpack to tuple
                value = unpack(fmt, value[:size])

            if code not in customtags and code not in TIFF.TAG_TUPLE:
                # unwrap single item sequences
                if len(value) == 1:
                    value = value[0]
            if type_ != 7 and dtype[-1] == "s" and isinstance(value, bytes):
                # TIFF ASCII fields can contain multiple strings,
                # each terminated with a NUL
                try:
                    value = bytes2str(stripascii(value).strip())
                except UnicodeDecodeError:
                    # value is left as bytes
                    warnings.warn("tag %i: coercing invalid ASCII to bytes" % code)

            tags[name] = value

        result.append(tags)
        # read offset to next page
        fh.seek(pos)
        offset = unpack(offsetformat, fh.read(offsetsize))[0]
        if offset == 0:
            # end of IFD chain
            break
        if offset >= fh.size:
            warnings.warn("invalid page offset %i" % offset)
            break
        fh.seek(offset)

    if result and maxifds == 1:
        # single IFD requested: return the dict itself
        result = result[0]
    return result
def read_bytes(fh, byteorder, dtype, count, offsetsize):
    """Return raw bytes of tag value read from file.

    The number of bytes read is count times the item size of the numpy
    type named by the last character of dtype; 's' items count as single
    bytes. A warning is issued if fewer bytes could be read; the bytes
    that were read are returned regardless. The offsetsize argument is
    not used.

    """
    typecode = dtype[-1]
    if typecode == "s":
        itemtype = "B"
    else:
        itemtype = byteorder + typecode
    nbytes = count * numpy.dtype(itemtype).itemsize
    raw = fh.read(nbytes)
    if len(raw) != nbytes:
        warnings.warn("failed to read all bytes: %i, %i" % (len(raw), nbytes))
    return raw
def read_micromanager_metadata(fh):
    """Read MicroManager non-TIFF settings from open file and return as dict.

    The settings can be used to read image data without parsing the TIFF file.

    The returned dict contains the keys 'Summary', 'IndexMap',
    'DisplaySettings', and 'Comments'. 'IndexMap' maps 'Channel', 'Slice',
    'Frame', 'Position', and 'Offset' to tuples with one item per index
    entry; the other values are decoded JSON objects.

    Raise ValueError if the file does not contain valid MicroManager metadata.

    """
    fh.seek(0)
    try:
        byteorder = {b"II": "<", b"MM": ">"}[fh.read(2)]
    except KeyError:
        # a failed dict lookup raises KeyError (not IndexError)
        raise ValueError("not a MicroManager TIFF file")

    result = {}
    # eight unsigned 32-bit fields stored at file offset 8
    fh.seek(8)
    (
        index_header,
        index_offset,
        display_header,
        display_offset,
        comments_header,
        comments_offset,
        summary_header,
        summary_length,
    ) = struct.unpack(byteorder + "IIIIIIII", fh.read(32))

    # the summary JSON directly follows the fields read above
    if summary_header != 2355492:
        raise ValueError("invalid MicroManager summary header")
    result["Summary"] = read_json(fh, byteorder, None, summary_length, None)

    if index_header != 54773648:
        raise ValueError("invalid MicroManager index header")
    fh.seek(index_offset)
    header, count = struct.unpack(byteorder + "II", fh.read(8))
    if header != 3453623:
        raise ValueError("invalid MicroManager index header")
    # each index entry consists of five unsigned 32-bit integers
    data = struct.unpack(byteorder + "IIIII" * count, fh.read(20 * count))
    result["IndexMap"] = {
        "Channel": data[::5],
        "Slice": data[1::5],
        "Frame": data[2::5],
        "Position": data[3::5],
        "Offset": data[4::5],
    }

    if display_header != 483765892:
        raise ValueError("invalid MicroManager display header")
    fh.seek(display_offset)
    header, count = struct.unpack(byteorder + "II", fh.read(8))
    if header != 347834724:
        raise ValueError("invalid MicroManager display header")
    result["DisplaySettings"] = read_json(fh, byteorder, None, count, None)

    if comments_header != 99384722:
        raise ValueError("invalid MicroManager comments header")
    fh.seek(comments_offset)
    header, count = struct.unpack(byteorder + "II", fh.read(8))
    if header != 84720485:
        raise ValueError("invalid MicroManager comments header")
    result["Comments"] = read_json(fh, byteorder, None, count, None)

    return result
+ + """ + # TODO: implement read_metaseries_catalog + raise NotImplementedError() + + +def imagej_metadata_tags(metadata, byteorder): + """Return IJMetadata and IJMetadataByteCounts tags from metadata dict. + + The tags can be passed to the TiffWriter.save function as extratags. + + The metadata dict may contain the following keys and values: + + Info : str + Human-readable information as string. + Labels : sequence of str + Human-readable labels for each channel. + Ranges : sequence of doubles + Lower and upper values for each channel. + LUTs : sequence of (3, 256) uint8 ndarrays + Color palettes for each channel. + Plot : bytes + Undocumented ImageJ internal format. + ROI: bytes + Undocumented ImageJ internal region of interest format. + Overlays : bytes + Undocumented ImageJ internal format. + + """ + header = [{">": b"IJIJ", "<": b"JIJI"}[byteorder]] + bytecounts = [0] + body = [] + + def _string(data, byteorder): + return data.encode("utf-16" + {">": "be", "<": "le"}[byteorder]) + + def _doubles(data, byteorder): + return struct.pack(byteorder + ("d" * len(data)), *data) + + def _ndarray(data, byteorder): + return data.tobytes() + + def _bytes(data, byteorder): + return data + + metadata_types = ( + ("Info", b"info", 1, _string), + ("Labels", b"labl", None, _string), + ("Ranges", b"rang", 1, _doubles), + ("LUTs", b"luts", None, _ndarray), + ("Plot", b"plot", 1, _bytes), + ("ROI", b"roi ", 1, _bytes), + ("Overlays", b"over", None, _bytes), + ) + + for key, mtype, count, func in metadata_types: + if key.lower() in metadata: + key = key.lower() + elif key not in metadata: + continue + if byteorder == "<": + mtype = mtype[::-1] + values = metadata[key] + if count is None: + count = len(values) + else: + values = [values] + header.append(mtype + struct.pack(byteorder + "I", count)) + for value in values: + data = func(value, byteorder) + body.append(data) + bytecounts.append(len(data)) + + if not body: + return () + body = b"".join(body) + header = b"".join(header) 
def imagej_metadata(data, bytecounts, byteorder):
    """Return IJMetadata tag value as dict.

    The 'Info' string can have multiple formats, e.g. OIF or ScanImage,
    that might be parsed into dicts using the matlabstr2py or
    oiffile.SettingsFile functions.

    Parameters
    ----------
    data : bytes
        Value of the IJMetadata tag: b'IJIJ' or b'JIJI', followed by
        (type, count) pairs, followed by the concatenated items.
    bytecounts : sequence of int
        Size of the header, followed by the size of each item in data.
    byteorder : str
        Byte order character, '>' or '<'.

    Items of unknown type are returned as bytes under the decoded type name.

    Raise ValueError if data or bytecounts are not valid ImageJ metadata.

    """

    def _string(data, byteorder):
        return data.decode("utf-16" + {">": "be", "<": "le"}[byteorder])

    def _doubles(data, byteorder):
        return struct.unpack(byteorder + ("d" * (len(data) // 8)), data)

    def _lut(data, byteorder):
        return numpy.frombuffer(data, "uint8").reshape(-1, 256)

    def _bytes(data, byteorder):
        return data

    # NOTE(review): b'plot' is returned as 'Plots' here, while
    # imagej_metadata_tags expects the key 'Plot'; kept unchanged for
    # backward compatibility
    metadata_types = {  # big-endian
        b"info": ("Info", _string),
        b"labl": ("Labels", _string),
        b"rang": ("Ranges", _doubles),
        b"luts": ("LUTs", _lut),
        b"plot": ("Plots", _bytes),
        b"roi ": ("ROI", _bytes),
        b"over": ("Overlays", _bytes),
    }
    metadata_types.update(  # little-endian
        dict((k[::-1], v) for k, v in metadata_types.items())
    )

    if not bytecounts:
        raise ValueError("no ImageJ metadata")

    if data[:4] not in (b"IJIJ", b"JIJI"):
        raise ValueError("invalid ImageJ metadata")

    header_size = bytecounts[0]
    if header_size < 12 or header_size > 804:
        raise ValueError("invalid ImageJ metadata header size")

    # the header holds a 4 byte magic followed by 8 byte (type, count) pairs
    ntypes = (header_size - 4) // 8
    header = struct.unpack(byteorder + "4sI" * ntypes, data[4 : 4 + ntypes * 8])
    pos = 4 + ntypes * 8
    counter = 0
    result = {}
    for mtype, count in zip(header[::2], header[1::2]):
        values = []
        if mtype in metadata_types:
            name, func = metadata_types[mtype]
        else:
            # unknown type: return items unchanged. The previous fallback,
            # read_bytes, takes a file handle and five arguments and failed
            # with TypeError when called as func(data, byteorder)
            name, func = bytes2str(mtype), _bytes
        for _ in range(count):
            counter += 1
            pos1 = pos + bytecounts[counter]
            values.append(func(data[pos:pos1], byteorder))
            pos = pos1
        result[name.strip()] = values[0] if count == 1 else values
    return result
def imagej_shape(shape, rgb=None):
    """Return shape normalized to 6D ImageJ hyperstack TZCYXS.

    Parameters
    ----------
    shape : sequence of int
        Shape of the image data.
    rgb : bool or None
        If True, the last dimension must hold 3 or 4 color samples.
        If None, the image is taken to be RGB when the last dimension is
        3 or 4 and the shape has more than two dimensions.

    Returns
    -------
    tuple of int
        Shape of length 6. Missing leading dimensions, and the trailing
        sample dimension of non-RGB images, are filled in with 1.

    Raise ValueError if not a valid ImageJ hyperstack shape.

    >>> imagej_shape((2, 3, 4, 5, 3), False)
    (2, 3, 4, 5, 3, 1)

    """
    shape = tuple(int(i) for i in shape)
    ndim = len(shape)
    # The previous guard `1 > ndim > 6` was a chained comparison that could
    # never be true. Reject what cannot be normalized to 6 dimensions.
    # One-dimensional shapes were accepted before and still are.
    if ndim < 1 or ndim > 6:
        raise ValueError("invalid ImageJ hyperstack: not 2 to 6 dimensional")
    if rgb is None:
        rgb = shape[-1] in (3, 4) and ndim > 2
    if rgb and shape[-1] not in (3, 4):
        raise ValueError("invalid ImageJ hyperstack: not a RGB image")
    if not rgb and ndim == 6 and shape[-1] != 1:
        raise ValueError("invalid ImageJ hyperstack: not a non-RGB image")
    if rgb or shape[-1] == 1:
        # last dimension already is the sample dimension; pad in front only
        return (1,) * (6 - ndim) + shape
    # append a sample dimension of length 1 and pad in front
    return (1,) * (5 - ndim) + shape + (1,)
def json_description_metadata(description):
    """Return metadata from JSON formatted image description as dict.

    Two formats are recognized: a JSON object (text enclosed in braces)
    and the old style, non-JSON ``shape=(...)`` description.

    Raise ValueError if description is of unknown format.

    >>> description = '{"shape": [256, 256, 3], "axes": "YXS"}'
    >>> json_description_metadata(description)  # doctest: +SKIP
    {'shape': [256, 256, 3], 'axes': 'YXS'}
    >>> json_description_metadata('shape=(256, 256, 3)')
    {'shape': (256, 256, 3)}

    """
    if description[:6] == "shape=":
        # old style 'shaped' description; not JSON
        # drop the 'shape=(' prefix and the closing parenthesis
        dims = description[7:-1].split(",")
        return {"shape": tuple(int(dim) for dim in dims)}
    is_json_object = description[:1] == "{" and description[-1:] == "}"
    if not is_json_object:
        raise ValueError("invalid JSON image description", description)
    return json.loads(description)
'[Intensity Mapping End]') + >>> fluoview_description_metadata(descr) + {'Intensity Mapping': {'Map Ch0: Range': '00000 to 02047'}} + + """ + if not description.startswith("["): + raise ValueError("invalid FluoView image description") + if ignoresections is None: + ignoresections = {"Region Info (Fields)", "Protocol Description"} + + result = {} + sections = [result] + comment = False + for line in description.splitlines(): + if not comment: + line = line.strip() + if not line: + continue + if line[0] == "[": + if line[-5:] == " End]": + # close section + del sections[-1] + section = sections[-1] + name = line[1:-5] + if comment: + section[name] = "\n".join(section[name]) + if name[:4] == "LUT ": + a = numpy.array(section[name], dtype="uint8") + a.shape = -1, 3 + section[name] = a + continue + # new section + comment = False + name = line[1:-1] + if name[:4] == "LUT ": + section = [] + elif name in ignoresections: + section = [] + comment = True + else: + section = {} + sections.append(section) + result[name] = section + continue + # add entry + if comment: + section.append(line) + continue + line = line.split("=", 1) + if len(line) == 1: + section[line[0].strip()] = None + continue + key, value = line + if key[:4] == "RGB ": + section.extend(int(rgb) for rgb in value.split()) + else: + section[key.strip()] = astype(value.strip()) + return result + + +def pilatus_description_metadata(description): + """Return metatata from Pilatus image description as dict. + + Return metadata from Pilatus pixel array detectors by Dectris, created + by camserver or TVX software. 
def svs_description_metadata(description):
    """Return metadata from Aperio image description as dict.

    The first line is split at its last whitespace into the library name
    and version. If a second line exists, it is split at '|': the first
    item is stored under the empty key, all following items are expected
    to be of the form ``key = value``.

    The Aperio image description format is unspecified. Expect failures.

    Raise ValueError if the description does not start with
    'Aperio Image Library ' or if an item has no ' = ' separator.

    >>> svs_description_metadata('Aperio Image Library v1.0')
    {'Aperio Image Library': 'v1.0'}

    """
    if not description.startswith("Aperio Image Library "):
        raise ValueError("invalid Aperio image description")
    result = {}
    lines = description.split("\n")
    key, value = lines[0].strip().rsplit(None, 1)  # 'Aperio Image Library'
    result[key.strip()] = value.strip()
    if len(lines) == 1:
        return result
    items = lines[1].split("|")
    result[""] = items[0].strip()  # TODO: parse this?
    for item in items[1:]:
        # split on the first ' = ' only; a value may itself contain ' = '
        key, value = item.split(" = ", 1)
        result[key.strip()] = astype(value.strip())
    return result
def _replace_by(module_function, package=__package__, warn=None, prefix="_"):
    """Return decorator that leaves the decorated function unchanged.

    The decorator was written to try to replace the decorated function by
    ``module.function`` from another module. In this copy the replacement
    is disabled: the pure Python implementations in this file are always
    used. All arguments are accepted for compatibility and are ignored.

    The import-and-replace code that used to follow the return statement
    was unreachable and has been removed.

    """
    return lambda f: f  # imageio: just use what's in here
@_replace_by("_tifffile.decode_packbits")
def decode_packbits(encoded):
    """Decompress PackBits encoded byte string.

    PackBits is a simple byte-oriented run-length compression scheme.
    Each run starts with a header byte: values below 128 are followed by
    ``header + 1`` literal bytes, values above 128 are followed by a single
    byte that is repeated ``257 - header`` times, and 128 is skipped.
    Decoding stops at the end of the input; a truncated final run yields
    whatever bytes are available.

    """
    python2 = sys.version[0] == "2"
    # indexing a byte string returns str on Python 2 and int on Python 3
    toint = ord if python2 else identityfunc
    decoded = []
    size = len(encoded)
    pos = 0
    while pos < size:
        header = toint(encoded[pos])
        pos += 1
        if header < 128:
            # literal run
            count = header + 1
            decoded.extend(encoded[pos : pos + count])
            pos += count
        elif header > 128:
            # replicated byte
            decoded.extend(encoded[pos : pos + 1] * (257 - header))
            pos += 1
    if python2:
        return b"".join(decoded)
    return bytes(decoded)
@_replace_by("_tifffile.unpack_ints")
def unpack_ints(data, dtype, itemsize, runlen=0):
    """Decompress byte string to array of integers of any bit size <= 32.

    This Python implementation is slow and only handles itemsizes 1, 2, 4, 8,
    16, 32, and 64.

    Parameters
    ----------
    data : byte str
        Data to decompress.
    dtype : numpy.dtype or str
        A numpy boolean or integer type.
    itemsize : int
        Number of bits per integer.
    runlen : int
        Number of consecutive integers, after which to start at next byte.

    Returns
    -------
    numpy.ndarray
        One-dimensional array of the unpacked integers.

    Raises
    ------
    ValueError
        If itemsize is not supported, if dtype is not of kind 'b', 'i' or
        'u', or if dtype.itemsize differs from the smallest of 1, 2, 4 or 8
        bytes that can hold itemsize bits.

    Examples
    --------
    >>> unpack_ints(b'a', 'B', 1)
    array([0, 1, 1, 0, 0, 0, 0, 1], dtype=uint8)
    >>> unpack_ints(b'ab', 'B', 2)
    array([1, 2, 0, 1, 1, 2, 0, 2], dtype=uint8)

    """
    if itemsize == 1:  # bitarray
        data = numpy.frombuffer(data, "|B")
        data = numpy.unpackbits(data)
        if runlen % 8:
            # each run is padded to a whole number of bytes;
            # view one padded run per row and cut the padding bits off
            data = data.reshape(-1, runlen + (8 - runlen % 8))
            data = data[:, :runlen].reshape(-1)
        return data.astype(dtype)

    dtype = numpy.dtype(dtype)
    if itemsize in (8, 16, 32, 64):
        # whole-byte items: reinterpret the buffer as dtype
        return numpy.frombuffer(data, dtype)
    # 1, 8, 16 and 32 have already returned above, so only itemsizes
    # 2 and 4 get past this check
    if itemsize not in (1, 2, 4, 8, 16, 32):
        raise ValueError("itemsize not supported: %i" % itemsize)
    if dtype.kind not in "biu":
        raise ValueError("invalid dtype")

    # smallest number of bytes that can hold one item
    itembytes = next(i for i in (1, 2, 4, 8) if 8 * i >= itemsize)
    # NOTE(review): the test is for inequality although the message says
    # 'too small'; larger dtypes are rejected too
    if itembytes != dtype.itemsize:
        raise ValueError("dtype.itemsize too small")
    if runlen == 0:
        # no run length given: treat all of data as a single run
        runlen = (8 * len(data)) // itemsize
    # number of padding bits that follow each run up to the next byte
    skipbits = runlen * itemsize % 8
    if skipbits:
        skipbits = 8 - skipbits
    # the item is extracted from the high bits of an itembytes wide word
    shrbits = itembytes * 8 - itemsize
    bitmask = int(itemsize * "1" + "0" * shrbits, 2)
    dtypestr = ">" + dtype.char  # dtype always big-endian?

    unpack = struct.unpack
    # runlen times the number of complete padded runs contained in data
    size = runlen * (len(data) * 8 // (runlen * itemsize + skipbits))
    result = numpy.empty((size,), dtype)
    bitcount = 0  # position of the current item in data, in bits
    for i in range(size):
        start = bitcount // 8
        s = data[start : start + itembytes]
        try:
            code = unpack(dtypestr, s)[0]
        except Exception:
            # fewer than itembytes bytes left at the end; pad with zeros
            code = unpack(dtypestr, s + b"\x00" * (itembytes - len(s)))[0]
        # move the item to the high bits, drop everything else, and
        # shift it down to the low bits
        code <<= bitcount % 8
        code &= bitmask
        result[i] = code >> shrbits
        bitcount += itemsize
        if (i + 1) % runlen == 0:
            # end of run: skip the padding bits
            bitcount += skipbits
    return result
def apply_colormap(image, colormap, contig=True):
    """Return palette-colored image.

    The image values are used to index the colormap on axis 1. The returned
    image is of shape image.shape+colormap.shape[0] and dtype colormap.dtype.

    Parameters
    ----------
    image : numpy.ndarray
        Indexes into the colormap.
    colormap : numpy.ndarray
        RGB lookup table aka palette of shape (3, 2**bits_per_sample).
    contig : bool
        If True, return a contiguous array.

    Examples
    --------
    >>> image = numpy.arange(256, dtype='uint8')
    >>> colormap = numpy.vstack([image, image, image]).astype('uint16') * 256
    >>> apply_colormap(image, colormap)[-1]
    array([65280, 65280, 65280], dtype=uint16)

    """
    # look up all samples at once; the colormap's first axis stays in front
    lookup = numpy.take(colormap, image, axis=1)
    # move that first axis behind the image axes
    colored = numpy.rollaxis(lookup, 0, lookup.ndim)
    return numpy.ascontiguousarray(colored) if contig else colored
def repeat_nd(a, repeats):
    """Return read-only view into input array with elements repeated.

    Zoom nD image by integer factors using nearest neighbor interpolation
    (box filter).

    Parameters
    ----------
    a : array_like
        Input array.
    repeats : sequence of int
        The number of repetitions to apply along each dimension of input array.

    Example
    -------
    >>> repeat_nd([[1, 2], [3, 4]], (2, 2))
    array([[1, 1, 2, 2],
           [1, 1, 2, 2],
           [3, 3, 4, 4],
           [3, 3, 4, 4]])

    """
    a = numpy.asarray(a)
    dims = list(zip(a.strides, a.shape, repeats))
    # every dimension is followed by a zero-stride dimension of length
    # 'repeat', which revisits the same element 'repeat' times
    expanded_shape = [n for _, size, repeat in dims for n in (size, repeat)]
    expanded_strides = [s for stride, _, _ in dims for s in (stride, 0)]
    # merging each pair of dimensions yields the zoomed shape
    zoomed_shape = [size * repeat for _, size, repeat in dims]
    view = numpy.lib.stride_tricks.as_strided(
        a, expanded_shape, expanded_strides, writeable=False
    )
    return view.reshape(zoomed_shape)
def squeeze_axes(shape, axes, skip="XY"):
    """Return shape and axes with single-dimensional entries removed.

    Remove unused dimensions unless their axes are listed in 'skip'.

    Parameters
    ----------
    shape : sequence of int
        Length of each dimension.
    axes : str
        One character label per dimension of shape.
    skip : str
        Labels of dimensions that are kept even if their length is 1.

    Returns
    -------
    tuple
        The squeezed shape as tuple of int and the matching axes as str.
        Both are empty if no dimension remains.

    Raise ValueError if shape and axes differ in length.

    >>> squeeze_axes((5, 1, 2, 1, 1), 'TZYXC')
    ((5, 2, 1), 'TYX')
    >>> squeeze_axes((1, 1), 'TZ')
    ((), '')

    """
    if len(shape) != len(axes):
        raise ValueError("dimensions of axes and shape do not match")
    # collect the pairs first: unpacking zip(*pairs) directly fails when
    # every dimension is removed
    kept = [(size, ax) for size, ax in zip(shape, axes) if size > 1 or ax in skip]
    return tuple(size for size, _ in kept), "".join(ax for _, ax in kept)
def stack_pages(pages, out=None, maxworkers=1, *args, **kwargs):
    """Read data from sequence of TiffPage and stack them vertically.

    Additional parameters are passed to the TiffPage.asarray function.

    Parameters
    ----------
    pages : sequence
        TiffPage-like objects; entries may be None.
    out : see create_output
        Passed to create_output to obtain the array that receives the data.
    maxworkers : int or None
        Number of threads used to read pages. If None, half the number of
        CPUs reported by multiprocessing.cpu_count() is used. With fewer
        than 2 workers the pages are read sequentially.

    Returns
    -------
    The output array of shape (len(pages),) + keyframe shape of the first
    page that is not None. For a single page, the result of that page's
    asarray function is returned directly.

    Raise ValueError if pages is empty.

    """
    npages = len(pages)
    if npages == 0:
        raise ValueError("no pages")

    if npages == 1:
        return pages[0].asarray(out=out, *args, **kwargs)

    # the first page that is not None defines shape and dtype of the output
    page0 = next(p for p in pages if p is not None)
    page0.asarray(validate=None)  # ThreadPoolExecutor swallows exceptions
    shape = (npages,) + page0.keyframe.shape
    dtype = page0.keyframe.dtype
    out = create_output(out, shape, dtype)

    if maxworkers is None:
        maxworkers = multiprocessing.cpu_count() // 2
    # NOTE(review): presumably the filehandle's lock setter turns this bool
    # into a real lock object - confirm against the FileHandle class
    page0.parent.filehandle.lock = maxworkers > 1

    filecache = OpenFileCache(
        size=max(4, maxworkers), lock=page0.parent.filehandle.lock
    )

    def func(page, index, out=out, filecache=filecache, args=args, kwargs=kwargs):
        """Read, decode, and copy page data."""
        # entries of out that belong to None pages are not written here
        if page is not None:
            filecache.open(page.parent.filehandle)
            out[index] = page.asarray(
                lock=filecache.lock, reopen=False, validate=False, *args, **kwargs
            )
            filecache.close(page.parent.filehandle)

    if maxworkers < 2:
        for i, page in enumerate(pages):
            func(page, i)
    else:
        # the iterator returned by map is not consumed, so exceptions
        # raised in func are not re-raised here
        with concurrent.futures.ThreadPoolExecutor(maxworkers) as executor:
            executor.map(func, pages, range(npages))

    # NOTE(review): cache and lock are not reset if reading raises in the
    # sequential branch - consider try/finally
    filecache.clear()
    page0.parent.filehandle.lock = None

    return out
def buffered_read(fh, lock, offsets, bytecounts, buffersize=2**26):
    """Return iterator over blocks read from file.

    Block i is read from position offsets[i] of file handle 'fh' with
    length bytecounts[i]. Blocks are read in batches while holding 'lock':
    a batch ends once the requested byte counts add up to at least
    'buffersize' or no offsets are left. The blocks of a batch are yielded
    after the lock has been released.

    """
    total = len(offsets)
    index = 0
    while index < total:
        batch = []
        requested = 0
        with lock:
            while index < total and requested < buffersize:
                fh.seek(offsets[index])
                count = bytecounts[index]
                batch.append(fh.read(count))
                requested += count
                index += 1
        for block in batch:
            yield block
def matlabstr2py(string):
    """Return Python object from Matlab string representation.

    Return str, bool, int, float, list (Matlab arrays or cells), or
    dict (Matlab structures) types.

    A string that contains a line break is parsed as a structure: every
    line of the form ``key = value`` becomes one dict entry. Empty lines,
    lines starting with '%', and lines whose key contains any of the
    characters `` ';[]{}<>`` are skipped. Any other string is parsed as a
    single value.

    Use to access ScanImage metadata.

    >>> matlabstr2py('1')
    1
    >>> matlabstr2py("['x y z' true false; 1 2.0 -3e4; NaN Inf @class]")
    [['x y z', True, False], [1, 2.0, -30000.0], [nan, inf, '@class']]
    >>> d = matlabstr2py("SI.hChannels.channelType = {'stripe' 'stripe'}\\n"
    ...                  "SI.hChannels.channelsActive = 2")
    >>> d['SI.hChannels.channelType']
    ['stripe', 'stripe']

    """
    # TODO: handle invalid input
    # TODO: review unboxing of multidimensional arrays

    def lex(s):
        # return sequence of tokens from matlab string representation
        # the whole input is wrapped in one extra pair of brackets
        tokens = ["["]
        while True:
            t, i = next_token(s)
            if t is None:
                break
            if t == ";":
                # row separator: close the current row and open the next
                tokens.extend(("]", "["))
            elif t == "[":
                # opening an array also opens its first row
                tokens.extend(("[", "["))
            elif t == "]":
                # closing an array also closes its last row
                tokens.extend(("]", "]"))
            else:
                tokens.append(t)
            s = s[i:]
        tokens.append("]")
        return tokens

    def next_token(s):
        # return next token in matlab string
        # returns (token, index after token); token is None at end of input
        length = len(s)
        if length == 0:
            return None, 0
        i = 0
        # skip leading spaces
        while i < length and s[i] == " ":
            i += 1
        if i == length:
            return None, i
        if s[i] in "{[;]}":
            return s[i], i + 1
        if s[i] == "'":
            # quoted string: token runs up to and includes the next quote
            j = i + 1
            while j < length and s[j] != "'":
                j += 1
            return s[i : j + 1], j + 1
        if s[i] == "<":
            # token runs up to and includes the next '>'
            j = i + 1
            while j < length and s[j] != ">":
                j += 1
            return s[i : j + 1], j + 1
        # anything else runs up to the next space or bracket character
        j = i
        while j < length and s[j] not in " {[;]}":
            j += 1
        return s[i:j], j

    def value(s, fail=False):
        # return Python value of token
        # with fail=True, ValueError is raised for strings that cannot be
        # converted as a single token
        s = s.strip()
        if not s:
            return s
        if len(s) == 1:
            try:
                return int(s)
            except Exception:
                if fail:
                    raise ValueError()
                return s
        if s[0] == "'":
            # NOTE(review): 'and' binds tighter than 'or', so a quote inside
            # the string raises ValueError even if fail is False
            if fail and s[-1] != "'" or "'" in s[1:-1]:
                raise ValueError()
            return s[1:-1]
        if s[0] == "<":
            # NOTE(review): same operator precedence as for quotes above
            if fail and s[-1] != ">" or "<" in s[1:-1]:
                raise ValueError()
            return s
        if fail and any(i in s for i in " ';[]{}"):
            raise ValueError()
        if s[0] == "@":
            return s
        if s in ("true", "True"):
            return True
        if s in ("false", "False"):
            return False
        if s[:6] == "zeros(":
            return numpy.zeros([int(i) for i in s[6:-1].split(",")]).tolist()
        if s[:5] == "ones(":
            return numpy.ones([int(i) for i in s[5:-1].split(",")]).tolist()
        if "." in s or "e" in s:
            try:
                return float(s)
            except Exception:
                pass
        try:
            return int(s)
        except Exception:
            pass
        try:
            return float(s)  # nan, inf
        except Exception:
            if fail:
                raise ValueError()
            return s

    def parse(s):
        # return Python value from string representation of Matlab value
        s = s.strip()
        try:
            # try to convert the string as a single token first
            return value(s, fail=True)
        except ValueError:
            pass
        # add2 is the list that currently receives values; levels is the
        # stack of lists that are still open
        result = add2 = []
        levels = [add2]
        for t in lex(s):
            if t in "[{":
                add2 = []
                levels.append(add2)
            elif t in "]}":
                x = levels.pop()
                # unbox lists that hold a single list or str
                if len(x) == 1 and isinstance(x[0], (list, str)):
                    x = x[0]
                add2 = levels[-1]
                add2.append(x)
            else:
                add2.append(value(t))
        if len(result) == 1 and isinstance(result[0], (list, str)):
            result = result[0]
        return result

    if "\r" in string or "\n" in string:
        # structure
        d = {}
        for line in string.splitlines():
            line = line.strip()
            if not line or line[0] == "%":
                continue
            # NOTE(review): a line without '=' raises ValueError here
            k, v = line.split("=", 1)
            k = k.strip()
            if any(c in k for c in " ';[]{}<>"):
                continue
            d[k] = parse(v)
        return d
    return parse(string)
+ + >>> asbool(b' False ') + False + + """ + value = value.strip().lower() + if value in true: # might raise UnicodeWarning/BytesWarning + return True + if value in false: + return False + raise TypeError() + + +def astype(value, types=None): + """Return argument as one of types if possible. + + >>> astype('42') + 42 + >>> astype('3.14') + 3.14 + >>> astype('True') + True + >>> astype(b'Neee-Wom') + 'Neee-Wom' + + """ + if types is None: + types = int, float, asbool, bytes2str + for typ in types: + try: + return typ(value) + except (ValueError, AttributeError, TypeError, UnicodeEncodeError): + pass + return value + + +def format_size(size, threshold=1536): + """Return file size as string from byte size. + + >>> format_size(1234) + '1234 B' + >>> format_size(12345678901) + '11.50 GiB' + + """ + if size < threshold: + return "%i B" % size + for unit in ("KiB", "MiB", "GiB", "TiB", "PiB"): + size /= 1024.0 + if size < threshold: + return "%.2f %s" % (size, unit) + + +def identityfunc(arg): + """Single argument identity function. + + >>> identityfunc('arg') + 'arg' + + """ + return arg + + +def nullfunc(*args, **kwargs): + """Null function. + + >>> nullfunc('arg', kwarg='kwarg') + + """ + return + + +def sequence(value): + """Return tuple containing value if value is not a sequence. + + >>> sequence(1) + (1,) + >>> sequence([1]) + [1] + + """ + try: + len(value) + return value + except TypeError: + return (value,) + + +def product(iterable): + """Return product of sequence of numbers. + + Equivalent of functools.reduce(operator.mul, iterable, 1). + Multiplying numpy integers might overflow. + + >>> product([2**8, 2**30]) + 274877906944 + >>> product([]) + 1 + + """ + prod = 1 + for i in iterable: + prod *= i + return prod + + +def natural_sorted(iterable): + """Return human sorted list of strings. + + E.g. for sorting file names. 
+ + >>> natural_sorted(['f1', 'f2', 'f10']) + ['f1', 'f2', 'f10'] + + """ + + def sortkey(x): + return [(int(c) if c.isdigit() else c) for c in re.split(numbers, x)] + + numbers = re.compile(r"(\d+)") + return sorted(iterable, key=sortkey) + + +def excel_datetime(timestamp, epoch=datetime.datetime.fromordinal(693594)): + """Return datetime object from timestamp in Excel serial format. + + Convert LSM time stamps. + + >>> excel_datetime(40237.029999999795) + datetime.datetime(2010, 2, 28, 0, 43, 11, 999982) + + """ + return epoch + datetime.timedelta(timestamp) + + +def julian_datetime(julianday, milisecond=0): + """Return datetime from days since 1/1/4713 BC and ms since midnight. + + Convert Julian dates according to MetaMorph. + + >>> julian_datetime(2451576, 54362783) + datetime.datetime(2000, 2, 2, 15, 6, 2, 783) + + """ + if julianday <= 1721423: + # no datetime before year 1 + return None + + a = julianday + 1 + if a > 2299160: + alpha = math.trunc((a - 1867216.25) / 36524.25) + a += 1 + alpha - alpha // 4 + b = a + (1524 if a > 1721423 else 1158) + c = math.trunc((b - 122.1) / 365.25) + d = math.trunc(365.25 * c) + e = math.trunc((b - d) / 30.6001) + + day = b - d - math.trunc(30.6001 * e) + month = e - (1 if e < 13.5 else 13) + year = c - (4716 if month > 2.5 else 4715) + + hour, milisecond = divmod(milisecond, 1000 * 60 * 60) + minute, milisecond = divmod(milisecond, 1000 * 60) + second, milisecond = divmod(milisecond, 1000) + + return datetime.datetime(year, month, day, hour, minute, second, milisecond) + + +def byteorder_isnative(byteorder): + """Return if byteorder matches the system's byteorder. 
+ + >>> byteorder_isnative('=') + True + + """ + if byteorder == "=" or byteorder == sys.byteorder: + return True + keys = {"big": ">", "little": "<"} + return keys.get(byteorder, byteorder) == keys[sys.byteorder] + + +def recarray2dict(recarray): + """Return numpy.recarray as dict.""" + # TODO: subarrays + result = {} + for descr, value in zip(recarray.dtype.descr, recarray): + name, dtype = descr[:2] + if dtype[1] == "S": + value = bytes2str(stripnull(value)) + elif value.ndim < 2: + value = value.tolist() + result[name] = value + return result + + +def xml2dict(xml, sanitize=True, prefix=None): + """Return XML as dict. + + >>> xml2dict('1') + {'root': {'key': 1, 'attr': 'name'}} + + """ + from xml.etree import cElementTree as etree # delayed import + + at = tx = "" + if prefix: + at, tx = prefix + + def astype(value): + # return value as int, float, bool, or str + for t in (int, float, asbool): + try: + return t(value) + except Exception: + pass + return value + + def etree2dict(t): + # adapted from https://stackoverflow.com/a/10077069/453463 + key = t.tag + if sanitize: + key = key.rsplit("}", 1)[-1] + d = {key: {} if t.attrib else None} + children = list(t) + if children: + dd = collections.defaultdict(list) + for dc in map(etree2dict, children): + for k, v in dc.items(): + dd[k].append(astype(v)) + d = { + key: { + k: astype(v[0]) if len(v) == 1 else astype(v) for k, v in dd.items() + } + } + if t.attrib: + d[key].update((at + k, astype(v)) for k, v in t.attrib.items()) + if t.text: + text = t.text.strip() + if children or t.attrib: + if text: + d[key][tx + "value"] = astype(text) + else: + d[key] = astype(text) + return d + + return etree2dict(etree.fromstring(xml)) + + +def hexdump(bytestr, width=75, height=24, snipat=-2, modulo=2, ellipsis="..."): + """Return hexdump representation of byte string. + + >>> hexdump(binascii.unhexlify('49492a00080000000e00fe0004000100')) + '49 49 2a 00 08 00 00 00 0e 00 fe 00 04 00 01 00 II*.............' 
+ + """ + size = len(bytestr) + if size < 1 or width < 2 or height < 1: + return "" + if height == 1: + addr = b"" + bytesperline = min(modulo * (((width - len(addr)) // 4) // modulo), size) + if bytesperline < 1: + return "" + nlines = 1 + else: + addr = b"%%0%ix: " % len(b"%x" % size) + bytesperline = min(modulo * (((width - len(addr % 1)) // 4) // modulo), size) + if bytesperline < 1: + return "" + width = 3 * bytesperline + len(addr % 1) + nlines = (size - 1) // bytesperline + 1 + + if snipat is None or snipat == 1: + snipat = height + elif 0 < abs(snipat) < 1: + snipat = int(math.floor(height * snipat)) + if snipat < 0: + snipat += height + + if height == 1 or nlines == 1: + blocks = [(0, bytestr[:bytesperline])] + addr = b"" + height = 1 + width = 3 * bytesperline + elif height is None or nlines <= height: + blocks = [(0, bytestr)] + elif snipat <= 0: + start = bytesperline * (nlines - height) + blocks = [(start, bytestr[start:])] # (start, None) + elif snipat >= height or height < 3: + end = bytesperline * height + blocks = [(0, bytestr[:end])] # (end, None) + else: + end1 = bytesperline * snipat + end2 = bytesperline * (height - snipat - 1) + blocks = [ + (0, bytestr[:end1]), + (size - end1 - end2, None), + (size - end2, bytestr[size - end2 :]), + ] + + ellipsis = str2bytes(ellipsis) + result = [] + for start, bytestr in blocks: + if bytestr is None: + result.append(ellipsis) # 'skip %i bytes' % start) + continue + hexstr = binascii.hexlify(bytestr) + strstr = re.sub(rb"[^\x20-\x7f]", b".", bytestr) + for i in range(0, len(bytestr), bytesperline): + h = hexstr[2 * i : 2 * i + bytesperline * 2] + r = (addr % (i + start)) if height > 1 else addr + r += b" ".join(h[i : i + 2] for i in range(0, 2 * bytesperline, 2)) + r += b" " * (width - len(r)) + r += strstr[i : i + bytesperline] + result.append(r) + result = b"\n".join(result) + if sys.version_info[0] == 3: + result = result.decode("ascii") + return result + + +def isprintable(string): + """Return if all 
characters in string are printable. + + >>> isprintable('abc') + True + >>> isprintable(b'\01') + False + + """ + string = string.strip() + if len(string) < 1: + return True + if sys.version_info[0] == 3: + try: + return string.isprintable() + except Exception: + pass + try: + return string.decode("utf-8").isprintable() + except Exception: + pass + else: + if string.isalnum(): + return True + printable = ( + "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRST" + "UVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c" + ) + return all(c in printable for c in string) + + +def clean_whitespace(string, compact=False): + """Return string with compressed whitespace.""" + for a, b in ( + ("\r\n", "\n"), + ("\r", "\n"), + ("\n\n", "\n"), + ("\t", " "), + (" ", " "), + ): + string = string.replace(a, b) + if compact: + for a, b in (("\n", " "), ("[ ", "["), (" ", " "), (" ", " "), (" ", " ")): + string = string.replace(a, b) + return string.strip() + + +def pformat_xml(xml): + """Return pretty formatted XML.""" + try: + import lxml.etree as etree # delayed import + + if not isinstance(xml, bytes): + xml = xml.encode("utf-8") + xml = etree.parse(io.BytesIO(xml)) + xml = etree.tostring( + xml, pretty_print=True, xml_declaration=True, encoding=xml.docinfo.encoding + ) + xml = bytes2str(xml) + except Exception: + if isinstance(xml, bytes): + xml = bytes2str(xml) + xml = xml.replace("><", ">\n<") + return xml.replace(" ", " ").replace("\t", " ") + + +def pformat(arg, width=79, height=24, compact=True): + """Return pretty formatted representation of object as string. + + Whitespace might be altered. 
+ + """ + if height is None or height < 1: + height = 1024 + if width is None or width < 1: + width = 256 + + npopt = numpy.get_printoptions() + numpy.set_printoptions(threshold=100, linewidth=width) + + if isinstance(arg, basestring): + if arg[:5].lower() in (" height: + arg = "\n".join(argl[: height // 2] + ["..."] + argl[-height // 2 :]) + return arg + + +def snipstr(string, width=79, snipat=0.5, ellipsis="..."): + """Return string cut to specified length. + + >>> snipstr('abcdefghijklmnop', 8) + 'abc...op' + + """ + if ellipsis is None: + if isinstance(string, bytes): + ellipsis = b"..." + else: + ellipsis = "\u2026" # does not print on win-py3.5 + esize = len(ellipsis) + + splitlines = string.splitlines() + # TODO: finish and test multiline snip + + result = [] + for line in splitlines: + if line is None: + result.append(ellipsis) + continue + linelen = len(line) + if linelen <= width: + result.append(string) + continue + + split = snipat + if split is None or split == 1: + split = linelen + elif 0 < abs(split) < 1: + split = int(math.floor(linelen * split)) + if split < 0: + split += linelen + if split < 0: + split = 0 + + if esize == 0 or width < esize + 1: + if split <= 0: + result.append(string[-width:]) + else: + result.append(string[:width]) + elif split <= 0: + result.append(ellipsis + string[esize - width :]) + elif split >= linelen or width < esize + 4: + result.append(string[: width - esize] + ellipsis) + else: + splitlen = linelen - width + esize + end1 = split - splitlen // 2 + end2 = end1 + splitlen + result.append(string[:end1] + ellipsis + string[end2:]) + + if isinstance(string, bytes): + return b"\n".join(result) + else: + return "\n".join(result) + + +def enumarg(enum, arg): + """Return enum member from its name or value. 
+ + >>> enumarg(TIFF.PHOTOMETRIC, 2) + + >>> enumarg(TIFF.PHOTOMETRIC, 'RGB') + + + """ + try: + return enum(arg) + except Exception: + try: + return enum[arg.upper()] + except Exception: + raise ValueError("invalid argument %s" % arg) + + +def parse_kwargs(kwargs, *keys, **keyvalues): + """Return dict with keys from keys|keyvals and values from kwargs|keyvals. + + Existing keys are deleted from kwargs. + + >>> kwargs = {'one': 1, 'two': 2, 'four': 4} + >>> kwargs2 = parse_kwargs(kwargs, 'two', 'three', four=None, five=5) + >>> kwargs == {'one': 1} + True + >>> kwargs2 == {'two': 2, 'four': 4, 'five': 5} + True + + """ + result = {} + for key in keys: + if key in kwargs: + result[key] = kwargs[key] + del kwargs[key] + for key, value in keyvalues.items(): + if key in kwargs: + result[key] = kwargs[key] + del kwargs[key] + else: + result[key] = value + return result + + +def update_kwargs(kwargs, **keyvalues): + """Update dict with keys and values if keys do not already exist. + + >>> kwargs = {'one': 1, } + >>> update_kwargs(kwargs, one=None, two=2) + >>> kwargs == {'one': 1, 'two': 2} + True + + """ + for key, value in keyvalues.items(): + if key not in kwargs: + kwargs[key] = value + + +def validate_jhove(filename, jhove="jhove", ignore=("More than 50 IFDs",)): + """Validate TIFF file using jhove -m TIFF-hul. + + Raise ValueError if jhove outputs an error message unless the message + contains one of the strings in 'ignore'. + + JHOVE does not support bigtiff or more than 50 IFDs. 
+ + See `JHOVE TIFF-hul Module `_ + + """ + import subprocess # noqa: delayed import + + out = subprocess.check_output([jhove, filename, "-m", "TIFF-hul"]) + if b"ErrorMessage: " in out: + for line in out.splitlines(): + line = line.strip() + if line.startswith(b"ErrorMessage: "): + error = line[14:].decode("utf8") + for i in ignore: + if i in error: + break + else: + raise ValueError(error) + break + + +def lsm2bin(lsmfile, binfile=None, tile=(256, 256), verbose=True): + """Convert [MP]TZCYX LSM file to series of BIN files. + + One BIN file containing 'ZCYX' data are created for each position, time, + and tile. The position, time, and tile indices are encoded at the end + of the filenames. + + """ + verbose = print_ if verbose else nullfunc + + if binfile is None: + binfile = lsmfile + elif binfile.lower() == "none": + binfile = None + if binfile: + binfile += "_(z%ic%iy%ix%i)_m%%ip%%it%%03iy%%ix%%i.bin" + + verbose("\nOpening LSM file... ", end="", flush=True) + start_time = time.time() + + with TiffFile(lsmfile) as lsm: + if not lsm.is_lsm: + verbose("\n", lsm, flush=True) + raise ValueError("not a LSM file") + series = lsm.series[0] # first series contains the image data + shape = series.shape + axes = series.axes + dtype = series.dtype + size = product(shape) * dtype.itemsize + + verbose("%.3f s" % (time.time() - start_time)) + # verbose(lsm, flush=True) + verbose( + "Image\n axes: %s\n shape: %s\n dtype: %s\n size: %s" + % (axes, shape, dtype, format_size(size)), + flush=True, + ) + if not series.axes.endswith("TZCYX"): + raise ValueError("not a *TZCYX LSM file") + + verbose("Copying image from LSM to BIN files", end="", flush=True) + start_time = time.time() + tiles = shape[-2] // tile[-2], shape[-1] // tile[-1] + if binfile: + binfile = binfile % (shape[-4], shape[-3], tile[0], tile[1]) + shape = (1,) * (7 - len(shape)) + shape + # cache for ZCYX stacks and output files + data = numpy.empty(shape[3:], dtype=dtype) + out = numpy.empty((shape[-4], shape[-3], 
tile[0], tile[1]), dtype=dtype) + # iterate over Tiff pages containing data + pages = iter(series.pages) + for m in range(shape[0]): # mosaic axis + for p in range(shape[1]): # position axis + for t in range(shape[2]): # time axis + for z in range(shape[3]): # z slices + data[z] = next(pages).asarray() + for y in range(tiles[0]): # tile y + for x in range(tiles[1]): # tile x + out[:] = data[ + ..., + y * tile[0] : (y + 1) * tile[0], + x * tile[1] : (x + 1) * tile[1], + ] + if binfile: + out.tofile(binfile % (m, p, t, y, x)) + verbose(".", end="", flush=True) + verbose(" %.3f s" % (time.time() - start_time)) + + +def imshow( + data, + title=None, + vmin=0, + vmax=None, + cmap=None, + bitspersample=None, + photometric="RGB", + interpolation=None, + dpi=96, + figure=None, + subplot=111, + maxdim=32768, + **kwargs, +): + """Plot n-dimensional images using matplotlib.pyplot. + + Return figure, subplot and plot axis. + Requires pyplot already imported C{from matplotlib import pyplot}. + + Parameters + ---------- + bitspersample : int or None + Number of bits per channel in integer RGB images. + photometric : {'MINISWHITE', 'MINISBLACK', 'RGB', or 'PALETTE'} + The color space of the image data. + title : str + Window and subplot title. + figure : matplotlib.figure.Figure (optional). + Matplotlib to use for plotting. + subplot : int + A matplotlib.pyplot.subplot axis. + maxdim : int + maximum image width and length. + kwargs : optional + Arguments for matplotlib.pyplot.imshow. 
+ + """ + isrgb = photometric in ("RGB",) # 'PALETTE', 'YCBCR' + if data.dtype.kind == "b": + isrgb = False + if isrgb and not ( + data.shape[-1] in (3, 4) or (data.ndim > 2 and data.shape[-3] in (3, 4)) + ): + isrgb = False + photometric = "MINISBLACK" + + data = data.squeeze() + if photometric in ("MINISWHITE", "MINISBLACK", None): + data = reshape_nd(data, 2) + else: + data = reshape_nd(data, 3) + + dims = data.ndim + if dims < 2: + raise ValueError("not an image") + elif dims == 2: + dims = 0 + isrgb = False + else: + if isrgb and data.shape[-3] in (3, 4): + data = numpy.swapaxes(data, -3, -2) + data = numpy.swapaxes(data, -2, -1) + elif not isrgb and ( + data.shape[-1] < data.shape[-2] // 8 + and data.shape[-1] < data.shape[-3] // 8 + and data.shape[-1] < 5 + ): + data = numpy.swapaxes(data, -3, -1) + data = numpy.swapaxes(data, -2, -1) + isrgb = isrgb and data.shape[-1] in (3, 4) + dims -= 3 if isrgb else 2 + + if isrgb: + data = data[..., :maxdim, :maxdim, :maxdim] + else: + data = data[..., :maxdim, :maxdim] + + if photometric == "PALETTE" and isrgb: + datamax = data.max() + if datamax > 255: + data = data >> 8 # possible precision loss + data = data.astype("B") + elif data.dtype.kind in "ui": + if not (isrgb and data.dtype.itemsize <= 1) or bitspersample is None: + try: + bitspersample = int(math.ceil(math.log(data.max(), 2))) + except Exception: + bitspersample = data.dtype.itemsize * 8 + elif not isinstance(bitspersample, inttypes): + # bitspersample can be tuple, e.g. 
(5, 6, 5) + bitspersample = data.dtype.itemsize * 8 + datamax = 2**bitspersample + if isrgb: + if bitspersample < 8: + data = data << (8 - bitspersample) + elif bitspersample > 8: + data = data >> (bitspersample - 8) # precision loss + data = data.astype("B") + elif data.dtype.kind == "f": + datamax = data.max() + if isrgb and datamax > 1.0: + if data.dtype.char == "d": + data = data.astype("f") + data /= datamax + else: + data = data / datamax + elif data.dtype.kind == "b": + datamax = 1 + elif data.dtype.kind == "c": + data = numpy.absolute(data) + datamax = data.max() + + if not isrgb: + if vmax is None: + vmax = datamax + if vmin is None: + if data.dtype.kind == "i": + dtmin = numpy.iinfo(data.dtype).min + vmin = numpy.min(data) + if vmin == dtmin: + vmin = numpy.min(data > dtmin) + if data.dtype.kind == "f": + dtmin = numpy.finfo(data.dtype).min + vmin = numpy.min(data) + if vmin == dtmin: + vmin = numpy.min(data > dtmin) + else: + vmin = 0 + + pyplot = sys.modules["matplotlib.pyplot"] + + if figure is None: + pyplot.rc("font", family="sans-serif", weight="normal", size=8) + figure = pyplot.figure( + dpi=dpi, figsize=(10.3, 6.3), frameon=True, facecolor="1.0", edgecolor="w" + ) + try: + figure.canvas.manager.window.title(title) + except Exception: + pass + size = len(title.splitlines()) if title else 1 + pyplot.subplots_adjust( + bottom=0.03 * (dims + 2), + top=0.98 - size * 0.03, + left=0.1, + right=0.95, + hspace=0.05, + wspace=0.0, + ) + subplot = pyplot.subplot(subplot) + + if title: + try: + title = unicode(title, "Windows-1252") + except TypeError: + pass + pyplot.title(title, size=11) + + if cmap is None: + if data.dtype.char == "?": + cmap = "gray" + elif data.dtype.kind in "buf" or vmin == 0: + cmap = "viridis" + else: + cmap = "coolwarm" + if photometric == "MINISWHITE": + cmap += "_r" + + image = pyplot.imshow( + numpy.atleast_2d(data[(0,) * dims].squeeze()), + vmin=vmin, + vmax=vmax, + cmap=cmap, + interpolation=interpolation, + **kwargs, + ) + + 
if not isrgb: + pyplot.colorbar() # panchor=(0.55, 0.5), fraction=0.05 + + def format_coord(x, y): + # callback function to format coordinate display in toolbar + x = int(x + 0.5) + y = int(y + 0.5) + try: + if dims: + return "%s @ %s [%4i, %4i]" % (curaxdat[1][y, x], current, y, x) + return "%s @ [%4i, %4i]" % (data[y, x], y, x) + except IndexError: + return "" + + def none(event): + return "" + + subplot.format_coord = format_coord + image.get_cursor_data = none + image.format_cursor_data = none + + if dims: + current = list((0,) * dims) + curaxdat = [0, data[tuple(current)].squeeze()] + sliders = [ + pyplot.Slider( + pyplot.axes([0.125, 0.03 * (axis + 1), 0.725, 0.025]), + "Dimension %i" % axis, + 0, + data.shape[axis] - 1, + 0, + facecolor="0.5", + valfmt="%%.0f [%i]" % data.shape[axis], + ) + for axis in range(dims) + ] + for slider in sliders: + slider.drawon = False + + def set_image(current, sliders=sliders, data=data): + # change image and redraw canvas + curaxdat[1] = data[tuple(current)].squeeze() + image.set_data(curaxdat[1]) + for ctrl, index in zip(sliders, current): + ctrl.eventson = False + ctrl.set_val(index) + ctrl.eventson = True + figure.canvas.draw() + + def on_changed(index, axis, data=data, current=current): + # callback function for slider change event + index = int(round(index)) + curaxdat[0] = axis + if index == current[axis]: + return + if index >= data.shape[axis]: + index = 0 + elif index < 0: + index = data.shape[axis] - 1 + current[axis] = index + set_image(current) + + def on_keypressed(event, data=data, current=current): + # callback function for key press event + key = event.key + axis = curaxdat[0] + if str(key) in "0123456789": + on_changed(key, axis) + elif key == "right": + on_changed(current[axis] + 1, axis) + elif key == "left": + on_changed(current[axis] - 1, axis) + elif key == "up": + curaxdat[0] = 0 if axis == len(data.shape) - 1 else axis + 1 + elif key == "down": + curaxdat[0] = len(data.shape) - 1 if axis == 0 else 
axis - 1 + elif key == "end": + on_changed(data.shape[axis] - 1, axis) + elif key == "home": + on_changed(0, axis) + + figure.canvas.mpl_connect("key_press_event", on_keypressed) + for axis, ctrl in enumerate(sliders): + ctrl.on_changed(lambda k, a=axis: on_changed(k, a)) + + return figure, subplot, image + + +def _app_show(): + """Block the GUI. For use as skimage plugin.""" + pyplot = sys.modules["matplotlib.pyplot"] + pyplot.show() + + +def askopenfilename(**kwargs): + """Return file name(s) from Tkinter's file open dialog.""" + try: + from Tkinter import Tk + import tkFileDialog as filedialog + except ImportError: + from tkinter import Tk, filedialog + root = Tk() + root.withdraw() + root.update() + filenames = filedialog.askopenfilename(**kwargs) + root.destroy() + return filenames + + +def main(argv=None): + """Command line usage main function.""" + if float(sys.version[0:3]) < 2.7: + print("This script requires Python version 2.7 or better.") + print("This is Python version %s" % sys.version) + return 0 + if argv is None: + argv = sys.argv + + import optparse # TODO: use argparse + + parser = optparse.OptionParser( + usage="usage: %prog [options] path", + description="Display image data in TIFF files.", + version="%%prog %s" % __version__, + ) + opt = parser.add_option + opt("-p", "--page", dest="page", type="int", default=-1, help="display single page") + opt( + "-s", + "--series", + dest="series", + type="int", + default=-1, + help="display series of pages of same shape", + ) + opt( + "--nomultifile", + dest="nomultifile", + action="store_true", + default=False, + help="do not read OME series from multiple files", + ) + opt( + "--noplots", + dest="noplots", + type="int", + default=8, + help="maximum number of plots", + ) + opt( + "--interpol", + dest="interpol", + metavar="INTERPOL", + default="bilinear", + help="image interpolation method", + ) + opt("--dpi", dest="dpi", type="int", default=96, help="plot resolution") + opt( + "--vmin", + dest="vmin", + 
type="int", + default=None, + help="minimum value for colormapping", + ) + opt( + "--vmax", + dest="vmax", + type="int", + default=None, + help="maximum value for colormapping", + ) + opt( + "--debug", + dest="debug", + action="store_true", + default=False, + help="raise exception on failures", + ) + opt( + "--doctest", + dest="doctest", + action="store_true", + default=False, + help="runs the docstring examples", + ) + opt("-v", "--detail", dest="detail", type="int", default=2) + opt("-q", "--quiet", dest="quiet", action="store_true") + + settings, path = parser.parse_args() + path = " ".join(path) + + if settings.doctest: + import doctest + + doctest.testmod(optionflags=doctest.ELLIPSIS) + return 0 + if not path: + path = askopenfilename( + title="Select a TIFF file", filetypes=TIFF.FILEOPEN_FILTER + ) + if not path: + parser.error("No file specified") + + if any(i in path for i in "?*"): + path = glob.glob(path) + if not path: + print("no files match the pattern") + return 0 + # TODO: handle image sequences + path = path[0] + + if not settings.quiet: + print("\nReading file structure...", end=" ") + start = time.time() + try: + tif = TiffFile(path, multifile=not settings.nomultifile) + except Exception as e: + if settings.debug: + raise + else: + print("\n", e) + sys.exit(0) + if not settings.quiet: + print("%.3f ms" % ((time.time() - start) * 1e3)) + + if tif.is_ome: + settings.norgb = True + + images = [] + if settings.noplots > 0: + if not settings.quiet: + print("Reading image data... 
", end=" ") + + def notnone(x): + return next(i for i in x if i is not None) + + start = time.time() + try: + if settings.page >= 0: + images = [(tif.asarray(key=settings.page), tif[settings.page], None)] + elif settings.series >= 0: + images = [ + ( + tif.asarray(series=settings.series), + notnone(tif.series[settings.series]._pages), + tif.series[settings.series], + ) + ] + else: + images = [] + for i, s in enumerate(tif.series[: settings.noplots]): + try: + images.append( + (tif.asarray(series=i), notnone(s._pages), tif.series[i]) + ) + except ValueError as e: + images.append((None, notnone(s.pages), None)) + if settings.debug: + raise + else: + print("\nSeries %i failed: %s... " % (i, e), end="") + if not settings.quiet: + print("%.3f ms" % ((time.time() - start) * 1e3)) + except Exception as e: + if settings.debug: + raise + else: + print(e) + + if not settings.quiet: + print() + print(TiffFile.__str__(tif, detail=int(settings.detail))) + print() + tif.close() + + if images and settings.noplots > 0: + try: + import matplotlib + + matplotlib.use("TkAgg") + from matplotlib import pyplot + except ImportError as e: + warnings.warn("failed to import matplotlib.\n%s" % e) + else: + for img, page, series in images: + if img is None: + continue + vmin, vmax = settings.vmin, settings.vmax + if "GDAL_NODATA" in page.tags: + try: + vmin = numpy.min( + img[img > float(page.tags["GDAL_NODATA"].value)] + ) + except ValueError: + pass + if tif.is_stk: + try: + vmin = tif.stk_metadata["MinScale"] + vmax = tif.stk_metadata["MaxScale"] + except KeyError: + pass + else: + if vmax <= vmin: + vmin, vmax = settings.vmin, settings.vmax + if series: + title = "%s\n%s\n%s" % (str(tif), str(page), str(series)) + else: + title = "%s\n %s" % (str(tif), str(page)) + photometric = "MINISBLACK" + if page.photometric not in (3,): + photometric = TIFF.PHOTOMETRIC(page.photometric).name + imshow( + img, + title=title, + vmin=vmin, + vmax=vmax, + bitspersample=page.bitspersample, + 
photometric=photometric, + interpolation=settings.interpol, + dpi=settings.dpi, + ) + pyplot.show() + + +if sys.version_info[0] == 2: + inttypes = int, long # noqa + + def print_(*args, **kwargs): + """Print function with flush support.""" + flush = kwargs.pop("flush", False) + print(*args, **kwargs) + if flush: + sys.stdout.flush() + + def bytes2str(b, encoding=None, errors=None): + """Return string from bytes.""" + return b + + def str2bytes(s, encoding=None): + """Return bytes from string.""" + return s + + def byte2int(b): + """Return value of byte as int.""" + return ord(b) + + class FileNotFoundError(IOError): + pass + + TiffFrame = TiffPage # noqa +else: + inttypes = int + basestring = str, bytes + unicode = str + print_ = print + + def bytes2str(b, encoding=None, errors="strict"): + """Return unicode string from encoded bytes.""" + if encoding is not None: + return b.decode(encoding, errors) + try: + return b.decode("utf-8", errors) + except UnicodeDecodeError: + return b.decode("cp1252", errors) + + def str2bytes(s, encoding="cp1252"): + """Return bytes from unicode string.""" + return s.encode(encoding) + + def byte2int(b): + """Return value of byte as int.""" + return b + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/bsdf.py b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/bsdf.py new file mode 100644 index 0000000000000000000000000000000000000000..609e12aae4bab9a77b6d1c1ddb5743a1a99dea2f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/bsdf.py @@ -0,0 +1,324 @@ +# -*- coding: utf-8 -*- +# imageio is distributed under the terms of the (new) BSD License. + +"""Read/Write BSDF files. + +Backend Library: internal + +The BSDF format enables reading and writing of image data in the +BSDF serialization format. This format allows storage of images, volumes, +and series thereof. 
Data can be of any numeric data type, and can +optionally be compressed. Each image/volume can have associated +meta data, which can consist of any data type supported by BSDF. + +By default, image data is lazily loaded; the actual image data is +not read until it is requested. This allows storing multiple images +in a single file and still have fast access to individual images. +Alternatively, a series of images can be read in streaming mode, reading +images as they are read (e.g. from http). + +BSDF is a simple generic binary format. It is easy to extend and there +are standard extension definitions for 2D and 3D image data. +Read more at http://bsdf.io. + + +Parameters +---------- +random_access : bool + Whether individual images in the file can be read in random order. + Defaults to True for normal files, and to False when reading from HTTP. + If False, the file is read in "streaming mode", allowing reading + files as they are read, but without support for "rewinding". + Note that setting this to True when reading from HTTP, the whole file + is read upon opening it (since lazy loading is not possible over HTTP). + +compression : int + Use ``0`` or "no" for no compression, ``1`` or "zlib" for Zlib + compression (same as zip files and PNG), and ``2`` or "bz2" for Bz2 + compression (more compact but slower). Default 1 (zlib). + Note that some BSDF implementations may not support compression + (e.g. JavaScript). + +""" + +import numpy as np + +from ..core import Format + + +def get_bsdf_serializer(options): + from . 
class Image:
    """Wrap an image array together with its meta data.

    BSDF extensions trigger on the class of an object, so wrapping the
    array in this class (or a subclass) lets the serializer encode it as
    an actual image.

    ``array`` is either a numpy array or, after loading, a dict with the
    keys ``shape``, ``dtype`` and ``data``; ``data`` holds bytes or a lazy
    blob object exposing ``get_bytes()``.
    """

    def __init__(self, array, meta):
        self.array = array
        self.meta = meta

    def get_array(self):
        """Return the image as a numpy array, decoding it on first use.

        Raises ValueError when the stored shape does not match the amount
        of data; the encoded form is then kept, so every later call
        reports the same error instead of returning a flat array.
        """
        if not isinstance(self.array, np.ndarray):
            encoded = self.array
            blob = encoded["data"]
            if not isinstance(blob, bytes):  # then it's a lazy bsdf.Blob
                blob = blob.get_bytes()
            flat = np.frombuffer(blob, dtype=encoded["dtype"])
            # Reshape *before* storing: assigning the flat array first and
            # then setting its shape would leave a half-decoded array
            # behind whenever the shape is invalid.
            self.array = flat.reshape(encoded["shape"])
        return self.array

    def get_meta(self):
        """Return the meta data that was passed to the constructor."""
        return self.meta
+ + By default, image data is lazily loaded; the actual image data is + not read until it is requested. This allows storing multiple images + in a single file and still have fast access to individual images. + Alternatively, a series of images can be read in streaming mode, reading + images as they are read (e.g. from http). + + BSDF is a simple generic binary format. It is easy to extend and there + are standard extension definitions for 2D and 3D image data. + Read more at http://bsdf.io. + + Parameters for reading + ---------------------- + random_access : bool + Whether individual images in the file can be read in random order. + Defaults to True for normal files, and to False when reading from HTTP. + If False, the file is read in "streaming mode", allowing reading + files as they are read, but without support for "rewinding". + Note that setting this to True when reading from HTTP, the whole file + is read upon opening it (since lazy loading is not possible over HTTP). + + Parameters for saving + --------------------- + compression : {0, 1, 2} + Use ``0`` or "no" for no compression, ``1`` or "zlib" for Zlib + compression (same as zip files and PNG), and ``2`` or "bz2" for Bz2 + compression (more compact but slower). Default 1 (zlib). + Note that some BSDF implementations may not support compression + (e.g. JavaScript). + + """ + + def _can_read(self, request): + if request.mode[1] in (self.modes + "?"): + # if request.extension in self.extensions: + # return True + if request.firstbytes.startswith(b"BSDF"): + return True + + def _can_write(self, request): + if request.mode[1] in (self.modes + "?"): + if request.extension in self.extensions: + return True + + # -- reader + + class Reader(Format.Reader): + def _open(self, random_access=None): + # Validate - we need a BSDF file consisting of a list of images + # The list is typically a stream, but does not have to be. 
+ assert self.request.firstbytes[:4] == b"BSDF", "Not a BSDF file" + # self.request.firstbytes[5:6] == major and minor version + if not ( + self.request.firstbytes[6:15] == b"M\x07image2D" + or self.request.firstbytes[6:15] == b"M\x07image3D" + or self.request.firstbytes[6:7] == b"l" + ): + pass # Actually, follow a more duck-type approach ... + # raise RuntimeError('BSDF file does not look like an ' + # 'image container.') + # Set options. If we think that seeking is allowed, we lazily load + # blobs, and set streaming to False (i.e. the whole file is read, + # but we skip over binary blobs), so that we subsequently allow + # random access to the images. + # If seeking is not allowed (e.g. with a http request), we cannot + # lazily load blobs, but we can still load streaming from the web. + options = {} + if self.request.filename.startswith(("http://", "https://")): + ra = False if random_access is None else bool(random_access) + options["lazy_blob"] = False # Because we cannot seek now + options["load_streaming"] = not ra # Load as a stream? + else: + ra = True if random_access is None else bool(random_access) + options["lazy_blob"] = ra # Don't read data until needed + options["load_streaming"] = not ra + + file = self.request.get_file() + bsdf, self._serializer = get_bsdf_serializer(options) + self._stream = self._serializer.load(file) + # Another validation + if ( + isinstance(self._stream, dict) + and "meta" in self._stream + and "array" in self._stream + ): + self._stream = Image(self._stream["array"], self._stream["meta"]) + if not isinstance(self._stream, (Image, list, bsdf.ListStream)): + raise RuntimeError( + "BSDF file does not look seem to have an " "image container." 
+ ) + + def _close(self): + pass + + def _get_length(self): + if isinstance(self._stream, Image): + return 1 + elif isinstance(self._stream, list): + return len(self._stream) + elif self._stream.count < 0: + return np.inf + return self._stream.count + + def _get_data(self, index): + # Validate + if index < 0 or index >= self.get_length(): + raise IndexError( + "Image index %i not in [0 %i]." % (index, self.get_length()) + ) + # Get Image object + if isinstance(self._stream, Image): + image_ob = self._stream # singleton + elif isinstance(self._stream, list): + # Easy when we have random access + image_ob = self._stream[index] + else: + # For streaming, we need to skip over frames + if index < self._stream.index: + raise IndexError( + "BSDF file is being read in streaming " + "mode, thus does not allow rewinding." + ) + while index > self._stream.index: + self._stream.next() + image_ob = self._stream.next() # Can raise StopIteration + # Is this an image? + if ( + isinstance(image_ob, dict) + and "meta" in image_ob + and "array" in image_ob + ): + image_ob = Image(image_ob["array"], image_ob["meta"]) + if isinstance(image_ob, Image): + # Return as array (if we have lazy blobs, they are read now) + return image_ob.get_array(), image_ob.get_meta() + else: + r = repr(image_ob) + r = r if len(r) < 200 else r[:197] + "..." 
+ raise RuntimeError("BSDF file contains non-image " + r) + + def _get_meta_data(self, index): # pragma: no cover + return {} # This format does not support global meta data + + # -- writer + + class Writer(Format.Writer): + def _open(self, compression=1): + options = {"compression": compression} + bsdf, self._serializer = get_bsdf_serializer(options) + if self.request.mode[1] in "iv": + self._stream = None # Singleton image + self._written = False + else: + # Series (stream) of images + file = self.request.get_file() + self._stream = bsdf.ListStream() + self._serializer.save(file, self._stream) + + def _close(self): + # We close the stream here, which will mark the number of written + # elements. If we would not close it, the file would be fine, it's + # just that upon reading it would not be known how many items are + # in there. + if self._stream is not None: + self._stream.close(False) # False says "keep this a stream" + + def _append_data(self, im, meta): + # Determine dimension + ndim = None + if self.request.mode[1] in "iI": + ndim = 2 + elif self.request.mode[1] in "vV": + ndim = 3 + else: + ndim = 3 # Make an educated guess + if im.ndim == 2 or (im.ndim == 3 and im.shape[-1] <= 4): + ndim = 2 + # Validate shape + assert ndim in (2, 3) + if ndim == 2: + assert im.ndim == 2 or (im.ndim == 3 and im.shape[-1] <= 4) + else: + assert im.ndim == 3 or (im.ndim == 4 and im.shape[-1] <= 4) + # Wrap data and meta data in our special class that will trigger + # the BSDF image2D or image3D extension. 
def set_meta_data(self, meta):  # pragma: no cover
    """Always raise RuntimeError: global meta data cannot be stored."""
    message = "The BSDF format only supports per-image meta data."
    raise RuntimeError(message)
def get_dcmdjpeg_exe():
    """Locate the ``dcmdjpeg`` executable.

    A few fixed Windows directories are checked first; after that the
    bare command name is probed with ``--version``.

    Returns
    -------
    list of str or None
        A one-element command list (suitable as the start of a
        ``subprocess`` argument list), or None when the tool is not found.
    """
    fname = "dcmdjpeg" + ".exe" * sys.platform.startswith("win")
    search_dirs = (
        "c:\\dcmtk",
        "c:\\Program Files",
        "c:\\Program Files\\dcmtk",
        "c:\\Program Files (x86)\\dcmtk",
    )
    for dirname in search_dirs:
        filename = os.path.join(dirname, fname)
        if os.path.isfile(filename):
            return [filename]

    try:
        # This is only a probe: discard the tool's output so that its
        # version banner does not end up on the caller's stdout/stderr.
        subprocess.check_call(
            [fname, "--version"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except (OSError, subprocess.SubprocessError):
        # Not installed, not executable, or exited with an error status.
        return None
    return [fname]
candidates.sort(reverse=True) + # Select executable + filename = None + for _, dirname in candidates: + exe1 = os.path.join(dirname, "gdcmconv.exe") + exe2 = os.path.join(dirname, "bin", "gdcmconv.exe") + if os.path.isfile(exe1): + filename = exe1 + break + if os.path.isfile(exe2): + filename = exe2 + break + else: + return None + return [filename, "--raw"] + + +class DicomFormat(Format): + """See :mod:`imageio.plugins.dicom`""" + + def _can_read(self, request): + # If user URI was a directory, we check whether it has a DICOM file + if os.path.isdir(request.filename): + files = os.listdir(request.filename) + for fname in sorted(files): # Sorting make it consistent + filename = os.path.join(request.filename, fname) + if os.path.isfile(filename) and "DICOMDIR" not in fname: + with open(filename, "rb") as f: + first_bytes = read_n_bytes(f, 140) + return first_bytes[128:132] == b"DICM" + else: + return False + # Check + return request.firstbytes[128:132] == b"DICM" + + def _can_write(self, request): + # We cannot save yet. May be possible if we will used pydicom as + # a backend. + return False + + # -- + + class Reader(Format.Reader): + _compressed_warning_dirs = set() + + def _open(self, progress=True): + if not _dicom: + load_lib() + if os.path.isdir(self.request.filename): + # A dir can be given if the user used the format explicitly + self._info = {} + self._data = None + else: + # Read the given dataset now ... + try: + dcm = _dicom.SimpleDicomReader(self.request.get_file()) + except _dicom.CompressedDicom as err: + # We cannot do this on our own. Perhaps with some help ... 
+ cmd = get_gdcmconv_exe() + if not cmd and "JPEG" in str(err): + cmd = get_dcmdjpeg_exe() + if not cmd: + msg = err.args[0].replace("using", "installing") + msg = msg.replace("convert", "auto-convert") + err.args = (msg,) + raise + else: + fname1 = self.request.get_local_filename() + fname2 = fname1 + ".raw" + try: + subprocess.check_call(cmd + [fname1, fname2]) + except Exception: + raise err + d = os.path.dirname(fname1) + if d not in self._compressed_warning_dirs: + self._compressed_warning_dirs.add(d) + logger.warning( + "DICOM file contained compressed data. " + + "Autoconverting with " + + cmd[0] + + " (this warning is shown once for each directory)" + ) + dcm = _dicom.SimpleDicomReader(fname2) + + self._info = dcm._info + self._data = dcm.get_numpy_array() + + # Initialize series, list of DicomSeries objects + self._series = None # only created if needed + + # Set progress indicator + if isinstance(progress, BaseProgressIndicator): + self._progressIndicator = progress + elif progress is True: + p = StdoutProgressIndicator("Reading DICOM") + self._progressIndicator = p + elif progress in (None, False): + self._progressIndicator = BaseProgressIndicator("Dummy") + else: + raise ValueError("Invalid value for progress.") + + def _close(self): + # Clean up + self._info = None + self._data = None + self._series = None + + @property + def series(self): + if self._series is None: + pi = self._progressIndicator + self._series = _dicom.process_directory(self.request, pi) + return self._series + + def _get_length(self): + if self._data is None: + dcm = self.series[0][0] + self._info = dcm._info + self._data = dcm.get_numpy_array() + + nslices = self._data.shape[0] if (self._data.ndim == 3) else 1 + + if self.request.mode[1] == "i": + # User expects one, but lets be honest about this file + return nslices + elif self.request.mode[1] == "I": + # User expects multiple, if this file has multiple slices, ok. + # Otherwise we have to check the series. 
+ if nslices > 1: + return nslices + else: + return sum([len(serie) for serie in self.series]) + elif self.request.mode[1] == "v": + # User expects a volume, if this file has one, ok. + # Otherwise we have to check the series + if nslices > 1: + return 1 + else: + return len(self.series) # We assume one volume per series + elif self.request.mode[1] == "V": + # User expects multiple volumes. We have to check the series + return len(self.series) # We assume one volume per series + else: + raise RuntimeError("DICOM plugin should know what to expect.") + + def _get_slice_data(self, index): + nslices = self._data.shape[0] if (self._data.ndim == 3) else 1 + + # Allow index >1 only if this file contains >1 + if nslices > 1: + return self._data[index], self._info + elif index == 0: + return self._data, self._info + else: + raise IndexError("Dicom file contains only one slice.") + + def _get_data(self, index): + if self._data is None: + dcm = self.series[0][0] + self._info = dcm._info + self._data = dcm.get_numpy_array() + + nslices = self._data.shape[0] if (self._data.ndim == 3) else 1 + + if self.request.mode[1] == "i": + return self._get_slice_data(index) + elif self.request.mode[1] == "I": + # Return slice from volume, or return item from series + if index == 0 and nslices > 1: + return self._data[index], self._info + else: + L = [] + for serie in self.series: + L.extend([dcm_ for dcm_ in serie]) + return L[index].get_numpy_array(), L[index].info + elif self.request.mode[1] in "vV": + # Return volume or series + if index == 0 and nslices > 1: + return self._data, self._info + else: + return ( + self.series[index].get_numpy_array(), + self.series[index].info, + ) + # mode is `?` (typically because we are using V3). If there is a + # series (multiple files), index referrs to the element of the + # series and we read volumes. If there is no series, index + # referrs to the slice in the volume we read "flat" images. 
+ elif len(self.series) > 1: + # mode is `?` and there are multiple series. Each series is a ndimage. + return ( + self.series[index].get_numpy_array(), + self.series[index].info, + ) + else: + # mode is `?` and there is only one series. Each slice is an ndimage. + return self._get_slice_data(index) + + def _get_meta_data(self, index): + if self._data is None: + dcm = self.series[0][0] + self._info = dcm._info + self._data = dcm.get_numpy_array() + + nslices = self._data.shape[0] if (self._data.ndim == 3) else 1 + + # Default is the meta data of the given file, or the "first" file. + if index is None: + return self._info + + if self.request.mode[1] == "i": + return self._info + elif self.request.mode[1] == "I": + # Return slice from volume, or return item from series + if index == 0 and nslices > 1: + return self._info + else: + L = [] + for serie in self.series: + L.extend([dcm_ for dcm_ in serie]) + return L[index].info + elif self.request.mode[1] in "vV": + # Return volume or series + if index == 0 and nslices > 1: + return self._info + else: + return self.series[index].info + else: # pragma: no cover + raise ValueError("DICOM plugin should know what to expect.") diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/example.py b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/example.py new file mode 100644 index 0000000000000000000000000000000000000000..a953bf1455a56f10df0fc1673645652277dfe184 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/example.py @@ -0,0 +1,144 @@ +# -*- coding: utf-8 -*- +# imageio is distributed under the terms of the (new) BSD License. + +"""Example plugin. You can use this as a template for your own plugin.""" + +import numpy as np + +from .. import formats +from ..core import Format + + +class DummyFormat(Format): + """The dummy format is an example format that does nothing. + It will never indicate that it can read or write a file. 
When + explicitly asked to read, it will simply read the bytes. When + explicitly asked to write, it will raise an error. + + This documentation is shown when the user does ``help('thisformat')``. + + Parameters for reading + ---------------------- + Specify arguments in numpy doc style here. + + Parameters for saving + --------------------- + Specify arguments in numpy doc style here. + + """ + + def _can_read(self, request): + # This method is called when the format manager is searching + # for a format to read a certain image. Return True if this format + # can do it. + # + # The format manager is aware of the extensions and the modes + # that each format can handle. It will first ask all formats + # that *seem* to be able to read it whether they can. If none + # can, it will ask the remaining formats if they can: the + # extension might be missing, and this allows formats to provide + # functionality for certain extensions, while giving preference + # to other plugins. + # + # If a format says it can, it should live up to it. The format + # would ideally check the request.firstbytes and look for a + # header of some kind. + # + # The request object has: + # request.filename: a representation of the source (only for reporting) + # request.firstbytes: the first 256 bytes of the file. + # request.mode[0]: read or write mode + + if request.extension in self.extensions: + return True + + def _can_write(self, request): + # This method is called when the format manager is searching + # for a format to write a certain image. It will first ask all + # formats that *seem* to be able to write it whether they can. + # If none can, it will ask the remaining formats if they can. + # + # Return True if the format can do it. + + # In most cases, this code does suffice: + if request.extension in self.extensions: + return True + + # -- reader + + class Reader(Format.Reader): + def _open(self, some_option=False, length=1): + # Specify kwargs here. 
Optionally, the user-specified kwargs + # can also be accessed via the request.kwargs object. + # + # The request object provides two ways to get access to the + # data. Use just one: + # - Use request.get_file() for a file object (preferred) + # - Use request.get_local_filename() for a file on the system + self._fp = self.request.get_file() + self._length = length # passed as an arg in this case for testing + self._data = None + + def _close(self): + # Close the reader. + # Note that the request object will close self._fp + pass + + def _get_length(self): + # Return the number of images. Can be np.inf + return self._length + + def _get_data(self, index): + # Return the data and meta data for the given index + if index >= self._length: + raise IndexError("Image index %i > %i" % (index, self._length)) + # Read all bytes + if self._data is None: + self._data = self._fp.read() + # Put in a numpy array + im = np.frombuffer(self._data, "uint8") + im.shape = len(im), 1 + # Return array and dummy meta data + return im, {} + + def _get_meta_data(self, index): + # Get the meta data for the given index. If index is None, it + # should return the global meta data. + return {} # This format does not support meta data + + # -- writer + + class Writer(Format.Writer): + def _open(self, flags=0): + # Specify kwargs here. Optionally, the user-specified kwargs + # can also be accessed via the request.kwargs object. + # + # The request object provides two ways to write the data. + # Use just one: + # - Use request.get_file() for a file object (preferred) + # - Use request.get_local_filename() for a file on the system + self._fp = self.request.get_file() + + def _close(self): + # Close the reader. + # Note that the request object will close self._fp + pass + + def _append_data(self, im, meta): + # Process the given data and meta data. 
def _get_data(self, index=0, discard_watermark=True, watermark_height=70):
    """Get image and metadata from given index.

    FEI images usually (always?) contain a watermark at the
    bottom of the image, 70 pixels high. We discard this by
    default as it does not contain any information not present
    in the metadata.

    Parameters
    ----------
    index : int
        Index of the image, passed on to the parent reader.
    discard_watermark : bool
        If True (default), cut the bottom rows off the image.
    watermark_height : int
        Number of bottom rows to cut off. With 0 the image is returned
        unchanged.

    Returns
    -------
    (im, meta)
        The image and meta data as returned by the parent reader, with
        the image cropped when requested.
    """
    im, meta = super(FEISEMFormat.Reader, self)._get_data(index)
    # A height of 0 must be skipped explicitly: im[:-0] is im[:0], which
    # would drop every row instead of none.
    if discard_watermark and watermark_height != 0:
        im = im[:-watermark_height]
    return im, meta
note:: + We are in the process of (slowly) replacing this plugin with a new one that + is based on `pyav <https://pyav.org/docs/stable/>`_. It is faster and more + flexible than the plugin documented here. Check the :mod:`pyav + plugin's documentation <imageio.plugins.pyav>` for more information about + this plugin. + +Backend Library: https://github.com/imageio/imageio-ffmpeg + +.. note:: + To use this plugin you have to install its backend:: + + pip install imageio[ffmpeg] + + +The ffmpeg format provides reading and writing for a wide range of movie formats +such as .avi, .mpeg, .mp4, etc. as well as the ability to read streams from +webcams and USB cameras. It is based on ffmpeg and is inspired by/based on `moviepy +<https://github.com/Zulko/moviepy/>`_ by Zulko. + +Parameters for reading +---------------------- +fps : scalar + The number of frames per second of the input stream. Default None (i.e. + read at the file's native fps). One can use this for files with a + variable fps, or in cases where imageio is unable to correctly detect + the fps. In case of trouble opening camera streams, it may help to set an + explicit fps value matching a framerate supported by the camera. +loop : bool + If True, the video will rewind as soon as a frame is requested + beyond the last frame. Otherwise, IndexError is raised. Default False. + Setting this to True will internally call ``count_frames()``, + and set the reader's length to that value instead of inf. +size : str | tuple + The frame size (i.e. resolution) to read the images, e.g. + (100, 100) or "640x480". For camera streams, this allows setting + the capture resolution. For normal video data, ffmpeg will + rescale the data. +dtype : str | type + The dtype for the output arrays. Determines the bit-depth that + is requested from ffmpeg. Supported dtypes: uint8, uint16. + Default: uint8. +pixelformat : str + The pixel format for the camera to use (e.g. "yuyv422" or + "gray"). The camera needs to support the format in order for + this to take effect.
Note that the images produced by this + reader are always RGB. +input_params : list + List of additional arguments to ffmpeg for input file options. + (Can also be provided as ``ffmpeg_params`` for backwards compatibility) + Example ffmpeg arguments to use aggressive error handling: + ['-err_detect', 'aggressive'] +output_params : list + List of additional arguments to ffmpeg for output file options (i.e. the + stream being read by imageio). +print_info : bool + Print information about the video file as reported by ffmpeg. + +Parameters for writing +---------------------- +fps : scalar + The number of frames per second. Default 10. +codec : str + The video codec to use. Default 'libx264', which represents the + widely available mpeg4. When saving .wmv files, the default is + 'msmpeg4', which is more commonly supported on Windows. +quality : float | None + Video output quality. Default is 5. Uses variable bit rate. Highest + quality is 10, lowest is 0. Set to None to prevent variable bitrate + flags to FFMPEG so you can manually specify them using output_params + instead. Specifying a fixed bitrate using 'bitrate' disables this + parameter. +bitrate : int | None + Set a constant bitrate for the video encoding. Default is None causing + 'quality' parameter to be used instead. Better quality videos with + smaller file sizes will result from using the 'quality' variable + bitrate parameter rather than specifying a fixed bitrate with this + parameter. +pixelformat: str + The output video pixel format. Default is 'yuv420p', which is the most + widely supported by video players. +input_params : list + List of additional arguments to ffmpeg for input file options (i.e. the + stream that imageio provides). +output_params : list + List of additional arguments to ffmpeg for output file options.
+ (Can also be provided as ``ffmpeg_params`` for backwards compatibility) + Example ffmpeg arguments to use only intra frames and set aspect ratio: + ['-intra', '-aspect', '16:9'] +ffmpeg_log_level: str + Sets ffmpeg output log level. Default is "warning". + Values can be "quiet", "panic", "fatal", "error", "warning", "info" + "verbose", or "debug". Also prints the FFMPEG command being used by + imageio if "info", "verbose", or "debug". +macro_block_size: int + Size constraint for video. Width and height, must be divisible by this + number. If not divisible by this number imageio will tell ffmpeg to + scale the image up to the next closest size + divisible by this number. Most codecs are compatible with a macroblock + size of 16 (default), some can go smaller (4, 8). To disable this + automatic feature set it to None or 1, however be warned many players + can't decode videos that are odd in size and some codecs will produce + poor results or fail. See https://en.wikipedia.org/wiki/Macroblock. +audio_path : str | None + Audio path of any audio that needs to be written. Defaults to nothing, + so no audio will be written. Please note, when writing shorter video + than the original, ffmpeg will not truncate the audio track; it + will maintain its original length and be longer than the video. +audio_codec : str | None + The audio codec to use. Defaults to nothing, but if an audio_path has + been provided ffmpeg will attempt to set a default codec. + +Notes +----- +If you are using anaconda and ``anaconda/ffmpeg`` you will not be able to +encode/decode H.264 (likely due to licensing concerns). If you need this +format on anaconda install ``conda-forge/ffmpeg`` instead. + +You can use the ``IMAGEIO_FFMPEG_EXE`` environment variable to force using a +specific ffmpeg executable. 
+ +To get the number of frames before having read them all, you can use the +``reader.count_frames()`` method (the reader will then use +``imageio_ffmpeg.count_frames_and_secs()`` to get the exact number of frames, +note that this operation can take a few seconds on large files). Alternatively, +the number of frames can be estimated from the fps and duration in the meta data +(though these values themselves are not always present/reliable). + +""" + +import re +import sys +import time +import logging +import platform +import threading +import subprocess as sp +import imageio_ffmpeg + +import numpy as np + +from ..core import Format, image_as_uint + +logger = logging.getLogger(__name__) + +# Get camera format +if sys.platform.startswith("win"): + CAM_FORMAT = "dshow" # dshow or vfwcap +elif sys.platform.startswith("linux"): + CAM_FORMAT = "video4linux2" +elif sys.platform.startswith("darwin"): + CAM_FORMAT = "avfoundation" +else: # pragma: no cover + CAM_FORMAT = "unknown-cam-format" + + +def download(directory=None, force_download=False): # pragma: no cover + raise RuntimeError( + "imageio.ffmpeg.download() has been deprecated. " + "Use 'pip install imageio-ffmpeg' instead.'" + ) + + +# For backwards compatibility - we dont use this ourselves +def get_exe(): # pragma: no cover + """Wrapper for imageio_ffmpeg.get_ffmpeg_exe()""" + + return imageio_ffmpeg.get_ffmpeg_exe() + + +def get_version(): + """Return the version of imageio-ffmpeg in tuple.""" + return tuple(map(int, imageio_ffmpeg.__version__.split("."))) + + +class FfmpegFormat(Format): + """Read/Write ImageResources using FFMPEG. + + See :mod:`imageio.plugins.ffmpeg` + """ + + def _can_read(self, request): + # Read from video stream? + # Note that we could write the _video flag here, but a user might + # select this format explicitly (and this code is not run) + if re.match(r"", request.filename): + return True + + # Read from file that we know? 
+ if request.extension in self.extensions: + return True + + def _can_write(self, request): + if request.extension in self.extensions: + return True + + # -- + + class Reader(Format.Reader): + _frame_catcher = None + _read_gen = None + + def _get_cam_inputname(self, index): + if sys.platform.startswith("linux"): + return "/dev/" + self.request._video[1:-1] + + elif sys.platform.startswith("win"): + # Ask ffmpeg for list of dshow device names + ffmpeg_api = imageio_ffmpeg + cmd = [ + ffmpeg_api.get_ffmpeg_exe(), + "-list_devices", + "true", + "-f", + CAM_FORMAT, + "-i", + "dummy", + ] + # Set `shell=True` in sp.run to prevent popup of a command + # line window in frozen applications. Note: this would be a + # security vulnerability if user-input goes into the cmd. + # Note that the ffmpeg process returns with exit code 1 when + # using `-list_devices` (or `-list_options`), even if the + # command is successful, so we set `check=False` explicitly. + completed_process = sp.run( + cmd, + stdout=sp.PIPE, + stderr=sp.PIPE, + encoding="utf-8", + shell=True, + check=False, + ) + + # Return device name at index + try: + name = parse_device_names(completed_process.stderr)[index] + except IndexError: + raise IndexError("No ffdshow camera at index %i." % index) + return "video=%s" % name + + elif sys.platform.startswith("darwin"): + # Appears that newer ffmpeg builds don't support -list-devices + # on OS X. But you can directly open the camera by index. + name = str(index) + return name + + else: # pragma: no cover + return "??" 
def _open(
    self,
    loop=False,
    size=None,
    dtype=None,
    pixelformat=None,
    print_info=False,
    ffmpeg_params=None,
    input_params=None,
    output_params=None,
    fps=None,
):
    """Validate the reader options and start the ffmpeg subprocess.

    Parameters
    ----------
    loop : bool
        When true (and the source is not a camera) the frames are counted
        up front so that frame indices can wrap around.
    size : tuple | str | None
        Frame size, either a ``(width, height)`` tuple or an ``"NxM"`` string.
    dtype : dtype-like | None
        ``uint8`` (default) or ``uint16``.
    pixelformat : str | None
        Pixel format passed to ffmpeg for camera input.
    print_info : bool
        Accepted for interface compatibility; not used in this method.
    ffmpeg_params : list | None
        Backward-compatible alias whose items are appended to the input
        parameters.
    input_params, output_params : list | None
        Extra ffmpeg arguments for the input and output side.
    fps : scalar | None
        Not read here; ``_initialize`` picks it up from ``request.kwargs``.

    Raises
    ------
    ValueError
        For an unsupported ``size``, ``pixelformat`` or ``dtype``.
    IndexError
        Propagated from ``_initialize`` when a camera cannot be opened.
    """
    # Get generator functions
    self._ffmpeg_api = imageio_ffmpeg
    # Process input args
    self._arg_loop = bool(loop)
    if size is None:
        self._arg_size = None
    elif isinstance(size, tuple):
        self._arg_size = "%ix%i" % size
    elif isinstance(size, str) and "x" in size:
        self._arg_size = size
    else:
        raise ValueError('FFMPEG size must be tuple of "NxM"')
    if pixelformat is None:
        pass
    elif not isinstance(pixelformat, str):
        raise ValueError("FFMPEG pixelformat must be str")
    if dtype is None:
        self._dtype = np.dtype("uint8")
    else:
        self._dtype = np.dtype(dtype)
        allowed_dtypes = ["uint8", "uint16"]
        if self._dtype.name not in allowed_dtypes:
            raise ValueError(
                "dtype must be one of: {}".format(", ".join(allowed_dtypes))
            )
    self._arg_pixelformat = pixelformat
    # Work on private copies: these lists are extended below, which must
    # not leak into the lists owned by the caller.
    self._arg_input_params = list(input_params or [])
    self._arg_output_params = list(output_params or [])
    self._arg_input_params += ffmpeg_params or []  # backward compat
    # Write "_video"_arg - indicating webcam support
    self.request._video = None
    # Camera sources are spelled "<videoN>"; group 1 is the camera index.
    regex_match = re.match(r"<video(\d+)>", self.request.filename)
    if regex_match:
        self.request._video = self.request.filename
    # Get local filename
    if self.request._video:
        index = int(regex_match.group(1))
        self._filename = self._get_cam_inputname(index)
    else:
        self._filename = self.request.get_local_filename()
        # When passed to imageio-ffmpeg (<0.4.2) on command line, carets
        # need to be escaped.
        if get_version() < (0, 4, 2):
            self._filename = self._filename.replace("^", "^^")
    # Determine pixel format and depth
    self._depth = 3
    if self._dtype.name == "uint8":
        self._pix_fmt = "rgb24"
        self._bytes_per_channel = 1
    else:
        self._pix_fmt = "rgb48le"
        self._bytes_per_channel = 2
    # Initialize parameters
    self._pos = -1
    self._meta = {"plugin": "ffmpeg"}
    self._lastread = None

    # Calculating this from fps and duration is not accurate,
    # and calculating it exactly with ffmpeg_api.count_frames_and_secs
    # takes too long to do for each video. But we need it for looping.
    self._nframes = float("inf")
    if self._arg_loop and not self.request._video:
        self._nframes = self.count_frames()
    self._meta["nframes"] = self._nframes

    # Specify input framerate? (only on macOS)
    # Ideally we'd get the supported framerate from the metadata, but we get the
    # metadata when we boot ffmpeg ... maybe we could refactor this so we can
    # get the metadata beforehand, but for now we'll just give it 2 tries on MacOS,
    # one with fps 30 and one with fps 15.
    need_input_fps = need_output_fps = False
    if self.request._video and platform.system().lower() == "darwin":
        if "-framerate" not in str(self._arg_input_params):
            need_input_fps = True
        if not self.request.kwargs.get("fps", None):
            need_output_fps = True
    if need_input_fps:
        self._arg_input_params.extend(["-framerate", str(float(30))])
    if need_output_fps:
        self._arg_output_params.extend(["-r", str(float(30))])

    # Start ffmpeg subprocess and get meta information
    try:
        self._initialize()
    except IndexError:
        # Specify input framerate again, this time different.
        if need_input_fps:
            # The framerate value is the last item appended above.
            self._arg_input_params[-1] = str(float(15))
            self._initialize()
        else:
            raise

    # For cameras, create thread that keeps reading the images
    if self.request._video:
        self._frame_catcher = FrameCatcher(self._read_gen)

    # For reference - but disabled, because it is inaccurate
    # if self._meta["nframes"] == float("inf"):
    #     if self._meta.get("fps", 0) > 0:
    #         if self._meta.get("duration", 0) > 0:
    #             n = round(self._meta["duration"] * self._meta["fps"])
    #             self._meta["nframes"] = int(n)
def _initialize(self, index=0):
    """(Re)start the frame generator, optionally seeking to a frame.

    Closes any running generator, builds the ffmpeg input and output
    argument lists, creates a new ``read_frames`` generator and consumes
    its first item.

    Parameters
    ----------
    index : int
        Frame to start reading at. Values greater than 0 add seek
        arguments and require ``self._meta["fps"]`` to be set already;
        the value is not used for seeking when reading from a camera.

    Raises
    ------
    IndexError
        When reading from a camera and starting the generator raises
        IOError.
    """
    # Close the current generator, and thereby terminate its subprocess
    if self._read_gen is not None:
        self._read_gen.close()

    iargs = []
    oargs = []

    # Create input args
    iargs += self._arg_input_params
    if self.request._video:
        iargs += ["-f", CAM_FORMAT]
        if self._arg_pixelformat:
            iargs += ["-pix_fmt", self._arg_pixelformat]
        if self._arg_size:
            iargs += ["-s", self._arg_size]
    elif index > 0:  # re-initialize / seek
        # Note: only works if we initialized earlier, and now have meta
        # Some info here: https://trac.ffmpeg.org/wiki/Seeking
        # There are two ways to seek, one before -i (input_params) and
        # after (output_params). The former is fast, because it uses
        # keyframes, the latter is slow but accurate. According to
        # the article above, the fast method should also be accurate
        # from ffmpeg version 2.1, however in version 4.1 our tests
        # start failing again. Not sure why, but we can solve this
        # by combining slow and fast. Seek the long stretch using
        # the fast method, and seek the last 10s the slow way.
        starttime = index / self._meta["fps"]
        seek_slow = min(10, starttime)
        seek_fast = starttime - seek_slow
        # We used to have this epsilon earlier, when we did not use
        # the slow seek. I don't think we need it anymore.
        # epsilon = -1 / self._meta["fps"] * 0.1
        iargs += ["-ss", "%.06f" % (seek_fast)]
        oargs += ["-ss", "%.06f" % (seek_slow)]

    # Output args, for writing to pipe
    if self._arg_size:
        oargs += ["-s", self._arg_size]
    if self.request.kwargs.get("fps", None):
        fps = float(self.request.kwargs["fps"])
        oargs += ["-r", "%.02f" % fps]
    # User-supplied output params go last.
    oargs += self._arg_output_params

    # Get pixelformat and bytes per pixel
    pix_fmt = self._pix_fmt
    bpp = self._depth * self._bytes_per_channel

    # Create generator
    rf = self._ffmpeg_api.read_frames
    self._read_gen = rf(
        self._filename, pix_fmt, bpp, input_params=iargs, output_params=oargs
    )

    # Read meta data. This starts the generator (and ffmpeg subprocess)
    if self.request._video:
        # With cameras, catch error and turn into IndexError
        try:
            meta = self._read_gen.__next__()
        except IOError as err:
            err_text = str(err)
            if "darwin" in sys.platform:
                if "Unknown input format: 'avfoundation'" in err_text:
                    err_text += (
                        "Try installing FFMPEG using "
                        "home brew to get a version with "
                        "support for cameras."
                    )
            raise IndexError(
                "No (working) camera at {}.\n\n{}".format(
                    self.request._video, err_text
                )
            )
        else:
            self._meta.update(meta)
    elif index == 0:
        # First start for a file: the first item is merged into the meta data
        self._meta.update(self._read_gen.__next__())
    else:
        self._read_gen.__next__()  # we already have meta data
without creating an immutable + # buffer, by calling im.tobytes() the array must be contiguous. + if not im.flags.c_contiguous: + # checkign the flag is a micro optimization. + # the image will be a numpy subclass. See discussion + # https://github.com/numpy/numpy/issues/11804 + im = np.ascontiguousarray(im) + + # Set size and initialize if not initialized yet + if self._size is None: + map = {1: "gray", 2: "gray8a", 3: "rgb24", 4: "rgba"} + self._pix_fmt = map.get(depth, None) + if self._pix_fmt is None: + raise ValueError("Image must have 1, 2, 3 or 4 channels") + self._size = size + self._depth = depth + self._initialize() + + # Check size of image + if size != self._size: + raise ValueError("All images in a movie should have same size") + if depth != self._depth: + raise ValueError( + "All images in a movie should have same " "number of channels" + ) + + assert self._write_gen is not None # Check status + + # Write. Yes, we can send the data in as a numpy array + self._write_gen.send(im) + + def set_meta_data(self, meta): + raise RuntimeError( + "The ffmpeg format does not support setting " "meta data." 
+ ) + + def _initialize(self): + # Close existing generator + if self._write_gen is not None: + self._write_gen.close() + + # Get parameters + # Use None to let imageio-ffmpeg (or ffmpeg) select good results + fps = self.request.kwargs.get("fps", 10) + codec = self.request.kwargs.get("codec", None) + bitrate = self.request.kwargs.get("bitrate", None) + quality = self.request.kwargs.get("quality", None) + input_params = self.request.kwargs.get("input_params") or [] + output_params = self.request.kwargs.get("output_params") or [] + output_params += self.request.kwargs.get("ffmpeg_params") or [] + pixelformat = self.request.kwargs.get("pixelformat", None) + macro_block_size = self.request.kwargs.get("macro_block_size", 16) + ffmpeg_log_level = self.request.kwargs.get("ffmpeg_log_level", None) + audio_path = self.request.kwargs.get("audio_path", None) + audio_codec = self.request.kwargs.get("audio_codec", None) + + macro_block_size = macro_block_size or 1 # None -> 1 + + # Create generator + self._write_gen = self._ffmpeg_api.write_frames( + self._filename, + self._size, + pix_fmt_in=self._pix_fmt, + pix_fmt_out=pixelformat, + fps=fps, + quality=quality, + bitrate=bitrate, + codec=codec, + macro_block_size=macro_block_size, + ffmpeg_log_level=ffmpeg_log_level, + input_params=input_params, + output_params=output_params, + audio_path=audio_path, + audio_codec=audio_codec, + ) + + # Seed the generator (this is where the ffmpeg subprocess starts) + self._write_gen.send(None) + + +class FrameCatcher(threading.Thread): + """Thread to keep reading the frame data from stdout. This is + useful when streaming from a webcam. Otherwise, if the user code + does not grab frames fast enough, the buffer will fill up, leading + to lag, and ffmpeg can also stall (experienced on Linux). The + get_frame() method always returns the last available image. 
def parse_device_names(ffmpeg_output):
    """Extract camera names from ffmpeg's ``-list_devices`` output.

    Only lines starting with ``[dshow`` are considered. Inside the
    "video devices" section every quoted line is a friendly name and a
    following "Alternative name" line supplies that device's alternative
    name. The friendly name is preferred; the alternative name is used
    when the friendly name was already taken by an earlier device.

    Returns a list with one name per detected video device.
    """
    # Ampersands in alternative names are escaped per platform.
    ampersand = "^&" if sys.platform.startswith("win") else "\\&"

    cameras = []  # [friendly_name, alt_name] per device
    inside_video_section = False
    for raw in ffmpeg_output.splitlines():
        if not raw.startswith("[dshow"):
            continue
        logger.debug(raw)
        text = raw.split("]", 1)[1].strip()
        if inside_video_section and text.startswith('"'):
            cameras.append([text[1:-1], ""])
        elif inside_video_section and text.lower().startswith("alternative name"):
            alternative = text.split(" name ", 1)[1].strip()[1:-1]
            cameras[-1][-1] = alternative.replace("&", ampersand)
        elif "video devices" in text:
            inside_video_section = True
        elif "devices" in text:
            # any other "devices" header ends the video section
            inside_video_section = False

    # Post-process, see #441: fall back to the alternative name only when
    # the friendly name is a duplicate and an alternative is known.
    chosen = []
    for friendly, alternative in cameras:
        if friendly in chosen and alternative:
            chosen.append(alternative)
        else:
            chosen.append(friendly)
    return chosen
def load_lib():
    """Import ``astropy.io.fits`` and cache it in the module-level ``_fits``.

    Returns
    -------
    The imported ``astropy.io.fits`` module.

    Raises
    ------
    ImportError
        When astropy cannot be imported; the message points to the
        astropy website and the original import error is chained.
    """
    global _fits
    try:
        from astropy.io import fits as _fits
    except ImportError as err:
        # The trailing space after "package." matters: the adjacent string
        # literals are concatenated into one sentence sequence.
        raise ImportError(
            "The FITS format relies on the astropy package. "
            "Please refer to http://www.astropy.org/ "
            "for further instructions."
        ) from err
    return _fits
def _open(self, cache=False, **kwargs):
    """Open the FITS file and index the HDUs that hold image data.

    ``cache`` and any extra keyword arguments are forwarded to the fits
    ``open`` call. ``self._index`` receives the positions of the image,
    primary and compressed-image HDUs whose ``size`` is positive, and
    ``self._hdulist`` keeps the opened HDU list.
    """
    if not _fits:
        load_lib()
    opened = _fits.open(self.request.get_file(), cache=cache, **kwargs)

    image_kinds = (_fits.ImageHDU, _fits.PrimaryHDU, _fits.CompImageHDU)
    # Skip (primary) header units without data; '.size' is consulted rather
    # than '.data' so the image itself is not loaded here.
    self._index = [
        position
        for position, unit in enumerate(opened)
        if isinstance(unit, image_kinds) and unit.size > 0
    ]
    self._hdulist = opened
def _can_write(self, request):
    """Tell whether this format can write the given request.

    When the freeimage library is available, the format id for the
    request is looked up once (and cached on ``request._fif``) and
    compared with this format's own id.

    Returns
    -------
    True when the ids match, otherwise None (falsy).
    """
    # Ask freeimage, because we are not aware of all formats
    if fi.has_lib():
        if not hasattr(request, "_fif"):
            try:
                request._fif = fi.getFIF(request.filename, "w")
            except ValueError:  # pragma: no cover
                if request.raw_uri == RETURN_BYTES:
                    request._fif = self.fif
                else:
                    request._fif = -1
        # Format ids are integers: compare by value. Identity ("is") only
        # happens to work for small cached ints and is not a valid
        # equality test.
        if request._fif == self.fif:
            return True
1 + + def _open(self, flags=0): + self._bm = fi.create_bitmap(self.request.filename, self.format.fif, flags) + self._bm.load_from_filename(self.request.get_local_filename()) + + def _close(self): + self._bm.close() + + def _get_data(self, index): + if index != 0: + raise IndexError("This format only supports singleton images.") + return self._bm.get_image_data(), self._bm.get_meta_data() + + def _get_meta_data(self, index): + if not (index is None or index == 0): + raise IndexError() + return self._bm.get_meta_data() + + # -- + + class Writer(Format.Writer): + def _open(self, flags=0): + self._flags = flags # Store flags for later use + self._bm = None + self._is_set = False # To prevent appending more than one image + self._meta = {} + + def _close(self): + # Set global meta data + self._bm.set_meta_data(self._meta) + # Write and close + self._bm.save_to_filename(self.request.get_local_filename()) + self._bm.close() + + def _append_data(self, im, meta): + # Check if set + if not self._is_set: + self._is_set = True + else: + raise RuntimeError( + "Singleton image; " "can only append image data once." + ) + # Pop unit dimension for grayscale images + if im.ndim == 3 and im.shape[-1] == 1: + im = im[:, :, 0] + # Lazy instantaion of the bitmap, we need image data + if self._bm is None: + self._bm = fi.create_bitmap( + self.request.filename, self.format.fif, self._flags + ) + self._bm.allocate(im) + # Set data + self._bm.set_image_data(im) + # There is no distinction between global and per-image meta data + # for singleton images + self._meta = meta + + def _set_meta_data(self, meta): + self._meta = meta + + +# Special plugins + +# todo: there is also FIF_LOAD_NOPIXELS, +# but perhaps that should be used with get_meta_data. + + +class FreeimageBmpFormat(FreeimageFormat): + """A BMP format based on the Freeimage library. + + This format supports grayscale, RGB and RGBA images. + + The freeimage plugin requires a `freeimage` binary. 
def _open(self, flags=0, compression=False):
    """Open the BMP writer, adding the save-mode flag to ``flags``.

    ``compression`` selects ``IO_FLAGS.BMP_SAVE_RLE``; otherwise
    ``IO_FLAGS.BMP_DEFAULT`` is used. The combined flags are handed to the
    generic freeimage writer.
    """
    combined = int(flags)
    mode_flag = IO_FLAGS.BMP_SAVE_RLE if compression else IO_FLAGS.BMP_DEFAULT
    # Delegate to the base writer with the extended flag set
    return FreeimageFormat.Writer._open(self, combined | mode_flag)
def _open(self, flags=0, compression=9, quantize=0, interlaced=False):
    """Open the PNG writer with compression/interlacing flags applied.

    ``compression`` must be one of 0, 1, 6 or 9 and is translated to the
    matching ``IO_FLAGS.PNG_Z_*`` value; ``interlaced`` adds
    ``IO_FLAGS.PNG_INTERLACED``. ``quantize`` is accepted here but not
    used in this method. Raises ValueError for any other compression
    value.
    """
    zlib_levels = {
        0: IO_FLAGS.PNG_Z_NO_COMPRESSION,
        1: IO_FLAGS.PNG_Z_BEST_SPEED,
        6: IO_FLAGS.PNG_Z_DEFAULT_COMPRESSION,
        9: IO_FLAGS.PNG_Z_BEST_COMPRESSION,
    }
    combined = int(flags)
    if interlaced:
        combined |= IO_FLAGS.PNG_INTERLACED
    try:
        level_flag = zlib_levels[compression]
    except KeyError:
        raise ValueError("Png compression must be 0, 1, 6, or 9.")
    # Hand over to the generic writer with the extended flag set
    return FreeimageFormat.Writer._open(self, combined | level_flag)
If this binary + not available on the system, it can be downloaded manually from + by either + + - the command line script ``imageio_download_bin freeimage`` + - the Python method ``imageio.plugins.freeimage.download()`` + + Parameters for reading + ---------------------- + exifrotate : bool + Automatically rotate the image according to the exif flag. + Default True. If 2 is given, do the rotation in Python instead + of freeimage. + quickread : bool + Read the image more quickly, at the expense of quality. + Default False. + + Parameters for saving + --------------------- + quality : scalar + The compression factor of the saved image (1..100), higher + numbers result in higher quality but larger file size. Default 75. + progressive : bool + Save as a progressive JPEG file (e.g. for images on the web). + Default False. + optimize : bool + On saving, compute optimal Huffman coding tables (can reduce a + few percent of file size). Default False. + baseline : bool + Save basic JPEG, without metadata or any markers. Default False. + + """ + + class Reader(FreeimageFormat.Reader): + def _open(self, flags=0, exifrotate=True, quickread=False): + # Build flags from kwargs + flags = int(flags) + if exifrotate and exifrotate != 2: + flags |= IO_FLAGS.JPEG_EXIFROTATE + if not quickread: + flags |= IO_FLAGS.JPEG_ACCURATE + # Enter as usual, with modified flags + return FreeimageFormat.Reader._open(self, flags) + + def _get_data(self, index): + im, meta = FreeimageFormat.Reader._get_data(self, index) + im = self._rotate(im, meta) + return im, meta + + def _rotate(self, im, meta): + """Use Orientation information from EXIF meta data to + orient the image correctly. Freeimage is also supposed to + support that, and I am pretty sure it once did, but now it + does not, so let's just do it in Python. + Edit: and now it works again, just leave in place as a fallback. 
+ """ + if self.request.kwargs.get("exifrotate", None) == 2: + try: + ori = meta["EXIF_MAIN"]["Orientation"] + except KeyError: # pragma: no cover + pass # Orientation not available + else: # pragma: no cover - we cannot touch all cases + # www.impulseadventure.com/photo/exif-orientation.html + if ori in [1, 2]: + pass + if ori in [3, 4]: + im = np.rot90(im, 2) + if ori in [5, 6]: + im = np.rot90(im, 3) + if ori in [7, 8]: + im = np.rot90(im) + if ori in [2, 4, 5, 7]: # Flipped cases (rare) + im = np.fliplr(im) + return im + + # -- + + class Writer(FreeimageFormat.Writer): + def _open( + self, flags=0, quality=75, progressive=False, optimize=False, baseline=False + ): + # Test quality + quality = int(quality) + if quality < 1 or quality > 100: + raise ValueError("JPEG quality should be between 1 and 100.") + # Build flags from kwargs + flags = int(flags) + flags |= quality + if progressive: + flags |= IO_FLAGS.JPEG_PROGRESSIVE + if optimize: + flags |= IO_FLAGS.JPEG_OPTIMIZE + if baseline: + flags |= IO_FLAGS.JPEG_BASELINE + # Act as usual, but with modified flags + return FreeimageFormat.Writer._open(self, flags) + + def _append_data(self, im, meta): + if im.ndim == 3 and im.shape[-1] == 4: + raise IOError("JPEG does not support alpha channel.") + im = image_as_uint(im, bitdepth=8) + return FreeimageFormat.Writer._append_data(self, im, meta) + + +class FreeimagePnmFormat(FreeimageFormat): + """A PNM format based on the Freeimage library. + + This format supports single bit (PBM), grayscale (PGM) and RGB (PPM) + images, even with ASCII or binary coding. + + The freeimage plugin requires a `freeimage` binary. If this binary + not available on the system, it can be downloaded manually from + by either + + - the command line script ``imageio_download_bin freeimage`` + - the Python method ``imageio.plugins.freeimage.download()`` + + Parameters for saving + --------------------- + use_ascii : bool + Save with ASCII coding. Default True. 
+ """ + + class Writer(FreeimageFormat.Writer): + def _open(self, flags=0, use_ascii=True): + # Build flags from kwargs + flags = int(flags) + if use_ascii: + flags |= IO_FLAGS.PNM_SAVE_ASCII + # Act as usual, but with modified flags + return FreeimageFormat.Writer._open(self, flags) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/freeimagemulti.py b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/freeimagemulti.py new file mode 100644 index 0000000000000000000000000000000000000000..b9f2e6a306db35946731b932745afe1a29d19c76 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/freeimagemulti.py @@ -0,0 +1,315 @@ +# -*- coding: utf-8 -*- +# imageio is distributed under the terms of the (new) BSD License. + +"""Plugin for multi-image freeimafe formats, like animated GIF and ico.""" + +import logging +import numpy as np + +from ..core import Format, image_as_uint +from ._freeimage import fi, IO_FLAGS +from .freeimage import FreeimageFormat + +logger = logging.getLogger(__name__) + + +class FreeimageMulti(FreeimageFormat): + """Base class for freeimage formats that support multiple images.""" + + _modes = "iI" + _fif = -1 + + class Reader(Format.Reader): + def _open(self, flags=0): + flags = int(flags) + # Create bitmap + self._bm = fi.create_multipage_bitmap( + self.request.filename, self.format.fif, flags + ) + self._bm.load_from_filename(self.request.get_local_filename()) + + def _close(self): + self._bm.close() + + def _get_length(self): + return len(self._bm) + + def _get_data(self, index): + sub = self._bm.get_page(index) + try: + return sub.get_image_data(), sub.get_meta_data() + finally: + sub.close() + + def _get_meta_data(self, index): + index = index or 0 + if index < 0 or index >= len(self._bm): + raise IndexError() + sub = self._bm.get_page(index) + try: + return sub.get_meta_data() + finally: + sub.close() + + # -- + + class Writer(FreeimageFormat.Writer): + def _open(self, flags=0): + # Set 
def _append_data(self, im, meta):
    """Append one image as a new page of the multi-page bitmap.

    A trailing singleton channel axis is dropped and the image is
    converted with ``image_as_uint(im, bitdepth=8)``. A fresh sub bitmap
    is then created and handed to ``self._append_bitmap``, which may
    return that same bitmap or a new one; the result is appended to
    ``self._bm``.

    Both sub bitmaps are closed even when filling or appending raises,
    so a failing frame does not leave bitmap handles open.

    Parameters
    ----------
    im : ndarray
        Image data to append.
    meta : dict
        Meta data passed on to ``_append_bitmap``.
    """
    # Prepare data
    if im.ndim == 3 and im.shape[-1] == 1:
        im = im[:, :, 0]
    im = image_as_uint(im, bitdepth=8)
    # Create sub bitmap
    sub1 = fi.create_bitmap(self._bm._filename, self.format.fif)
    try:
        # Let subclass add data to bitmap, optionally return new
        sub2 = self._append_bitmap(im, meta, sub1)
        try:
            # Add
            self._bm.append_bitmap(sub2)
        finally:
            # Only close sub2 here when it is a distinct object; sub1 is
            # closed exactly once by the outer finally.
            if sub2 is not sub1:
                sub2.close()
    finally:
        sub1.close()
def _open(self, flags=0, makealpha=False):
    """Open the ICO file, optionally adding the ``ICO_MAKEALPHA`` flag.

    Parameters
    ----------
    flags : int
        Base flag value; converted with ``int()``.
    makealpha : bool
        If true, ``IO_FLAGS.ICO_MAKEALPHA`` is OR-ed into the flags.
    """
    # Build flags from kwargs
    base = int(flags)
    alpha_bit = IO_FLAGS.ICO_MAKEALPHA if makealpha else 0
    return FreeimageMulti.Reader._open(self, base | alpha_bit)
def _open(
    self,
    flags=0,
    loop=0,
    duration=None,
    fps=10,
    palettesize=256,
    quantizer="Wu",
    subrectangles=False,
):
    """Validate the GIF writer options and open the multi-page bitmap.

    Parameters
    ----------
    flags : int
        Flags forwarded to ``FreeimageMulti.Writer._open``.
    loop : int
        Stored as the "Loop" entry of the global ANIMATION meta data.
    duration : None, float, int, list or tuple
        Frame duration(s) in seconds. When None, ``1 / fps`` is used.
    fps : float
        Frames per second; only used when ``duration`` is None.
    palettesize : int
        Number of colors, 2..256. Values that are not a power of two
        are moved up to the next power of two and a warning is logged.
    quantizer : {'wu', 'nq'}
        Quantization algorithm name (case-insensitive).
    subrectangles : bool
        Stored as ``self._subrectangles``.

    Raises
    ------
    ValueError
        For a palettesize outside 2..256, an unknown quantizer, or a
        duration of an unsupported type.
    """
    # Check palettesize
    if palettesize < 2 or palettesize > 256:
        raise ValueError("GIF quantize param must be 2..256")
    if palettesize not in [2, 4, 8, 16, 32, 64, 128, 256]:
        # Derive the power of two from the requested size. The previous
        # code used log2 of the constant 128, which turned every
        # non-power-of-two request into 128.
        palettesize = 2 ** int(np.log2(palettesize) + 0.999)
        logger.warning(
            "Warning: palettesize (%r) modified to a factor of "
            "two between 2-256." % palettesize
        )
    self._palettesize = palettesize
    # Check quantizer
    self._quantizer = {"wu": 0, "nq": 1}.get(quantizer.lower(), None)
    if self._quantizer is None:
        raise ValueError('Invalid quantizer, must be "wu" or "nq".')
    # Check frametime
    if duration is None:
        self._frametime = [int(1000 / float(fps) + 0.5)]
    elif isinstance(duration, (list, tuple)):
        self._frametime = [int(1000 * d) for d in duration]
    elif isinstance(duration, (float, int)):
        self._frametime = [int(1000 * duration)]
    else:
        # Wrap in a 1-tuple so that the value is always formatted as a
        # single %r argument, whatever its type.
        raise ValueError("Invalid value for duration: %r" % (duration,))
    # Check subrectangles
    self._subrectangles = bool(subrectangles)
    self._prev_im = None
    # Init
    FreeimageMulti.Writer._open(self, flags)
    # Set global meta data
    self._meta = {}
    self._meta["ANIMATION"] = {
        # 'GlobalPalette': np.array([0]).astype(np.uint8),
        "Loop": np.array([loop]).astype(np.uint32),
        # 'LogicalWidth': np.array([x]).astype(np.uint16),
        # 'LogicalHeight': np.array([x]).astype(np.uint16),
    }
sub1.set_image_data(im) + # Quantize it if its RGB + if im.ndim == 3 and im.shape[-1] == 3: + sub2 = sub1.quantize(self._quantizer, self._palettesize) + # Set meta data and return + sub2.set_meta_data(meta) + return sub2 + + def _get_sub_rectangles(self, prev, im): + """ + Calculate the minimal rectangles that need updating each frame. + Returns a two-element tuple containing the cropped images and a + list of x-y positions. + """ + # Get difference, sum over colors + diff = np.abs(im - prev) + if diff.ndim == 3: + diff = diff.sum(2) + # Get begin and end for both dimensions + X = np.argwhere(diff.sum(0)) + Y = np.argwhere(diff.sum(1)) + # Get rect coordinates + if X.size and Y.size: + x0, x1 = int(X[0]), int(X[-1]) + 1 + y0, y1 = int(Y[0]), int(Y[-1]) + 1 + else: # No change ... make it minimal + x0, x1 = 0, 2 + y0, y1 = 0, 2 + # Cut out and return + return im[y0:y1, x0:x1], (x0, y0) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/gdal.py b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/gdal.py new file mode 100644 index 0000000000000000000000000000000000000000..48cb7479f2dc8c0b89d2778196048ec3a8997d34 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/gdal.py @@ -0,0 +1,71 @@ +# -*- coding: utf-8 -*- +# imageio is distributed under the terms of the (new) BSD License. + +"""Read GDAL files. + +Backend: `GDAL `_ + +.. note:: + To use this plugin you have to install its backend:: + + pip install imageio[gdal] + +Parameters +---------- +none +""" + +from ..core import Format, has_module + +_gdal = None # lazily loaded in load_lib() + + +def load_lib(): + global _gdal + try: + import osgeo.gdal as _gdal + except ImportError: + raise ImportError( + "The GDAL format relies on the GDAL package." + "Please refer to http://www.gdal.org/" + "for further instructions." 
# File extensions associated with the GDAL plugin. Every entry is a bare
# ".ext" string; the ".tif" entry previously carried a stray leading
# space (" .tif"), so it could never equal a real file extension.
GDAL_FORMATS = (".tiff", ".tif", ".img", ".ecw", ".jpg", ".jpeg")
+""" + +import threading + +import numpy as np + +from ..core import Format + + +class BaseGrabFormat(Format): + """Base format for grab formats.""" + + _pillow_imported = False + _ImageGrab = None + + def __init__(self, *args, **kwargs): + super(BaseGrabFormat, self).__init__(*args, **kwargs) + self._lock = threading.RLock() + + def _can_write(self, request): + return False + + def _init_pillow(self): + with self._lock: + if not self._pillow_imported: + self._pillow_imported = True # more like tried to import + import PIL + + if not hasattr(PIL, "__version__"): # pragma: no cover + raise ImportError("Imageio Pillow requires " "Pillow, not PIL!") + try: + from PIL import ImageGrab + except ImportError: + return None + self._ImageGrab = ImageGrab + return self._ImageGrab + + class Reader(Format.Reader): + def _open(self): + pass + + def _close(self): + pass + + def _get_data(self, index): + return self.format._get_data(index) + + +class ScreenGrabFormat(BaseGrabFormat): + """The ScreenGrabFormat provided a means to grab screenshots using + the uri of "". + + This functionality is provided via Pillow. Note that "" is + only supported on Windows and OS X. + + Parameters for reading + ---------------------- + No parameters. + """ + + def _can_read(self, request): + if request.filename != "": + return False + return bool(self._init_pillow()) + + def _get_data(self, index): + ImageGrab = self._init_pillow() + assert ImageGrab + + pil_im = ImageGrab.grab() + assert pil_im is not None + im = np.asarray(pil_im) + return im, {} + + +class ClipboardGrabFormat(BaseGrabFormat): + """The ClipboardGrabFormat provided a means to grab image data from + the clipboard, using the uri "" + + This functionality is provided via Pillow. Note that "" is + only supported on Windows. + + Parameters for reading + ---------------------- + No parameters. 
+ """ + + def _can_read(self, request): + if request.filename != "": + return False + return bool(self._init_pillow()) + + def _get_data(self, index): + ImageGrab = self._init_pillow() + assert ImageGrab + + pil_im = ImageGrab.grabclipboard() + if pil_im is None: + raise RuntimeError( + "There seems to be no image data on the " "clipboard now." + ) + im = np.asarray(pil_im) + return im, {} diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/lytro.py b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/lytro.py new file mode 100644 index 0000000000000000000000000000000000000000..3d41a6d7073947810805e80d1c4285ef86d8a793 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/lytro.py @@ -0,0 +1,714 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2018, imageio contributors +# imageio is distributed under the terms of the (new) BSD License. +# + +"""Read LFR files (Lytro Illum). + +Backend: internal + +Plugin to read Lytro Illum .lfr and .raw files as produced +by the Lytro Illum light field camera. It is actually a collection +of plugins, each supporting slightly different keyword arguments + +Parameters +---------- +meta_only : bool + Whether to only read the metadata. +include_thumbnail : bool + (only for lytro-lfr and lytro-lfp) + Whether to include an image thumbnail in the metadata. + +""" +# +# +# This code is based on work by +# David Uhlig and his lfr_reader +# (https://www.iiit.kit.edu/uhlig.php) +# Donald Dansereau and his Matlab LF Toolbox +# (http://dgd.vision/Tools/LFToolbox/) +# and Behnam Esfahbod and his Python LFP-Reader +# (https://github.com/behnam/python-lfp-reader/) + + +import os +import json +import struct +import logging + + +import numpy as np + +from ..core import Format +from ..v2 import imread + + +logger = logging.getLogger(__name__) + + +# Sensor size of Lytro Illum resp. 
class LytroFormat(Format):
    """Common base of the Lytro formats.

    LytroLfrFormat, LytroLfpFormat, LytroIllumRawFormat and
    LytroF01RawFormat derive from this class and implement the
    Lytro-LFR, Lytro-LFP and Lytro-RAW formats for the Illum and the
    original F01 camera respectively.
    Reading only: ``_can_write`` always answers False and the Writer
    below raises on every attempt to add data.
    """

    # Only single images are supported.
    _modes = "i"

    def _can_write(self, request):
        """Return False for every request; writing is not supported."""
        return False

    # -- writer

    class Writer(Format.Writer):
        """Writer stub whose data methods always raise RuntimeError."""

        def _open(self, flags=0):
            # ``flags`` is accepted but not used.
            self._fp = self.request.get_file()

        def _close(self):
            # Nothing to release here.
            # Note that the request object will close self._fp
            return None

        def _append_data(self, im, meta):
            # Image data cannot be written in this format.
            raise RuntimeError("The lytro format cannot write image data.")

        def _set_meta_data(self, meta):
            # Global meta data cannot be written in this format either.
            raise RuntimeError("The lytro format cannot write meta data.")
@staticmethod
def rearrange_bits(array):
    """Unpack the 10-bit Lytro raw layout into a normalized image.

    The input is read in groups of five values. Values 0..3 of a group
    each supply the upper part of one pixel (shifted left by two bits);
    value 4 supplies the two low bits of all four pixels, pixel ``k``
    using bits ``2k`` and ``2k + 1``. Pixel ``k`` of every group is
    written to the image columns ``k, k + 4, k + 8, ...``.

    Parameters
    ----------
    array : ndarray
        Flat array of packed values (the callers in this module pass
        uint8 data widened to uint16).

    Returns
    -------
    ndarray
        float64 image of shape ``LYTRO_ILLUM_IMAGE_SIZE``, divided by
        1023 (the largest 10-bit value).
    """
    n_rows, n_cols = LYTRO_ILLUM_IMAGE_SIZE[0], LYTRO_ILLUM_IMAGE_SIZE[1]
    low_bits = array[4::5]

    # Combine the high part and the two low bits of each of the four
    # pixels in a group before touching the output image.
    planes = []
    for k in range(4):
        shift = 2 * k
        low = np.right_shift(np.bitwise_and(low_bits, 3 << shift), shift)
        planes.append(np.left_shift(array[k::5], 2) + low)

    image = np.zeros(LYTRO_ILLUM_IMAGE_SIZE, dtype=np.uint16)
    for k, plane in enumerate(planes):
        image[:, k::4] = plane.reshape((n_rows, n_cols // 4))

    # Normalize data to 1.0 as 64-bit float.
    # Division is by 1023 as the Lytro Illum saves 10-bit raw data.
    return np.divide(image, 1023.0).astype(np.float64)
+ return 1 + + def _get_data(self, index): + # Return the data and meta data for the given index + + if index not in [0, "None"]: + raise IndexError("Lytro file contains only one dataset") + + if not self._meta_only: + # Read all bytes + if self._data is None: + self._data = self._file.read() + + # Read bytes from string and convert to uint16 + raw = np.frombuffer(self._data, dtype=np.uint8).astype(np.uint16) + + # Rearrange bits + img = LytroIllumRawFormat.rearrange_bits(raw) + + else: + # Return empty image + img = np.array([]) + + # Return image and meta data + return img, self._get_meta_data(index=0) + + def _get_meta_data(self, index): + # Get the meta data for the given index. If index is None, it + # should return the global meta data. + + if index not in [0, None]: + raise IndexError("Lytro meta data file contains only one dataset") + + # Try to read meta data from meta data file corresponding + # to the raw data file, extension in [.txt, .TXT, .json, .JSON] + filename_base = os.path.splitext(self.request.get_local_filename())[0] + + meta_data = None + + for ext in [".txt", ".TXT", ".json", ".JSON"]: + if os.path.isfile(filename_base + ext): + meta_data = json.load(open(filename_base + ext)) + + if meta_data is not None: + return meta_data + + else: + logger.warning("No metadata file found for provided raw file.") + return {} + + +class LytroLfrFormat(LytroFormat): + """This is the Lytro Illum LFR format. + The lfr is a image and meta data container format as used by the + Lytro Illum light field camera. + The format will read the specified lfr file. + This format does not support writing. + + Parameters for reading + ---------------------- + meta_only : bool + Whether to only read the metadata. + include_thumbnail : bool + Whether to include an image thumbnail in the metadata. 
+ """ + + def _can_read(self, request): + # Check if mode and extensions are supported by the format + if request.extension in (".lfr",): + return True + + # -- reader + + class Reader(Format.Reader): + def _open(self, meta_only=False, include_thumbnail=True): + self._file = self.request.get_file() + self._data = None + self._chunks = {} + self.metadata = {} + self._content = None + self._meta_only = meta_only + self._include_thumbnail = include_thumbnail + + self._find_header() + self._find_chunks() + self._find_meta() + + try: + # Get sha1 dict and check if it is in dictionary of data chunks + chunk_dict = self._content["frames"][0]["frame"] + if ( + chunk_dict["metadataRef"] in self._chunks + and chunk_dict["imageRef"] in self._chunks + and chunk_dict["privateMetadataRef"] in self._chunks + ): + if not self._meta_only: + # Read raw image data byte buffer + data_pos, size = self._chunks[chunk_dict["imageRef"]] + self._file.seek(data_pos, 0) + self.raw_image_data = self._file.read(size) + + # Read meta data + data_pos, size = self._chunks[chunk_dict["metadataRef"]] + self._file.seek(data_pos, 0) + metadata = self._file.read(size) + # Add metadata to meta data dict + self.metadata["metadata"] = json.loads(metadata.decode("ASCII")) + + # Read private metadata + data_pos, size = self._chunks[chunk_dict["privateMetadataRef"]] + self._file.seek(data_pos, 0) + serial_numbers = self._file.read(size) + self.serial_numbers = json.loads(serial_numbers.decode("ASCII")) + # Add private metadata to meta data dict + self.metadata["privateMetadata"] = self.serial_numbers + + # Read image preview thumbnail + if self._include_thumbnail: + chunk_dict = self._content["thumbnails"][0] + if chunk_dict["imageRef"] in self._chunks: + # Read thumbnail image from thumbnail chunk + data_pos, size = self._chunks[chunk_dict["imageRef"]] + self._file.seek(data_pos, 0) + # Read binary data, read image as jpeg + thumbnail_data = self._file.read(size) + thumbnail_img = imread(thumbnail_data, 
format="jpeg") + + thumbnail_height = chunk_dict["height"] + thumbnail_width = chunk_dict["width"] + + # Add thumbnail to metadata + self.metadata["thumbnail"] = { + "image": thumbnail_img, + "height": thumbnail_height, + "width": thumbnail_width, + } + + except KeyError: + raise RuntimeError("The specified file is not a valid LFR file.") + + def _close(self): + # Close the reader. + # Note that the request object will close self._file + del self._data + + def _get_length(self): + # Return the number of images. Can be np.inf + return 1 + + def _find_header(self): + """ + Checks if file has correct header and skip it. + """ + file_header = b"\x89LFP\x0d\x0a\x1a\x0a\x00\x00\x00\x01" + # Read and check header of file + header = self._file.read(HEADER_LENGTH) + if header != file_header: + raise RuntimeError("The LFR file header is invalid.") + + # Read first bytes to skip header + self._file.read(SIZE_LENGTH) + + def _find_chunks(self): + """ + Gets start position and size of data chunks in file. + """ + chunk_header = b"\x89LFC\x0d\x0a\x1a\x0a\x00\x00\x00\x00" + + for i in range(0, DATA_CHUNKS_ILLUM): + data_pos, size, sha1 = self._get_chunk(chunk_header) + self._chunks[sha1] = (data_pos, size) + + def _find_meta(self): + """ + Gets a data chunk that contains information over content + of other data chunks. + """ + meta_header = b"\x89LFM\x0d\x0a\x1a\x0a\x00\x00\x00\x00" + data_pos, size, sha1 = self._get_chunk(meta_header) + + # Get content + self._file.seek(data_pos, 0) + data = self._file.read(size) + self._content = json.loads(data.decode("ASCII")) + + def _get_chunk(self, header): + """ + Checks if chunk has correct header and skips it. + Finds start position and length of next chunk and reads + sha1-string that identifies the following data chunk. + + Parameters + ---------- + header : bytes + Byte string that identifies start of chunk. + + Returns + ------- + data_pos : int + Start position of data chunk in file. + size : int + Size of data chunk. 
+ sha1 : str + Sha1 value of chunk. + """ + # Read and check header of chunk + header_chunk = self._file.read(HEADER_LENGTH) + if header_chunk != header: + raise RuntimeError("The LFR chunk header is invalid.") + + data_pos = None + sha1 = None + + # Read size + size = struct.unpack(">i", self._file.read(SIZE_LENGTH))[0] + if size > 0: + # Read sha1 + sha1 = str(self._file.read(SHA1_LENGTH).decode("ASCII")) + # Skip fixed null chars + self._file.read(PADDING_LENGTH) + # Find start of data and skip data + data_pos = self._file.tell() + self._file.seek(size, 1) + # Skip extra null chars + ch = self._file.read(1) + while ch == b"\0": + ch = self._file.read(1) + self._file.seek(-1, 1) + + return data_pos, size, sha1 + + def _get_data(self, index): + # Return the data and meta data for the given index + if index not in [0, None]: + raise IndexError("Lytro lfr file contains only one dataset") + + if not self._meta_only: + # Read bytes from string and convert to uint16 + raw = np.frombuffer(self.raw_image_data, dtype=np.uint8).astype( + np.uint16 + ) + im = LytroIllumRawFormat.rearrange_bits(raw) + else: + im = np.array([]) + + # Return array and dummy meta data + return im, self.metadata + + def _get_meta_data(self, index): + # Get the meta data for the given index. If index is None, + # it returns the global meta data. + if index not in [0, None]: + raise IndexError("Lytro meta data file contains only one dataset") + + return self.metadata + + +class LytroF01RawFormat(LytroFormat): + """This is the Lytro RAW format for the original F01 Lytro camera. + The raw format is a 12bit image format as used by the Lytro F01 + light field camera. The format will read the specified raw file and will + try to load a .txt or .json file with the associated meta data. + This format does not support writing. + + + Parameters for reading + ---------------------- + meta_only : bool + Whether to only read the metadata. 
+ + """ + + def _can_read(self, request): + # Check if mode and extensions are supported by the format + if request.extension in (".raw",): + return True + + @staticmethod + def rearrange_bits(array): + # Do bit rearrangement for the 12-bit lytro raw format + # Normalize output to 1.0 as float64 + t0 = array[0::3] + t1 = array[1::3] + t2 = array[2::3] + + a0 = np.left_shift(t0, 4) + np.right_shift(np.bitwise_and(t1, 240), 4) + a1 = np.left_shift(np.bitwise_and(t1, 15), 8) + t2 + + image = np.zeros(LYTRO_F01_IMAGE_SIZE, dtype=np.uint16) + image[:, 0::2] = a0.reshape( + (LYTRO_F01_IMAGE_SIZE[0], LYTRO_F01_IMAGE_SIZE[1] // 2) + ) + image[:, 1::2] = a1.reshape( + (LYTRO_F01_IMAGE_SIZE[0], LYTRO_F01_IMAGE_SIZE[1] // 2) + ) + + # Normalize data to 1.0 as 64-bit float. + # Division is by 4095 as the Lytro F01 saves 12-bit raw data. + return np.divide(image, 4095.0).astype(np.float64) + + # -- reader + + class Reader(Format.Reader): + def _open(self, meta_only=False): + self._file = self.request.get_file() + self._data = None + self._meta_only = meta_only + + def _close(self): + # Close the reader. + # Note that the request object will close self._file + del self._data + + def _get_length(self): + # Return the number of images. + return 1 + + def _get_data(self, index): + # Return the data and meta data for the given index + + if index not in [0, "None"]: + raise IndexError("Lytro file contains only one dataset") + + if not self._meta_only: + # Read all bytes + if self._data is None: + self._data = self._file.read() + + # Read bytes from string and convert to uint16 + raw = np.frombuffer(self._data, dtype=np.uint8).astype(np.uint16) + + # Rearrange bits + img = LytroF01RawFormat.rearrange_bits(raw) + + else: + img = np.array([]) + + # Return image and meta data + return img, self._get_meta_data(index=0) + + def _get_meta_data(self, index): + # Get the meta data for the given index. If index is None, it + # should return the global meta data. 
class LytroLfpFormat(LytroFormat):
    """This is the Lytro F01 LFP format.

    The lfp is an image and meta data container format as used by the
    Lytro F01 light field camera.
    The format will read the specified lfp file.
    This format does not support writing.

    Parameters for reading
    ----------------------
    meta_only : bool
        Whether to only read the metadata.
    """

    def _can_read(self, request):
        """Return True when the request targets an ``.lfp`` file."""
        return request.extension in (".lfp",)

    # -- reader

    class Reader(Format.Reader):
        """Reader for a single-image Lytro F01 ``.lfp`` container."""

        def _open(self, meta_only=False):
            """Parse the container and load metadata (and raw pixels).

            Parameters
            ----------
            meta_only : bool
                If True, the raw image bytes are not read.

            Raises
            ------
            RuntimeError
                If the header or a chunk header is invalid, or the table of
                contents lacks the expected keys.
            """
            self._file = self.request.get_file()
            self._data = None
            self._chunks = {}
            self.metadata = {}
            self._content = None
            self._meta_only = meta_only
            # Stays None when the file does not reference an image chunk,
            # so _get_data can report that instead of an AttributeError.
            self.raw_image_data = None

            self._find_header()
            self._find_meta()
            self._find_chunks()

            try:
                # Get sha1 dict and check if it is in dictionary of data chunks
                chunk_dict = self._content["picture"]["frameArray"][0]["frame"]
                if (
                    chunk_dict["metadataRef"] in self._chunks
                    and chunk_dict["imageRef"] in self._chunks
                    and chunk_dict["privateMetadataRef"] in self._chunks
                ):
                    if not self._meta_only:
                        # Read raw image data byte buffer
                        self.raw_image_data = self._read_chunk(chunk_dict["imageRef"])

                    # Read meta data and add it to the meta data dict
                    metadata = self._read_chunk(chunk_dict["metadataRef"])
                    self.metadata["metadata"] = json.loads(metadata.decode("ASCII"))

                    # Read private metadata and add it to the meta data dict
                    serial_numbers = self._read_chunk(
                        chunk_dict["privateMetadataRef"]
                    )
                    self.serial_numbers = json.loads(serial_numbers.decode("ASCII"))
                    self.metadata["privateMetadata"] = self.serial_numbers

            except KeyError as err:
                raise RuntimeError(
                    "The specified file is not a valid LFP file."
                ) from err

        def _read_chunk(self, ref):
            """Return the bytes of the data chunk registered under ``ref``."""
            data_pos, size = self._chunks[ref]
            self._file.seek(data_pos, 0)
            return self._file.read(size)

        def _close(self):
            # Close the reader.
            # Note that the request object will close self._file
            del self._data

        def _get_length(self):
            # Return the number of images. Can be np.inf
            return 1

        def _find_header(self):
            """
            Checks if file has correct header and skip it.
            """
            file_header = b"\x89LFP\x0d\x0a\x1a\x0a\x00\x00\x00\x01"

            # Read and check header of file
            header = self._file.read(HEADER_LENGTH)
            if header != file_header:
                raise RuntimeError("The LFP file header is invalid.")

            # Read first bytes to skip header
            self._file.read(SIZE_LENGTH)

        def _find_chunks(self):
            """
            Gets start position and size of data chunks in file.
            """
            chunk_header = b"\x89LFC\x0d\x0a\x1a\x0a\x00\x00\x00\x00"

            for _ in range(DATA_CHUNKS_F01):
                data_pos, size, sha1 = self._get_chunk(chunk_header)
                self._chunks[sha1] = (data_pos, size)

        def _find_meta(self):
            """
            Gets a data chunk that contains information over content
            of other data chunks.
            """
            meta_header = b"\x89LFM\x0d\x0a\x1a\x0a\x00\x00\x00\x00"

            data_pos, size, sha1 = self._get_chunk(meta_header)

            # Get content
            self._file.seek(data_pos, 0)
            data = self._file.read(size)
            self._content = json.loads(data.decode("ASCII"))
            self._file.read(5)  # Skip 5

        def _get_chunk(self, header):
            """
            Checks if chunk has correct header and skips it.
            Finds start position and length of next chunk and reads
            sha1-string that identifies the following data chunk.

            Parameters
            ----------
            header : bytes
                Byte string that identifies start of chunk.

            Returns
            -------
            data_pos : int
                Start position of data chunk in file (None if size <= 0).
            size : int
                Size of data chunk.
            sha1 : str
                Sha1 value of chunk (None if size <= 0).

            Raises
            ------
            RuntimeError
                If the bytes at the current position do not match ``header``.
            """
            # Read and check header of chunk
            header_chunk = self._file.read(HEADER_LENGTH)
            if header_chunk != header:
                raise RuntimeError("The LFP chunk header is invalid.")

            data_pos = None
            sha1 = None

            # Read size (big-endian signed 32-bit integer)
            size = struct.unpack(">i", self._file.read(SIZE_LENGTH))[0]
            if size > 0:
                # Read sha1
                sha1 = self._file.read(SHA1_LENGTH).decode("ASCII")
                # Skip fixed null chars
                self._file.read(PADDING_LENGTH)
                # Find start of data and skip data
                data_pos = self._file.tell()
                self._file.seek(size, 1)
                # Skip extra null chars
                ch = self._file.read(1)
                while ch == b"\0":
                    ch = self._file.read(1)
                # Un-read the first non-null byte. At end of file nothing
                # was read (ch is empty), so there is nothing to step back over.
                if ch:
                    self._file.seek(-1, 1)

            return data_pos, size, sha1

        def _get_data(self, index):
            """Return ``(image, metadata)`` for the only dataset in the file.

            Raises
            ------
            IndexError
                If ``index`` is neither ``0`` nor ``None``.
            RuntimeError
                If pixel data was requested but no image chunk was found
                while opening the file.
            """
            if index not in [0, None]:
                raise IndexError("Lytro lfp file contains only one dataset")

            if not self._meta_only:
                if self.raw_image_data is None:
                    raise RuntimeError("The LFP file does not contain image data.")
                # Widen the bytes to uint16 so the bit shifts cannot overflow
                raw = np.frombuffer(self.raw_image_data, dtype=np.uint8).astype(
                    np.uint16
                )
                im = LytroF01RawFormat.rearrange_bits(raw)
            else:
                im = np.array([])

            # Return array and the metadata collected in _open
            return im, self.metadata

        def _get_meta_data(self, index):
            # Get the meta data for the given index. If index is None,
            # it returns the global meta data.
            if index not in [0, None]:
                raise IndexError("Lytro meta data file contains only one dataset")

            return self.metadata
# -*- coding: utf-8 -*-
# imageio is distributed under the terms of the (new) BSD License.

"""Read/Write NPZ files.

Backend: `Numpy <https://numpy.org/>`_

NPZ is a file format by numpy that provides storage of array data using gzip
compression. This imageio plugin supports data of any shape, and also supports
multiple images per file. However, the npz format does not provide streaming;
all data is read/written at once. Further, there is no support for meta data.

See the BSDF format for a similar (but more fully featured) format.

Parameters
----------
None

Notes
-----
This format is not available on Pypy.

"""

import numpy as np

from ..core import Format


def _npz_name_key(name):
    """Sort key that orders array names by their trailing ``_<suffix>``.

    Decimal suffixes (as in the ``arr_0``, ``arr_1``, ... names that
    ``np.savez_compressed`` assigns to positional arrays) compare as
    integers, so ``arr_2`` sorts before ``arr_10``. Any other suffix is
    compared as a string and sorts after all numeric ones.
    """
    suffix = name.split("_")[-1]
    if suffix.isdecimal():
        return (0, int(suffix), "")
    return (1, 0, suffix)


class NpzFormat(Format):
    """See :mod:`imageio.plugins.npz`"""

    def _can_read(self, request):
        # We support any kind of image data
        return request.extension in self.extensions

    def _can_write(self, request):
        # We support any kind of image data
        return request.extension in self.extensions

    # -- reader

    class Reader(Format.Reader):
        """Reads every array of an npz archive as one image."""

        def _open(self):
            """Load the archive and fix the order in which arrays are served.

            Raises
            ------
            ValueError
                If ``np.load`` does not return an ``NpzFile`` (for example
                because the resource is a plain ``.npy`` array).
            """
            # Load npz file, which provides another file like object
            self._npz = np.load(self.request.get_file())
            # Explicit check instead of ``assert``, which is stripped under -O.
            if not isinstance(self._npz, np.lib.npyio.NpzFile):
                raise ValueError("The provided resource is not an npz archive.")
            # Order by trailing index, numerically (arr_2 before arr_10)
            self._names = sorted(self._npz.files, key=_npz_name_key)

        def _close(self):
            self._npz.close()

        def _get_length(self):
            return len(self._names)

        def _get_data(self, index):
            """Return ``(array, {})`` for the ``index``-th array.

            Raises
            ------
            IndexError
                If ``index`` is negative or past the last array.
            """
            if index < 0 or index >= len(self._names):
                raise IndexError("Index out of range while reading from nzp")
            im = self._npz[self._names[index]]
            # Return array and empty meta data
            return im, {}

        def _get_meta_data(self, index):
            # Get the meta data for the given index
            raise RuntimeError("The npz format does not support meta data.")

    # -- writer

    class Writer(Format.Writer):
        """Collects images in memory and writes them all on close."""

        def _open(self):
            # Npz is not such a great format. We cannot stream to the file.
            # So we remember all images and write them to file at the end.
            self._images = []

        def _close(self):
            # Write everything
            np.savez_compressed(self.request.get_file(), *self._images)

        def _append_data(self, im, meta):
            self._images.append(im)  # discard meta data

        def set_meta_data(self, meta):
            raise RuntimeError("The npz format does not support meta data.")
"""Read/Write images using OpenCV.

Backend Library: `OpenCV <https://opencv.org/>`_

This plugin wraps OpenCV (also known as ``cv2``), a popular image processing
library. Currently, it exposes OpenCVs image reading capability (no video or GIF
support yet); however, this may be added in future releases.

Methods
-------
.. note::
    Check the respective function for a list of supported kwargs and their
    documentation.

.. autosummary::
    :toctree:

    OpenCVPlugin.read
    OpenCVPlugin.iter
    OpenCVPlugin.write
    OpenCVPlugin.properties
    OpenCVPlugin.metadata

Pixel Formats (Colorspaces)
---------------------------

OpenCV is known to process images in BGR; however, most of the python ecosystem
(in particular matplotlib and other pydata libraries) use the RGB. As such,
images are converted to RGB, RGBA, or grayscale (where applicable) by default.

"""

import warnings
from pathlib import Path
from typing import Any, Dict, Iterator, List, Optional, Union

import cv2
import numpy as np

from ..core import Request
from ..core.request import URI_BYTES, InitializationError, IOMode
from ..core.v3_plugin_api import ImageProperties, PluginV3
from ..typing import ArrayLike


class OpenCVPlugin(PluginV3):
    """ImageIO v3 plugin that reads and writes images through ``cv2``."""

    def __init__(self, request: Request) -> None:
        """Bind the plugin to ``request`` and verify OpenCV can handle it.

        Raises
        ------
        InitializationError
            If OpenCV reports no reader (read mode) or no writer (write mode)
            for the resource.
        """
        super().__init__(request)

        self.file_handle = request.get_local_filename()
        if request._uri_type is URI_BYTES:
            self.filename = ""
        else:
            self.filename = request.raw_uri

        mode = request.mode.io_mode
        if mode == IOMode.read and not cv2.haveImageReader(self.file_handle):
            raise InitializationError(f"OpenCV can't read `{self.filename}`.")
        elif mode == IOMode.write and not cv2.haveImageWriter(self.file_handle):
            raise InitializationError(f"OpenCV can't write to `{self.filename}`.")

    def read(
        self,
        *,
        index: Optional[int] = None,
        colorspace: Optional[Union[int, str]] = None,
        flags: int = cv2.IMREAD_COLOR,
    ) -> np.ndarray:
        """Read an image from the ImageResource.

        Parameters
        ----------
        index : int, Ellipsis
            If int, read the index-th image from the ImageResource. If ``...``,
            read all images from the ImageResource and stack them along a new,
            prepended, batch dimension. If None (default), use ``index=0`` if
            the image contains exactly one image and ``index=...`` otherwise.
        colorspace : str, int
            The colorspace to convert into after loading and before returning
            the image. If None (default) keep grayscale images as is, convert
            images with an alpha channel to ``RGBA`` and all other images to
            ``RGB``. If int, interpret ``colorspace`` as one of OpenCVs
            conversion codes (``cv2.COLOR_*``) and use it for conversion. If
            str, convert the image into the given colorspace. Possible string
            values are: ``"RGB"``, ``"BGR"``, ``"RGBA"``, ``"BGRA"``,
            ``"GRAY"``, ``"HSV"``, or ``"LAB"``.
        flags : int
            The OpenCV flag(s) to pass to the reader (``cv2.IMREAD_*``). Refer
            to the OpenCV docs for details.

        Returns
        -------
        ndimage : np.ndarray
            The decoded image as a numpy array.

        Raises
        ------
        ValueError
            If OpenCV could not read the requested index.

        """

        if index is None:
            n_images = cv2.imcount(self.file_handle, flags)
            index = 0 if n_images == 1 else ...

        if index is ...:
            retval, img = cv2.imreadmulti(self.file_handle, flags=flags)
            is_batch = True
        else:
            # start=index, count=1: read exactly one page
            retval, img = cv2.imreadmulti(self.file_handle, index, 1, flags=flags)
            is_batch = False

        if retval is False:
            raise ValueError(f"Could not read index `{index}` from `{self.filename}`.")

        # Infer the colorspace OpenCV decoded into from the first frame.
        if img[0].ndim == 2:
            in_colorspace = "GRAY"
            out_colorspace = colorspace or "GRAY"
        elif img[0].shape[-1] == 4:
            in_colorspace = "BGRA"
            out_colorspace = colorspace or "RGBA"
        else:
            in_colorspace = "BGR"
            out_colorspace = colorspace or "RGB"

        if isinstance(colorspace, int):
            # caller supplied an explicit cv2.COLOR_* code
            cvt_space = colorspace
        elif in_colorspace == out_colorspace.upper():
            cvt_space = None
        else:
            out_colorspace = out_colorspace.upper()
            cvt_space = getattr(cv2, f"COLOR_{in_colorspace}2{out_colorspace}")

        if cvt_space is not None:
            img = np.stack([cv2.cvtColor(x, cvt_space) for x in img])
        else:
            img = np.stack(img)

        return img if is_batch else img[0]

    def iter(
        self,
        colorspace: Optional[Union[int, str]] = None,
        flags: int = cv2.IMREAD_COLOR,
    ) -> Iterator[np.ndarray]:
        """Yield images from the ImageResource.

        Parameters
        ----------
        colorspace : str, int
            The colorspace to convert into after loading and before returning
            the image. See :meth:`read` for the accepted values.
        flags : int
            The OpenCV flag(s) to pass to the reader (``cv2.IMREAD_*``). Refer
            to the OpenCV docs for details.

        Yields
        ------
        ndimage : np.ndarray
            The decoded image as a numpy array.

        """
        # Count pages with the same flags used for decoding, as read() and
        # properties() do.
        for idx in range(cv2.imcount(self.file_handle, flags)):
            yield self.read(index=idx, flags=flags, colorspace=colorspace)

    def write(
        self,
        ndimage: Union[ArrayLike, List[ArrayLike]],
        is_batch: bool = False,
        params: Optional[List[int]] = None,
    ) -> Optional[bytes]:
        """Save an ndimage in the ImageResource.

        Parameters
        ----------
        ndimage : ArrayLike, List[ArrayLike]
            The image data that will be written to the file. It is either a
            single image, a batch of images, or a list of images.
        is_batch : bool
            If True, the provided ndimage is a batch of images. If False (default), the
            provided ndimage is a single image. If the provided ndimage is a list of images,
            this parameter has no effect.
        params : List[int]
            A list of parameters that will be passed to OpenCVs imwrite or
            imwritemulti functions. Possible values are documented in the
            OpenCV documentation.

        Returns
        -------
        encoded_image : bytes, None
            If the request's URI type is ``URI_BYTES`` the call to write
            returns the encoded image as a bytes string. Otherwise it returns
            None.

        Raises
        ------
        IOError
            If OpenCV reports that writing failed.

        """

        if isinstance(ndimage, list):
            ndimage = np.stack(ndimage, axis=0)
        else:
            # Accept any array-like, not only ndarray instances.
            ndimage = np.asarray(ndimage)
            if not is_batch:
                ndimage = ndimage[None, ...]

        if ndimage[0].ndim == 2:
            n_channels = 1
        else:
            n_channels = ndimage[0].shape[-1]

        # Convert from the RGB(A) channel order to OpenCV's BGR(A).
        if n_channels == 1:
            ndimage_cv2 = list(ndimage)
        elif n_channels == 4:
            ndimage_cv2 = [cv2.cvtColor(x, cv2.COLOR_RGBA2BGRA) for x in ndimage]
        else:
            ndimage_cv2 = [cv2.cvtColor(x, cv2.COLOR_RGB2BGR) for x in ndimage]

        retval = cv2.imwritemulti(self.file_handle, ndimage_cv2, params)

        if retval is False:
            # not sure what scenario would trigger this, but
            # it can occur theoretically.
            raise IOError("OpenCV failed to write.")  # pragma: no cover

        if self.request._uri_type == URI_BYTES:
            return Path(self.file_handle).read_bytes()

    def properties(
        self,
        index: Optional[int] = None,
        colorspace: Optional[Union[int, str]] = None,
        flags: int = cv2.IMREAD_COLOR,
    ) -> ImageProperties:
        """Standardized image metadata.

        Parameters
        ----------
        index : int, Ellipsis
            If int, get the properties of the index-th image in the
            ImageResource. If ``...``, get the properties of the image stack
            that contains all images. If None (default), use ``index=0`` if the
            image contains exactly one image and ``index=...`` otherwise.
        colorspace : str, int
            The colorspace to convert into after loading and before returning
            the image. See :meth:`read` for the accepted values.
        flags : int
            The OpenCV flag(s) to pass to the reader (``cv2.IMREAD_*``). Refer
            to the OpenCV docs for details.

        Returns
        -------
        props : ImageProperties
            A dataclass filled with standardized image metadata.

        Notes
        -----
        Reading properties with OpenCV involves decoding pixel data, because
        OpenCV doesn't provide a direct way to access metadata.

        """

        if index is None:
            n_images = cv2.imcount(self.file_handle, flags)
            is_batch = n_images > 1
        elif index is Ellipsis:
            n_images = cv2.imcount(self.file_handle, flags)
            is_batch = True
        else:
            is_batch = False

        # unfortunately, OpenCV doesn't allow reading shape without reading pixel data
        if is_batch:
            # The first frame's shape is taken as the shape of every frame.
            img = self.read(index=0, flags=flags, colorspace=colorspace)
            return ImageProperties(
                shape=(n_images, *img.shape),
                dtype=img.dtype,
                n_images=n_images,
                is_batch=True,
            )

        img = self.read(index=index, flags=flags, colorspace=colorspace)
        return ImageProperties(shape=img.shape, dtype=img.dtype, is_batch=False)

    def metadata(
        self, index: Optional[int] = None, exclude_applied: bool = True
    ) -> Dict[str, Any]:
        """Format-specific metadata.

        .. warning::
            OpenCV does not support reading metadata. When called, this function
            emits a ``UserWarning`` and returns an empty dict.

        Parameters
        ----------
        index : int
            This parameter has no effect.
        exclude_applied : bool
            This parameter has no effect.

        Returns
        -------
        metadata : dict
            Always an empty dict.

        """

        warnings.warn("OpenCV does not support reading metadata.", UserWarning)
        return dict()
+ +Parameters +---------- +request : Request + A request object representing the resource to be operated on. + +Methods +------- + +.. autosummary:: + :toctree: _plugins/pillow + + PillowPlugin.read + PillowPlugin.write + PillowPlugin.iter + PillowPlugin.get_meta + +""" + +import sys +import warnings +from io import BytesIO +from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, Union, cast + +import numpy as np +from PIL import ExifTags, GifImagePlugin, Image, ImageSequence, UnidentifiedImageError +from PIL import __version__ as pil_version # type: ignore + +from ..core.request import URI_BYTES, InitializationError, IOMode, Request +from ..core.v3_plugin_api import ImageProperties, PluginV3 +from ..typing import ArrayLike + + +def pillow_version() -> Tuple[int]: + return tuple(int(x) for x in pil_version.split(".")) + + +def _exif_orientation_transform(orientation: int, mode: str) -> Callable: + # get transformation that transforms an image from a + # given EXIF orientation into the standard orientation + + # -1 if the mode has color channel, 0 otherwise + axis = -2 if Image.getmodebands(mode) > 1 else -1 + + EXIF_ORIENTATION = { + 1: lambda x: x, + 2: lambda x: np.flip(x, axis=axis), + 3: lambda x: np.rot90(x, k=2), + 4: lambda x: np.flip(x, axis=axis - 1), + 5: lambda x: np.flip(np.rot90(x, k=3), axis=axis), + 6: lambda x: np.rot90(x, k=3), + 7: lambda x: np.flip(np.rot90(x, k=1), axis=axis), + 8: lambda x: np.rot90(x, k=1), + } + + # Some buggy/legacy software may not write the correct orientation (i.e. 0) + # No transformation if orientation is unknown or missing + return EXIF_ORIENTATION.get(orientation, lambda x: x) + + +class PillowPlugin(PluginV3): + def __init__(self, request: Request) -> None: + """Instantiate a new Pillow Plugin Object + + Parameters + ---------- + request : {Request} + A request object representing the resource to be operated on. 
+ + """ + + super().__init__(request) + + # Register HEIF opener for Pillow + try: + from pillow_heif import register_heif_opener + except ImportError: + pass + else: + register_heif_opener() + + # Register AVIF opener for Pillow + try: + from pillow_heif import register_avif_opener + except ImportError: + pass + else: + register_avif_opener() + + self._image: Image = None + self.images_to_write = [] + + if request.mode.io_mode == IOMode.read: + try: + # Check if it is generally possible to read the image. + # This will not read any data and merely try to find a + # compatible pillow plugin (ref: the pillow docs). + image = Image.open(request.get_file()) + except UnidentifiedImageError: + if request._uri_type == URI_BYTES: + raise InitializationError( + "Pillow can not read the provided bytes." + ) from None + else: + raise InitializationError( + f"Pillow can not read {request.raw_uri}." + ) from None + + self._image = image + else: + self.save_args = {} + + extension = self.request.extension or self.request.format_hint + if extension is None: + warnings.warn( + "Can't determine file format to write as. You _must_" + " set `format` during write or the call will fail. Use " + "`extension` to supress this warning. ", + UserWarning, + ) + return + + tirage = [Image.preinit, Image.init] + for format_loader in tirage: + format_loader() + if extension in Image.registered_extensions().keys(): + return + + raise InitializationError( + f"Pillow can not write `{extension}` files." + ) from None + + def close(self) -> None: + self._flush_writer() + + if self._image: + self._image.close() + + self._request.finish() + + def read( + self, + *, + index: int = None, + mode: str = None, + rotate: bool = False, + apply_gamma: bool = False, + writeable_output: bool = True, + pilmode: str = None, + exifrotate: bool = None, + as_gray: bool = None, + ) -> np.ndarray: + """ + Parses the given URI and creates a ndarray from it. 
+ + Parameters + ---------- + index : int + If the ImageResource contains multiple ndimages, and index is an + integer, select the index-th ndimage from among them and return it. + If index is an ellipsis (...), read all ndimages in the file and + stack them along a new batch dimension and return them. If index is + None, this plugin reads the first image of the file (index=0) unless + the image is a GIF or APNG, in which case all images are read + (index=...). + mode : str + Convert the image to the given mode before returning it. If None, + the mode will be left unchanged. Possible modes can be found at: + https://pillow.readthedocs.io/en/stable/handbook/concepts.html#modes + rotate : bool + If True and the image contains an EXIF orientation tag, + apply the orientation before returning the ndimage. + apply_gamma : bool + If True and the image contains metadata about gamma, apply gamma + correction to the image. + writable_output : bool + If True, ensure that the image is writable before returning it to + the user. This incurs a full copy of the pixel data if the data + served by pillow is read-only. Consequentially, setting this flag to + False improves performance for some images. + pilmode : str + Deprecated, use `mode` instead. + exifrotate : bool + Deprecated, use `rotate` instead. + as_gray : bool + Deprecated. Exists to raise a constructive error message. + + Returns + ------- + ndimage : ndarray + A numpy array containing the loaded image data + + Notes + ----- + If you read a paletted image (e.g. GIF) then the plugin will apply the + palette by default. Should you wish to read the palette indices of each + pixel use ``mode="P"``. The coresponding color pallete can be found in + the image's metadata using the ``palette`` key when metadata is + extracted using the ``exclude_applied=False`` kwarg. The latter is + needed, as palettes are applied by default and hence excluded by default + to keep metadata and pixel data consistent. 
+ + """ + + if pilmode is not None: + warnings.warn( + "`pilmode` is deprecated. Use `mode` instead.", DeprecationWarning + ) + mode = pilmode + + if exifrotate is not None: + warnings.warn( + "`exifrotate` is deprecated. Use `rotate` instead.", DeprecationWarning + ) + rotate = exifrotate + + if as_gray is not None: + raise TypeError( + "The keyword `as_gray` is no longer supported." + "Use `mode='F'` for a backward-compatible result, or " + " `mode='L'` for an integer-valued result." + ) + + if self._image.format == "GIF": + # Converting GIF P frames to RGB + # https://github.com/python-pillow/Pillow/pull/6150 + GifImagePlugin.LOADING_STRATEGY = ( + GifImagePlugin.LoadingStrategy.RGB_AFTER_DIFFERENT_PALETTE_ONLY + ) + + if index is None: + if self._image.format == "GIF": + index = Ellipsis + elif self._image.custom_mimetype == "image/apng": + index = Ellipsis + else: + index = 0 + + if isinstance(index, int): + # will raise IO error if index >= number of frames in image + self._image.seek(index) + image = self._apply_transforms( + self._image, mode, rotate, apply_gamma, writeable_output + ) + else: + iterator = self.iter( + mode=mode, + rotate=rotate, + apply_gamma=apply_gamma, + writeable_output=writeable_output, + ) + image = np.stack([im for im in iterator], axis=0) + + return image + + def iter( + self, + *, + mode: str = None, + rotate: bool = False, + apply_gamma: bool = False, + writeable_output: bool = True, + ) -> Iterator[np.ndarray]: + """ + Iterate over all ndimages/frames in the URI + + Parameters + ---------- + mode : {str, None} + Convert the image to the given mode before returning it. If None, + the mode will be left unchanged. Possible modes can be found at: + https://pillow.readthedocs.io/en/stable/handbook/concepts.html#modes + rotate : {bool} + If set to ``True`` and the image contains an EXIF orientation tag, + apply the orientation before returning the ndimage. 
+ apply_gamma : {bool} + If ``True`` and the image contains metadata about gamma, apply gamma + correction to the image. + writable_output : bool + If True, ensure that the image is writable before returning it to + the user. This incurs a full copy of the pixel data if the data + served by pillow is read-only. Consequentially, setting this flag to + False improves performance for some images. + """ + + for im in ImageSequence.Iterator(self._image): + yield self._apply_transforms( + im, mode, rotate, apply_gamma, writeable_output + ) + + def _apply_transforms( + self, image, mode, rotate, apply_gamma, writeable_output + ) -> np.ndarray: + if mode is not None: + image = image.convert(mode) + elif image.mode == "P": + # adjust for pillow9 changes + # see: https://github.com/python-pillow/Pillow/issues/5929 + image = image.convert(image.palette.mode) + elif image.format == "PNG" and image.mode == "I": + major, minor, patch = pillow_version() + + if sys.byteorder == "little": + desired_mode = "I;16" + else: # pragma: no cover + # can't test big-endian in GH-Actions + desired_mode = "I;16B" + + if major < 10: # pragma: no cover + warnings.warn( + "Loading 16-bit (uint16) PNG as int32 due to limitations " + "in pillow's PNG decoder. 
This will be fixed in a future " + "version of pillow which will make this warning dissapear.", + UserWarning, + ) + elif minor < 1: # pragma: no cover + # pillow<10.1.0 can directly decode into 16-bit grayscale + image.mode = desired_mode + else: + # pillow >= 10.1.0 + image = image.convert(desired_mode) + + image = np.asarray(image) + + meta = self.metadata(index=self._image.tell(), exclude_applied=False) + if rotate and "Orientation" in meta: + transformation = _exif_orientation_transform( + meta["Orientation"], self._image.mode + ) + image = transformation(image) + + if apply_gamma and "gamma" in meta: + gamma = float(meta["gamma"]) + scale = float(65536 if image.dtype == np.uint16 else 255) + gain = 1.0 + image = ((image / scale) ** gamma) * scale * gain + 0.4999 + image = np.round(image).astype(np.uint8) + + if writeable_output and not image.flags["WRITEABLE"]: + image = np.array(image) + + return image + + def write( + self, + ndimage: Union[ArrayLike, List[ArrayLike]], + *, + mode: str = None, + format: str = None, + is_batch: bool = None, + **kwargs, + ) -> Optional[bytes]: + """ + Write an ndimage to the URI specified in path. + + If the URI points to a file on the current host and the file does not + yet exist it will be created. If the file exists already, it will be + appended if possible; otherwise, it will be replaced. + + If necessary, the image is broken down along the leading dimension to + fit into individual frames of the chosen format. If the format doesn't + support multiple frames, and IOError is raised. + + Parameters + ---------- + image : ndarray or list + The ndimage to write. If a list is given each element is expected to + be an ndimage. + mode : str + Specify the image's color format. If None (default), the mode is + inferred from the array's shape and dtype. Possible modes can be + found at: + https://pillow.readthedocs.io/en/stable/handbook/concepts.html#modes + format : str + Optional format override. 
If omitted, the format to use is + determined from the filename extension. If a file object was used + instead of a filename, this parameter must always be used. + is_batch : bool + Explicitly tell the writer that ``image`` is a batch of images + (True) or not (False). If None, the writer will guess this from the + provided ``mode`` or ``image.shape``. While the latter often works, + it may cause problems for small images due to aliasing of spatial + and color-channel axes. + kwargs : ... + Extra arguments to pass to pillow. If a writer doesn't recognise an + option, it is silently ignored. The available options are described + in pillow's `image format documentation + `_ + for each writer. + + Notes + ----- + When writing batches of very narrow (2-4 pixels wide) gray images set + the ``mode`` explicitly to avoid the batch being identified as a colored + image. + + """ + if "fps" in kwargs: + warnings.warn( + "The keyword `fps` is no longer supported. Use `duration`" + "(in ms) instead, e.g. `fps=50` == `duration=20` (1000 * 1/50).", + DeprecationWarning, + ) + kwargs["duration"] = 1000 * 1 / kwargs.get("fps") + + if isinstance(ndimage, list): + ndimage = np.stack(ndimage, axis=0) + is_batch = True + else: + ndimage = np.asarray(ndimage) + + # check if ndimage is a batch of frames/pages (e.g. for writing GIF) + # if mode is given, use it; otherwise fall back to image.ndim only + if is_batch is not None: + pass + elif mode is not None: + is_batch = ( + ndimage.ndim > 3 if Image.getmodebands(mode) > 1 else ndimage.ndim > 2 + ) + elif ndimage.ndim == 2: + is_batch = False + elif ndimage.ndim == 3 and ndimage.shape[-1] == 1: + raise ValueError("Can't write images with one color channel.") + elif ndimage.ndim == 3 and ndimage.shape[-1] in [2, 3, 4]: + # Note: this makes a channel-last assumption + is_batch = False + else: + is_batch = True + + if not is_batch: + ndimage = ndimage[None, ...] 
def _flush_writer(self):
    """Write every buffered frame to the request's file and reset the writer.

    Does nothing when no frames are buffered. Otherwise the oldest buffered
    frame is saved as the primary image and any remaining frames are passed
    to pillow via the ``save_all`` / ``append_images`` save arguments.
    Afterwards both the frame buffer and the accumulated save arguments are
    emptied.
    """
    pending = self.images_to_write
    if not pending:
        return

    first_frame = pending.pop(0)
    if pending:
        # More than one frame was buffered: request a multi-frame file.
        self.save_args.update(save_all=True, append_images=pending)

    first_frame.save(self._request.get_file(), **self.save_args)
    pending.clear()
    self.save_args.clear()


def get_meta(self, *, index=0) -> Dict[str, Any]:
    """Return the metadata of the ``index``-th ndimage, applied fields included.

    Thin wrapper that forwards to ``metadata`` with ``exclude_applied=False``.
    """
    full_metadata = self.metadata(index=index, exclude_applied=False)
    return full_metadata


def metadata(
    self, index: int = None, exclude_applied: bool = True
) -> Dict[str, Any]:
    """Collect format-specific metadata of an ndimage.

    Parameters
    ----------
    index : {integer, Ellipsis, None}
        An integer selects that frame; the image is only repositioned when
        it is not already at that frame. With Ellipsis no seek happens and
        the metadata is read from the image as it currently is. None
        resolves to Ellipsis for GIF and APNG images and to 0 for
        everything else.
    exclude_applied : bool
        If True, the ``Orientation`` entry is removed from the result and
        the palette of palette-mode (``"P"``) images is not added.

    Returns
    -------
    metadata : dict
        A copy of the image's ``info`` dict, extended with ``"mode"``,
        ``"shape"`` (the image's ``size`` attribute), optionally
        ``"palette"``, and every EXIF entry whose tag has a known name.

    """

    image = self._image

    if index is None:
        animated = image.format == "GIF" or image.custom_mimetype == "image/apng"
        index = Ellipsis if animated else 0

    if isinstance(index, int) and image.tell() != index:
        image.seek(index)

    result = image.info.copy()
    result["mode"] = image.mode
    result["shape"] = image.size

    if image.mode == "P" and not exclude_applied:
        palette_colors = tuple(image.palette.colors.keys())
        result["palette"] = np.asarray(palette_colors)

    if image.getexif():
        named_tags = {}
        for tag_id, tag_value in dict(image.getexif()).items():
            named_tags[ExifTags.TAGS.get(tag_id, "unknown")] = tag_value
        # Tags without a registered name all collapse onto "unknown"; drop them.
        named_tags.pop("unknown", None)
        result.update(named_tags)

    if exclude_applied:
        result.pop("Orientation", None)

    return result


def properties(self, index: int = None) -> ImageProperties:
    """Describe an ndimage's shape and dtype in standardized form.

    Parameters
    ----------
    index : {integer, Ellipsis, None}
        An integer selects that frame. Ellipsis describes all frames as one
        batch, with the frame count as the leading axis of the shape. None
        resolves to Ellipsis for GIF and APNG images and to 0 for
        everything else.

    Returns
    -------
    properties : ImageProperties
        ``shape`` is ``(height, width)``, preceded by the number of frames
        for batches and followed by any trailing axes that pillow produces
        for the image mode. ``n_images`` is None unless a batch was
        requested.

    Notes
    -----
    Shape and dtype are taken from a 1x1 placeholder image of the same
    mode rather than from the image's own pixel data.

    """

    image = self._image

    if index is None:
        animated = image.format == "GIF" or image.custom_mimetype == "image/apng"
        index = Ellipsis if animated else 0

    as_batch = index is Ellipsis
    image.seek(0 if as_batch else index)

    # The effective mode of a palette image is the mode of its palette.
    mode = image.palette.mode if image.mode == "P" else image.mode

    width: int = image.width
    height: int = image.height
    shape: Tuple[int, ...] = (height, width)

    n_frames: Optional[int] = None
    if as_batch:
        n_frames = getattr(image, "n_frames", 1)
        shape = (n_frames,) + shape

    probe = np.asarray(Image.new(mode, (1, 1)))
    # Axes beyond (height, width) are the channel axes; the slice is empty
    # for single-band modes, which leaves the shape untouched.
    shape = shape + tuple(probe.shape[2:])

    return ImageProperties(
        shape=shape,
        dtype=probe.dtype,
        n_images=n_frames,
        is_batch=as_batch,
    )
def generate_info():  # pragma: no cover
    """Regenerate the format tables in the autogenerated part of this module.

    Collects the format id and description of every ``Image.Image``
    subclass found in pillow's ``*ImagePlugin`` modules, adds the file
    extensions registered for each format, downloads pillow's format
    handbook and extracts the section belonging to each format. The result
    is written back into this very file (``__file__``): everything up to
    and including the autogenerated-section marker comment is kept, and
    everything after it is replaced by freshly generated ``pillow_formats``
    and ``pillow_docs`` definitions.

    Side effects: needs network access, prints progress to stdout, and
    overwrites this source file.
    """
    from urllib.request import urlopen
    import PIL
    from PIL import Image

    Image.init()

    ids = []
    formats = []
    docs = {}

    # Collect formats and their summary from plugin modules
    for mod_name in dir(PIL):
        if "ImagePlugin" not in mod_name:
            continue
        mod = getattr(PIL, mod_name)
        for ob_name in dir(mod):
            ob = getattr(mod, ob_name)
            if not (isinstance(ob, type) and issubclass(ob, Image.Image)):
                continue
            if ob.format in ids:
                print("Found duplicate for", ob.format)
            else:
                ids.append(ob.format)
                formats.append((ob.format, ob.format_description))

    # Add extension info
    formats = [
        (
            format_id,
            summary,
            " ".join(
                ext for ext, fmt in Image.EXTENSION.items() if fmt == format_id
            ),
        )
        for format_id, summary in formats
    ]

    # Get documentation of formats
    url = "https://raw.githubusercontent.com/python-pillow/Pillow/master/docs/handbook/image-file-formats.rst"  # noqa
    with urlopen(url) as response:  # close the connection once read
        lines = response.read().decode().splitlines()
    # Sentinel heading so that the last real section gets flushed as well
    lines.append("End")
    lines.append("---")  # for the end

    # Parse documentation
    cur_name = ""
    cur_part = []
    for i, line in enumerate(lines):
        if line.startswith(("^^^", "---", "===")):
            # An underline closes the previous section; its title is the
            # line right above the underline.
            if cur_name and cur_name in ids:
                # The last collected line is the next section's title: drop it
                text = "\n".join(cur_part[:-1])
                text = text.replace("versionadded::", "versionadded:: Pillow ")
                text = text.replace("Image.open`", "Image.write`")
                docs[cur_name] = text
            cur_part = []
            cur_name = lines[i - 1].strip().replace(" ", "").upper()
        else:
            cur_part.append(" " + line)

    # Fill in the blanks
    for format_id in ids:
        if format_id in docs:
            docs[format_id] = "*From the Pillow docs:*\n\n" + docs[format_id]
        else:
            docs[format_id] = "No docs for %s." % format_id
            print("no docs for", format_id)

    # Sort before writing
    formats.sort(key=lambda x: x[0])
    ids.sort()

    # Read file ...
    with open(__file__, "rb") as f:
        code = f.read().decode()
    # The marker is assembled from two pieces so that this line can never
    # match itself. A single leading hash matches the marker comment whether
    # it is written with one or with two hashes; searching for the two-hash
    # form alone never matched the one-hash marker, so each run appended a
    # second copy of the tables instead of replacing them.
    code, divider, _ = code.partition("# BELOW IS " + "AUTOGENERATED")
    chunks = [code, divider, "\n\n"]

    # Write formats
    chunks.append("pillow_formats = [\n")
    for entry in formats:
        print(entry)
        chunks.append(" (%r, %r, %r),\n" % entry)
    chunks.append(" ]\n\n\n")

    # Write docs
    chunks.append("pillow_docs = {\n")
    for format_id in ids:
        chunks.append('%r:\nu"""%s""",\n' % (format_id, docs[format_id]))
    chunks.append("}\n")

    # Write back
    with open(__file__, "wb") as f:
        f.write("".join(chunks).encode())
("SPIDER", "Spider 2D image", ""), + ("SUN", "Sun Raster File", ".ras"), + ("TGA", "Targa", ".tga"), + ("TIFF", "Adobe TIFF", ".tif .tiff"), + ("WMF", "Windows Metafile", ".wmf .emf"), + ("XBM", "X11 Bitmap", ".xbm"), + ("XPM", "X11 Pixel Map", ".xpm"), + ("XVThumb", "XV thumbnail image", ""), +] + + +pillow_docs = { + "BMP": """*From the Pillow docs:* + + + PIL reads and writes Windows and OS/2 BMP files containing ``1``, ``L``, ``P``, + or ``RGB`` data. 16-colour images are read as ``P`` images. Run-length encoding + is not supported. + + The :py:meth:`~PIL.Image.Image.write` method sets the following + :py:attr:`~PIL.Image.Image.info` properties: + + **compression** + Set to ``bmp_rle`` if the file is run-length encoded. + """, + "BUFR": """*From the Pillow docs:* + + + .. versionadded:: Pillow 1.1.3 + + PIL provides a stub driver for BUFR files. + + To add read or write support to your application, use + :py:func:`PIL.BufrStubImagePlugin.register_handler`. + """, + "CUR": """*From the Pillow docs:* + + + CUR is used to store cursors on Windows. The CUR decoder reads the largest + available cursor. Animated cursors are not supported. + """, + "DCX": """*From the Pillow docs:* + + + DCX is a container file format for PCX files, defined by Intel. The DCX format + is commonly used in fax applications. The DCX decoder can read files containing + ``1``, ``L``, ``P``, or ``RGB`` data. + + When the file is opened, only the first image is read. You can use + :py:meth:`~file.seek` or :py:mod:`~PIL.ImageSequence` to read other images. + + """, + "DDS": """*From the Pillow docs:* + + + DDS is a popular container texture format used in video games and natively + supported by DirectX. + Currently, DXT1, DXT3, and DXT5 pixel formats are supported and only in ``RGBA`` + mode. + + .. 
versionadded:: Pillow 3.4.0 DXT3 + """, + "DIB": """No docs for DIB.""", + "EPS": """*From the Pillow docs:* + + + PIL identifies EPS files containing image data, and can read files that contain + embedded raster images (ImageData descriptors). If Ghostscript is available, + other EPS files can be read as well. The EPS driver can also write EPS + images. The EPS driver can read EPS images in ``L``, ``LAB``, ``RGB`` and + ``CMYK`` mode, but Ghostscript may convert the images to ``RGB`` mode rather + than leaving them in the original color space. The EPS driver can write images + in ``L``, ``RGB`` and ``CMYK`` modes. + + If Ghostscript is available, you can call the :py:meth:`~PIL.Image.Image.load` + method with the following parameter to affect how Ghostscript renders the EPS + + **scale** + Affects the scale of the resultant rasterized image. If the EPS suggests + that the image be rendered at 100px x 100px, setting this parameter to + 2 will make the Ghostscript render a 200px x 200px image instead. The + relative position of the bounding box is maintained:: + + im = Image.open(...) + im.size #(100,100) + im.load(scale=2) + im.size #(200,200) + """, + "FITS": """*From the Pillow docs:* + + + .. versionadded:: Pillow 1.1.5 + + PIL provides a stub driver for FITS files. + + To add read or write support to your application, use + :py:func:`PIL.FitsStubImagePlugin.register_handler`. + """, + "FLI": """No docs for FLI.""", + "FPX": """*From the Pillow docs:* + + + PIL reads Kodak FlashPix files. In the current version, only the highest + resolution image is read from the file, and the viewing transform is not taken + into account. + + .. note:: + + To enable full FlashPix support, you need to build and install the IJG JPEG + library before building the Python Imaging Library. See the distribution + README for details. + """, + "FTEX": """*From the Pillow docs:* + + + .. 
versionadded:: Pillow 3.2.0 + + The FTEX decoder reads textures used for 3D objects in + Independence War 2: Edge Of Chaos. The plugin reads a single texture + per file, in the compressed and uncompressed formats. + """, + "GBR": """*From the Pillow docs:* + + + The GBR decoder reads GIMP brush files, version 1 and 2. + + The :py:meth:`~PIL.Image.Image.write` method sets the following + :py:attr:`~PIL.Image.Image.info` properties: + + **comment** + The brush name. + + **spacing** + The spacing between the brushes, in pixels. Version 2 only. + + GD + ^^ + + PIL reads uncompressed GD files. Note that this file format cannot be + automatically identified, so you must use :py:func:`PIL.GdImageFile.open` to + read such a file. + + The :py:meth:`~PIL.Image.Image.write` method sets the following + :py:attr:`~PIL.Image.Image.info` properties: + + **transparency** + Transparency color index. This key is omitted if the image is not + transparent. + """, + "GIF": """*From the Pillow docs:* + + + PIL reads GIF87a and GIF89a versions of the GIF file format. The library writes + run-length encoded files in GIF87a by default, unless GIF89a features + are used or GIF89a is already in use. + + Note that GIF files are always read as grayscale (``L``) + or palette mode (``P``) images. + + The :py:meth:`~PIL.Image.Image.write` method sets the following + :py:attr:`~PIL.Image.Image.info` properties: + + **background** + Default background color (a palette color index). + + **transparency** + Transparency color index. This key is omitted if the image is not + transparent. + + **version** + Version (either ``GIF87a`` or ``GIF89a``). + + **duration** + May not be present. The time to display the current frame + of the GIF, in milliseconds. + + **loop** + May not be present. The number of times the GIF should loop. + + Reading sequences + ~~~~~~~~~~~~~~~~~ + + The GIF loader supports the :py:meth:`~file.seek` and :py:meth:`~file.tell` + methods. 
You can seek to the next frame (``im.seek(im.tell() + 1)``), or rewind + the file by seeking to the first frame. Random access is not supported. + + ``im.seek()`` raises an ``EOFError`` if you try to seek after the last frame. + + Saving + ~~~~~~ + + When calling :py:meth:`~PIL.Image.Image.save`, the following options + are available:: + + im.save(out, save_all=True, append_images=[im1, im2, ...]) + + **save_all** + If present and true, all frames of the image will be saved. If + not, then only the first frame of a multiframe image will be saved. + + **append_images** + A list of images to append as additional frames. Each of the + images in the list can be single or multiframe images. + This is currently only supported for GIF, PDF, TIFF, and WebP. + + **duration** + The display duration of each frame of the multiframe gif, in + milliseconds. Pass a single integer for a constant duration, or a + list or tuple to set the duration for each frame separately. + + **loop** + Integer number of times the GIF should loop. + + **optimize** + If present and true, attempt to compress the palette by + eliminating unused colors. This is only useful if the palette can + be compressed to the next smaller power of 2 elements. + + **palette** + Use the specified palette for the saved image. The palette should + be a bytes or bytearray object containing the palette entries in + RGBRGB... form. It should be no more than 768 bytes. Alternately, + the palette can be passed in as an + :py:class:`PIL.ImagePalette.ImagePalette` object. + + **disposal** + Indicates the way in which the graphic is to be treated after being displayed. + + * 0 - No disposal specified. + * 1 - Do not dispose. + * 2 - Restore to background color. + * 3 - Restore to previous content. + + Pass a single integer for a constant disposal, or a list or tuple + to set the disposal for each frame separately. 
+ + Reading local images + ~~~~~~~~~~~~~~~~~~~~ + + The GIF loader creates an image memory the same size as the GIF file’s *logical + screen size*, and pastes the actual pixel data (the *local image*) into this + image. If you only want the actual pixel rectangle, you can manipulate the + :py:attr:`~PIL.Image.Image.size` and :py:attr:`~PIL.Image.Image.tile` + attributes before loading the file:: + + im = Image.open(...) + + if im.tile[0][0] == "gif": + # only read the first "local image" from this GIF file + tag, (x0, y0, x1, y1), offset, extra = im.tile[0] + im.size = (x1 - x0, y1 - y0) + im.tile = [(tag, (0, 0) + im.size, offset, extra)] + """, + "GRIB": """*From the Pillow docs:* + + + .. versionadded:: Pillow 1.1.5 + + PIL provides a stub driver for GRIB files. + + The driver requires the file to start with a GRIB header. If you have files + with embedded GRIB data, or files with multiple GRIB fields, your application + has to seek to the header before passing the file handle to PIL. + + To add read or write support to your application, use + :py:func:`PIL.GribStubImagePlugin.register_handler`. + """, + "HDF5": """*From the Pillow docs:* + + + .. versionadded:: Pillow 1.1.5 + + PIL provides a stub driver for HDF5 files. + + To add read or write support to your application, use + :py:func:`PIL.Hdf5StubImagePlugin.register_handler`. + """, + "ICNS": """*From the Pillow docs:* + + + PIL reads and (macOS only) writes macOS ``.icns`` files. By default, the + largest available icon is read, though you can override this by setting the + :py:attr:`~PIL.Image.Image.size` property before calling + :py:meth:`~PIL.Image.Image.load`. The :py:meth:`~PIL.Image.Image.write` method + sets the following :py:attr:`~PIL.Image.Image.info` property: + + **sizes** + A list of supported sizes found in this icon file; these are a + 3-tuple, ``(width, height, scale)``, where ``scale`` is 2 for a retina + icon and 1 for a standard icon. 
You *are* permitted to use this 3-tuple + format for the :py:attr:`~PIL.Image.Image.size` property if you set it + before calling :py:meth:`~PIL.Image.Image.load`; after loading, the size + will be reset to a 2-tuple containing pixel dimensions (so, e.g. if you + ask for ``(512, 512, 2)``, the final value of + :py:attr:`~PIL.Image.Image.size` will be ``(1024, 1024)``). + """, + "ICO": """*From the Pillow docs:* + + + ICO is used to store icons on Windows. The largest available icon is read. + + The :py:meth:`~PIL.Image.Image.save` method supports the following options: + + **sizes** + A list of sizes including in this ico file; these are a 2-tuple, + ``(width, height)``; Default to ``[(16, 16), (24, 24), (32, 32), (48, 48), + (64, 64), (128, 128), (256, 256)]``. Any sizes bigger than the original + size or 256 will be ignored. + + IM + ^^ + + IM is a format used by LabEye and other applications based on the IFUNC image + processing library. The library reads and writes most uncompressed interchange + versions of this format. + + IM is the only format that can store all internal PIL formats. + """, + "IM": """No docs for IM.""", + "IMT": """*From the Pillow docs:* + + + PIL reads Image Tools images containing ``L`` data. + """, + "IPTC": """No docs for IPTC.""", + "JPEG": """*From the Pillow docs:* + + + PIL reads JPEG, JFIF, and Adobe JPEG files containing ``L``, ``RGB``, or + ``CMYK`` data. It writes standard and progressive JFIF files. + + Using the :py:meth:`~PIL.Image.Image.draft` method, you can speed things up by + converting ``RGB`` images to ``L``, and resize images to 1/2, 1/4 or 1/8 of + their original size while loading them. + + The :py:meth:`~PIL.Image.Image.write` method may set the following + :py:attr:`~PIL.Image.Image.info` properties if available: + + **jfif** + JFIF application marker found. If the file is not a JFIF file, this key is + not present. + + **jfif_version** + A tuple representing the jfif version, (major version, minor version). 
+ + **jfif_density** + A tuple representing the pixel density of the image, in units specified + by jfif_unit. + + **jfif_unit** + Units for the jfif_density: + + * 0 - No Units + * 1 - Pixels per Inch + * 2 - Pixels per Centimeter + + **dpi** + A tuple representing the reported pixel density in pixels per inch, if + the file is a jfif file and the units are in inches. + + **adobe** + Adobe application marker found. If the file is not an Adobe JPEG file, this + key is not present. + + **adobe_transform** + Vendor Specific Tag. + + **progression** + Indicates that this is a progressive JPEG file. + + **icc_profile** + The ICC color profile for the image. + + **exif** + Raw EXIF data from the image. + + + The :py:meth:`~PIL.Image.Image.save` method supports the following options: + + **quality** + The image quality, on a scale from 1 (worst) to 95 (best). The default is + 75. Values above 95 should be avoided; 100 disables portions of the JPEG + compression algorithm, and results in large files with hardly any gain in + image quality. + + **optimize** + If present and true, indicates that the encoder should make an extra pass + over the image in order to select optimal encoder settings. + + **progressive** + If present and true, indicates that this image should be stored as a + progressive JPEG file. + + **dpi** + A tuple of integers representing the pixel density, ``(x,y)``. + + **icc_profile** + If present and true, the image is stored with the provided ICC profile. + If this parameter is not provided, the image will be saved with no profile + attached. To preserve the existing profile:: + + im.save(filename, 'jpeg', icc_profile=im.info.get('icc_profile')) + + **exif** + If present, the image will be stored with the provided raw EXIF data. + + **subsampling** + If present, sets the subsampling for the encoder. + + * ``keep``: Only valid for JPEG files, will retain the original image setting. 
+ * ``4:4:4``, ``4:2:2``, ``4:2:0``: Specific sampling values + * ``-1``: equivalent to ``keep`` + * ``0``: equivalent to ``4:4:4`` + * ``1``: equivalent to ``4:2:2`` + * ``2``: equivalent to ``4:2:0`` + + **qtables** + If present, sets the qtables for the encoder. This is listed as an + advanced option for wizards in the JPEG documentation. Use with + caution. ``qtables`` can be one of several types of values: + + * a string, naming a preset, e.g. ``keep``, ``web_low``, or ``web_high`` + * a list, tuple, or dictionary (with integer keys = + range(len(keys))) of lists of 64 integers. There must be + between 2 and 4 tables. + + .. versionadded:: Pillow 2.5.0 + + + .. note:: + + To enable JPEG support, you need to build and install the IJG JPEG library + before building the Python Imaging Library. See the distribution README for + details. + """, + "JPEG2000": """*From the Pillow docs:* + + + .. versionadded:: Pillow 2.4.0 + + PIL reads and writes JPEG 2000 files containing ``L``, ``LA``, ``RGB`` or + ``RGBA`` data. It can also read files containing ``YCbCr`` data, which it + converts on read into ``RGB`` or ``RGBA`` depending on whether or not there is + an alpha channel. PIL supports JPEG 2000 raw codestreams (``.j2k`` files), as + well as boxed JPEG 2000 files (``.j2p`` or ``.jpx`` files). PIL does *not* + support files whose components have different sampling frequencies. + + When loading, if you set the ``mode`` on the image prior to the + :py:meth:`~PIL.Image.Image.load` method being invoked, you can ask PIL to + convert the image to either ``RGB`` or ``RGBA`` rather than choosing for + itself. It is also possible to set ``reduce`` to the number of resolutions to + discard (each one reduces the size of the resulting image by a factor of 2), + and ``layers`` to specify the number of quality layers to load. + + The :py:meth:`~PIL.Image.Image.save` method supports the following options: + + **offset** + The image offset, as a tuple of integers, e.g. 
(16, 16) + + **tile_offset** + The tile offset, again as a 2-tuple of integers. + + **tile_size** + The tile size as a 2-tuple. If not specified, or if set to None, the + image will be saved without tiling. + + **quality_mode** + Either `"rates"` or `"dB"` depending on the units you want to use to + specify image quality. + + **quality_layers** + A sequence of numbers, each of which represents either an approximate size + reduction (if quality mode is `"rates"`) or a signal to noise ratio value + in decibels. If not specified, defaults to a single layer of full quality. + + **num_resolutions** + The number of different image resolutions to be stored (which corresponds + to the number of Discrete Wavelet Transform decompositions plus one). + + **codeblock_size** + The code-block size as a 2-tuple. Minimum size is 4 x 4, maximum is 1024 x + 1024, with the additional restriction that no code-block may have more + than 4096 coefficients (i.e. the product of the two numbers must be no + greater than 4096). + + **precinct_size** + The precinct size as a 2-tuple. Must be a power of two along both axes, + and must be greater than the code-block size. + + **irreversible** + If ``True``, use the lossy Irreversible Color Transformation + followed by DWT 9-7. Defaults to ``False``, which means to use the + Reversible Color Transformation with DWT 5-3. + + **progression** + Controls the progression order; must be one of ``"LRCP"``, ``"RLCP"``, + ``"RPCL"``, ``"PCRL"``, ``"CPRL"``. The letters stand for Component, + Position, Resolution and Layer respectively and control the order of + encoding, the idea being that e.g. an image encoded using LRCP mode can + have its quality layers decoded as they arrive at the decoder, while one + encoded using RLCP mode will have increasing resolutions decoded as they + arrive, and so on. + + **cinema_mode** + Set the encoder to produce output compliant with the digital cinema + specifications. 
The options here are ``"no"`` (the default), + ``"cinema2k-24"`` for 24fps 2K, ``"cinema2k-48"`` for 48fps 2K, and + ``"cinema4k-24"`` for 24fps 4K. Note that for compliant 2K files, + *at least one* of your image dimensions must match 2048 x 1080, while + for compliant 4K files, *at least one* of the dimensions must match + 4096 x 2160. + + .. note:: + + To enable JPEG 2000 support, you need to build and install the OpenJPEG + library, version 2.0.0 or higher, before building the Python Imaging + Library. + + Windows users can install the OpenJPEG binaries available on the + OpenJPEG website, but must add them to their PATH in order to use PIL (if + you fail to do this, you will get errors about not being able to load the + ``_imaging`` DLL). + """, + "MCIDAS": """*From the Pillow docs:* + + + PIL identifies and reads 8-bit McIdas area files. + """, + "MIC": """*From the Pillow docs:* + + + PIL identifies and reads Microsoft Image Composer (MIC) files. When opened, the + first sprite in the file is loaded. You can use :py:meth:`~file.seek` and + :py:meth:`~file.tell` to read other sprites from the file. + + Note that there may be an embedded gamma of 2.2 in MIC files. + """, + "MPEG": """*From the Pillow docs:* + + + PIL identifies MPEG files. + """, + "MPO": """*From the Pillow docs:* + + + Pillow identifies and reads Multi Picture Object (MPO) files, loading the primary + image when first opened. The :py:meth:`~file.seek` and :py:meth:`~file.tell` + methods may be used to read other pictures from the file. The pictures are + zero-indexed and random access is supported. + """, + "MSP": """*From the Pillow docs:* + + + PIL identifies and reads MSP files from Windows 1 and 2. The library writes + uncompressed (Windows 1) versions of this format. + """, + "PCD": """*From the Pillow docs:* + + + PIL reads PhotoCD files containing ``RGB`` data. This only reads the 768x512 + resolution image from the file. Higher resolutions are encoded in a proprietary + encoding. 
+ """, + "PCX": """*From the Pillow docs:* + + + PIL reads and writes PCX files containing ``1``, ``L``, ``P``, or ``RGB`` data. + """, + "PIXAR": """*From the Pillow docs:* + + + PIL provides limited support for PIXAR raster files. The library can identify + and read “dumped” RGB files. + + The format code is ``PIXAR``. + """, + "PNG": """*From the Pillow docs:* + + + PIL identifies, reads, and writes PNG files containing ``1``, ``L``, ``P``, + ``RGB``, or ``RGBA`` data. Interlaced files are supported as of v1.1.7. + + The :py:meth:`~PIL.Image.Image.write` method sets the following + :py:attr:`~PIL.Image.Image.info` properties, when appropriate: + + **chromaticity** + The chromaticity points, as an 8 tuple of floats. (``White Point + X``, ``White Point Y``, ``Red X``, ``Red Y``, ``Green X``, ``Green + Y``, ``Blue X``, ``Blue Y``) + + **gamma** + Gamma, given as a floating point number. + + **srgb** + The sRGB rendering intent as an integer. + + * 0 Perceptual + * 1 Relative Colorimetric + * 2 Saturation + * 3 Absolute Colorimetric + + **transparency** + For ``P`` images: Either the palette index for full transparent pixels, + or a byte string with alpha values for each palette entry. + + For ``L`` and ``RGB`` images, the color that represents full transparent + pixels in this image. + + This key is omitted if the image is not a transparent palette image. + + ``Open`` also sets ``Image.text`` to a list of the values of the + ``tEXt``, ``zTXt``, and ``iTXt`` chunks of the PNG image. Individual + compressed chunks are limited to a decompressed size of + ``PngImagePlugin.MAX_TEXT_CHUNK``, by default 1MB, to prevent + decompression bombs. Additionally, the total size of all of the text + chunks is limited to ``PngImagePlugin.MAX_TEXT_MEMORY``, defaulting to + 64MB. + + The :py:meth:`~PIL.Image.Image.save` method supports the following options: + + **optimize** + If present and true, instructs the PNG writer to make the output file as + small as possible. 
This includes extra processing in order to find optimal + encoder settings. + + **transparency** + For ``P``, ``L``, and ``RGB`` images, this option controls what + color image to mark as transparent. + + For ``P`` images, this can be a either the palette index, + or a byte string with alpha values for each palette entry. + + **dpi** + A tuple of two numbers corresponding to the desired dpi in each direction. + + **pnginfo** + A :py:class:`PIL.PngImagePlugin.PngInfo` instance containing text tags. + + **compress_level** + ZLIB compression level, a number between 0 and 9: 1 gives best speed, + 9 gives best compression, 0 gives no compression at all. Default is 6. + When ``optimize`` option is True ``compress_level`` has no effect + (it is set to 9 regardless of a value passed). + + **icc_profile** + The ICC Profile to include in the saved file. + + **bits (experimental)** + For ``P`` images, this option controls how many bits to store. If omitted, + the PNG writer uses 8 bits (256 colors). + + **dictionary (experimental)** + Set the ZLIB encoder dictionary. + + .. note:: + + To enable PNG support, you need to build and install the ZLIB compression + library before building the Python Imaging Library. See the installation + documentation for details. + """, + "PPM": """*From the Pillow docs:* + + + PIL reads and writes PBM, PGM and PPM files containing ``1``, ``L`` or ``RGB`` + data. + """, + "PSD": """*From the Pillow docs:* + + + PIL identifies and reads PSD files written by Adobe Photoshop 2.5 and 3.0. + + """, + "SGI": """*From the Pillow docs:* + + + Pillow reads and writes uncompressed ``L``, ``RGB``, and ``RGBA`` files. + + """, + "SPIDER": """*From the Pillow docs:* + + + PIL reads and writes SPIDER image files of 32-bit floating point data + ("F;32F"). + + PIL also reads SPIDER stack files containing sequences of SPIDER images. The + :py:meth:`~file.seek` and :py:meth:`~file.tell` methods are supported, and + random access is allowed. 
+ + The :py:meth:`~PIL.Image.Image.write` method sets the following attributes: + + **format** + Set to ``SPIDER`` + + **istack** + Set to 1 if the file is an image stack, else 0. + + **nimages** + Set to the number of images in the stack. + + A convenience method, :py:meth:`~PIL.Image.Image.convert2byte`, is provided for + converting floating point data to byte data (mode ``L``):: + + im = Image.open('image001.spi').convert2byte() + + Writing files in SPIDER format + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + The extension of SPIDER files may be any 3 alphanumeric characters. Therefore + the output format must be specified explicitly:: + + im.save('newimage.spi', format='SPIDER') + + For more information about the SPIDER image processing package, see the + `SPIDER homepage`_ at `Wadsworth Center`_. + + .. _SPIDER homepage: https://spider.wadsworth.org/spider_doc/spider/docs/spider.html + .. _Wadsworth Center: https://www.wadsworth.org/ + """, + "SUN": """No docs for SUN.""", + "TGA": """*From the Pillow docs:* + + + PIL reads 24- and 32-bit uncompressed and run-length encoded TGA files. + """, + "TIFF": """*From the Pillow docs:* + + + Pillow reads and writes TIFF files. It can read both striped and tiled + images, pixel and plane interleaved multi-band images. If you have + libtiff and its headers installed, PIL can read and write many kinds + of compressed TIFF files. If not, PIL will only read and write + uncompressed files. + + .. note:: + + Beginning in version 5.0.0, Pillow requires libtiff to read or + write compressed files. Prior to that release, Pillow had buggy + support for reading Packbits, LZW and JPEG compressed TIFFs + without using libtiff. + + The :py:meth:`~PIL.Image.Image.write` method sets the following + :py:attr:`~PIL.Image.Image.info` properties: + + **compression** + Compression mode. + + .. versionadded:: Pillow 2.0.0 + + **dpi** + Image resolution as an ``(xdpi, ydpi)`` tuple, where applicable. 
You can use + the :py:attr:`~PIL.Image.Image.tag` attribute to get more detailed + information about the image resolution. + + .. versionadded:: Pillow 1.1.5 + + **resolution** + Image resolution as an ``(xres, yres)`` tuple, where applicable. This is a + measurement in whichever unit is specified by the file. + + .. versionadded:: Pillow 1.1.5 + + + The :py:attr:`~PIL.Image.Image.tag_v2` attribute contains a dictionary + of TIFF metadata. The keys are numerical indexes from + :py:attr:`~PIL.TiffTags.TAGS_V2`. Values are strings or numbers for single + items, multiple values are returned in a tuple of values. Rational + numbers are returned as a :py:class:`~PIL.TiffImagePlugin.IFDRational` + object. + + .. versionadded:: Pillow 3.0.0 + + For compatibility with legacy code, the + :py:attr:`~PIL.Image.Image.tag` attribute contains a dictionary of + decoded TIFF fields as returned prior to version 3.0.0. Values are + returned as either strings or tuples of numeric values. Rational + numbers are returned as a tuple of ``(numerator, denominator)``. + + .. deprecated:: 3.0.0 + + + Saving Tiff Images + ~~~~~~~~~~~~~~~~~~ + + The :py:meth:`~PIL.Image.Image.save` method can take the following keyword arguments: + + **save_all** + If true, Pillow will save all frames of the image to a multiframe tiff document. + + .. versionadded:: Pillow 3.4.0 + + **tiffinfo** + A :py:class:`~PIL.TiffImagePlugin.ImageFileDirectory_v2` object or dict + object containing tiff tags and values. The TIFF field type is + autodetected for Numeric and string values, any other types + require using an :py:class:`~PIL.TiffImagePlugin.ImageFileDirectory_v2` + object and setting the type in + :py:attr:`~PIL.TiffImagePlugin.ImageFileDirectory_v2.tagtype` with + the appropriate numerical value from + ``TiffTags.TYPES``. + + .. versionadded:: Pillow 2.3.0 + + Metadata values that are of the rational type should be passed in + using a :py:class:`~PIL.TiffImagePlugin.IFDRational` object. + + .. 
versionadded:: Pillow 3.1.0 + + For compatibility with legacy code, a + :py:class:`~PIL.TiffImagePlugin.ImageFileDirectory_v1` object may + be passed in this field. However, this is deprecated. + + .. versionadded:: Pillow 3.0.0 + + .. note:: + + Only some tags are currently supported when writing using + libtiff. The supported list is found in + :py:attr:`~PIL:TiffTags.LIBTIFF_CORE`. + + **compression** + A string containing the desired compression method for the + file. (valid only with libtiff installed) Valid compression + methods are: ``None``, ``"tiff_ccitt"``, ``"group3"``, + ``"group4"``, ``"tiff_jpeg"``, ``"tiff_adobe_deflate"``, + ``"tiff_thunderscan"``, ``"tiff_deflate"``, ``"tiff_sgilog"``, + ``"tiff_sgilog24"``, ``"tiff_raw_16"`` + + These arguments to set the tiff header fields are an alternative to + using the general tags available through tiffinfo. + + **description** + + **software** + + **date_time** + + **artist** + + **copyright** + Strings + + **resolution_unit** + A string of "inch", "centimeter" or "cm" + + **resolution** + + **x_resolution** + + **y_resolution** + + **dpi** + Either a Float, 2 tuple of (numerator, denominator) or a + :py:class:`~PIL.TiffImagePlugin.IFDRational`. Resolution implies + an equal x and y resolution, dpi also implies a unit of inches. + + """, + "WMF": """*From the Pillow docs:* + + + PIL can identify playable WMF files. + + In PIL 1.1.4 and earlier, the WMF driver provides some limited rendering + support, but not enough to be useful for any real application. + + In PIL 1.1.5 and later, the WMF driver is a stub driver. To add WMF read or + write support to your application, use + :py:func:`PIL.WmfImagePlugin.register_handler` to register a WMF handler. + + :: + + from PIL import Image + from PIL import WmfImagePlugin + + class WmfHandler: + def open(self, im): + ... + def load(self, im): + ... + return image + def save(self, im, fp, filename): + ... 
+ + wmf_handler = WmfHandler() + + WmfImagePlugin.register_handler(wmf_handler) + + im = Image.open("sample.wmf")""", + "XBM": """*From the Pillow docs:* + + + PIL reads and writes X bitmap files (mode ``1``). + """, + "XPM": """*From the Pillow docs:* + + + PIL reads X pixmap files (mode ``P``) with 256 colors or less. + + The :py:meth:`~PIL.Image.Image.write` method sets the following + :py:attr:`~PIL.Image.Image.info` properties: + + **transparency** + Transparency color index. This key is omitted if the image is not + transparent. + """, + "XVThumb": """No docs for XVThumb.""", +} diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/pillow_legacy.py b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/pillow_legacy.py new file mode 100644 index 0000000000000000000000000000000000000000..9007c87b45b8aa14eef263c172fa95b3f788524a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/pillow_legacy.py @@ -0,0 +1,823 @@ +# -*- coding: utf-8 -*- +# imageio is distributed under the terms of the (new) BSD License. + +"""Read/Write images using pillow/PIL (legacy). + +Backend Library: `Pillow `_ + +Pillow is a friendly fork of PIL (Python Image Library) and supports +reading and writing of common formats (jpg, png, gif, tiff, ...). While +these docs provide an overview of some of its features, pillow is +constantly improving. Hence, the complete list of features can be found +in pillows official docs (see the Backend Library link). 
+ +Parameters for Reading +---------------------- +pilmode : str + (Available for all formats except GIF-PIL) + From the Pillow documentation: + + * 'L' (8-bit pixels, grayscale) + * 'P' (8-bit pixels, mapped to any other mode using a color palette) + * 'RGB' (3x8-bit pixels, true color) + * 'RGBA' (4x8-bit pixels, true color with transparency mask) + * 'CMYK' (4x8-bit pixels, color separation) + * 'YCbCr' (3x8-bit pixels, color video format) + * 'I' (32-bit signed integer pixels) + * 'F' (32-bit floating point pixels) + + PIL also provides limited support for a few special modes, including + 'LA' ('L' with alpha), 'RGBX' (true color with padding) and 'RGBa' + (true color with premultiplied alpha). + + When translating a color image to grayscale (mode 'L', 'I' or 'F'), + the library uses the ITU-R 601-2 luma transform:: + + L = R * 299/1000 + G * 587/1000 + B * 114/1000 +as_gray : bool + (Available for all formats except GIF-PIL) + If True, the image is converted using mode 'F'. When `mode` is + not None and `as_gray` is True, the image is first converted + according to `mode`, and the result is then "flattened" using + mode 'F'. +ignoregamma : bool + (Only available in PNG-PIL) + Avoid gamma correction. Default True. +exifrotate : bool + (Only available in JPEG-PIL) + Automatically rotate the image according to exif flag. Default True. + + +Parameters for saving +--------------------- +optimize : bool + (Only available in PNG-PIL) + If present and true, instructs the PNG writer to make the output file + as small as possible. This includes extra processing in order to find + optimal encoder settings. +transparency: + (Only available in PNG-PIL) + This option controls what color image to mark as transparent. +dpi: tuple of two scalars + (Only available in PNG-PIL) + The desired dpi in each direction. +pnginfo: PIL.PngImagePlugin.PngInfo + (Only available in PNG-PIL) + Object containing text tags. 
+compress_level: int + (Only available in PNG-PIL) + ZLIB compression level, a number between 0 and 9: 1 gives best speed, + 9 gives best compression, 0 gives no compression at all. Default is 9. + When ``optimize`` option is True ``compress_level`` has no effect + (it is set to 9 regardless of a value passed). +compression: int + (Only available in PNG-PIL) + Compatibility with the freeimage PNG format. If given, it overrides + compress_level. +icc_profile: + (Only available in PNG-PIL) + The ICC Profile to include in the saved file. +bits (experimental): int + (Only available in PNG-PIL) + This option controls how many bits to store. If omitted, + the PNG writer uses 8 bits (256 colors). +quantize: + (Only available in PNG-PIL) + Compatibility with the freeimage PNG format. If given, it overrides + bits. In this case, given as a number between 1-256. +dictionary (experimental): dict + (Only available in PNG-PIL) + Set the ZLIB encoder dictionary. +prefer_uint8: bool + (Only available in PNG-PIL) + Let the PNG writer truncate uint16 image arrays to uint8 if their values fall + within the range [0, 255]. Defaults to true for legacy compatibility, however + it is recommended to set this to false to avoid unexpected behavior when + saving e.g. weakly saturated images. + +quality : scalar + (Only available in JPEG-PIL) + The compression factor of the saved image (1..100), higher + numbers result in higher quality but larger file size. Default 75. +progressive : bool + (Only available in JPEG-PIL) + Save as a progressive JPEG file (e.g. for images on the web). + Default False. +optimize : bool + (Only available in JPEG-PIL) + On saving, compute optimal Huffman coding tables (can reduce a few + percent of file size). Default False. +dpi : tuple of int + (Only available in JPEG-PIL) + The pixel density, ``(x,y)``. +icc_profile : object + (Only available in JPEG-PIL) + If present and true, the image is stored with the provided ICC profile. 
+ If this parameter is not provided, the image will be saved with no + profile attached. +exif : dict + (Only available in JPEG-PIL) + If present, the image will be stored with the provided raw EXIF data. +subsampling : str + (Only available in JPEG-PIL) + Sets the subsampling for the encoder. See Pillow docs for details. +qtables : object + (Only available in JPEG-PIL) + Set the qtables for the encoder. See Pillow docs for details. +quality_mode : str + (Only available in JPEG2000-PIL) + Either `"rates"` or `"dB"` depending on the units you want to use to + specify image quality. +quality : float + (Only available in JPEG2000-PIL) + Approximate size reduction (if quality mode is `rates`) or a signal to noise ratio + in decibels (if quality mode is `dB`). +loop : int + (Only available in GIF-PIL) + The number of iterations. Default 0 (meaning loop indefinitely). +duration : {float, list} + (Only available in GIF-PIL) + The duration (in milliseconds) of each frame. Either specify one value + that is used for all frames, or one value for each frame. +fps : float + (Only available in GIF-PIL) + The number of frames per second. If duration is not given, the + duration for each frame is set to 1/fps. Default 10. +palettesize : int + (Only available in GIF-PIL) + The number of colors to quantize the image to. Is rounded to + the nearest power of two. Default 256. +subrectangles : bool + (Only available in GIF-PIL) + If True, will try and optimize the GIF by storing only the + rectangular parts of each frame that change with respect to the + previous. Default False. + +Notes +----- +To enable JPEG 2000 support, you need to build and install the OpenJPEG library, +version 2.0.0 or higher, before building the Python Imaging Library. Windows +users can install the OpenJPEG binaries available on the OpenJPEG website, but +must add them to their PATH in order to use PIL (if you fail to do this, you +will get errors about not being able to load the ``_imaging`` DLL). 
+ +GIF images read with this plugin are always RGBA. The alpha channel is ignored +when saving RGB images. +""" + +import logging +import threading + +import numpy as np + +from ..core import Format, image_as_uint +from ..core.request import URI_FILE, URI_BYTES + + +logger = logging.getLogger(__name__) + + +# todo: Pillow ImageGrab module supports grabbing the screen on Win and OSX. + + +GENERIC_DOCS = """ + Parameters for reading + ---------------------- + + pilmode : str + From the Pillow documentation: + + * 'L' (8-bit pixels, grayscale) + * 'P' (8-bit pixels, mapped to any other mode using a color palette) + * 'RGB' (3x8-bit pixels, true color) + * 'RGBA' (4x8-bit pixels, true color with transparency mask) + * 'CMYK' (4x8-bit pixels, color separation) + * 'YCbCr' (3x8-bit pixels, color video format) + * 'I' (32-bit signed integer pixels) + * 'F' (32-bit floating point pixels) + + PIL also provides limited support for a few special modes, including + 'LA' ('L' with alpha), 'RGBX' (true color with padding) and 'RGBa' + (true color with premultiplied alpha). + + When translating a color image to grayscale (mode 'L', 'I' or 'F'), + the library uses the ITU-R 601-2 luma transform:: + + L = R * 299/1000 + G * 587/1000 + B * 114/1000 + as_gray : bool + If True, the image is converted using mode 'F'. When `mode` is + not None and `as_gray` is True, the image is first converted + according to `mode`, and the result is then "flattened" using + mode 'F'. +""" + + +class PillowFormat(Format): + """ + Base format class for Pillow formats. 
+ """ + + _pillow_imported = False + _Image = None + _modes = "i" + _description = "" + + def __init__(self, *args, plugin_id: str = None, **kwargs): + super(PillowFormat, self).__init__(*args, **kwargs) + # Used to synchronize _init_pillow(), see #244 + self._lock = threading.RLock() + + self._plugin_id = plugin_id + + @property + def plugin_id(self): + """The PIL plugin id.""" + return self._plugin_id # Set when format is created + + def _init_pillow(self): + with self._lock: + if not self._pillow_imported: + self._pillow_imported = True # more like tried to import + import PIL + + if not hasattr(PIL, "__version__"): # pragma: no cover + raise ImportError( + "Imageio Pillow plugin requires " "Pillow, not PIL!" + ) + from PIL import Image + + self._Image = Image + elif self._Image is None: # pragma: no cover + raise RuntimeError("Imageio Pillow plugin requires " "Pillow lib.") + Image = self._Image + + if self.plugin_id in ("PNG", "JPEG", "BMP", "GIF", "PPM"): + Image.preinit() + else: + Image.init() + return Image + + def _can_read(self, request): + Image = self._init_pillow() + if self.plugin_id in Image.OPEN: + factory, accept = Image.OPEN[self.plugin_id] + if accept: + if request.firstbytes and accept(request.firstbytes): + return True + + def _can_write(self, request): + Image = self._init_pillow() + if request.extension in self.extensions or request._uri_type in [ + URI_FILE, + URI_BYTES, + ]: + if self.plugin_id in Image.SAVE: + return True + + class Reader(Format.Reader): + def _open(self, pilmode=None, as_gray=False): + Image = self.format._init_pillow() + try: + factory, accept = Image.OPEN[self.format.plugin_id] + except KeyError: + raise RuntimeError("Format %s cannot read images." 
% self.format.name) + self._fp = self._get_file() + self._im = factory(self._fp, "") + if hasattr(Image, "_decompression_bomb_check"): + Image._decompression_bomb_check(self._im.size) + # Save the raw mode used by the palette for a BMP because it may not be the number of channels + # When the data is read, imageio hands the palette to PIL to handle and clears the rawmode argument + # However, there is a bug in PIL with handling animated GIFs with a different color palette on each frame. + # This issue is resolved by using the raw palette data but the rawmode information is now lost. So we + # store the raw mode for later use + if self._im.palette and self._im.palette.dirty: + self._im.palette.rawmode_saved = self._im.palette.rawmode + pil_try_read(self._im) + # Store args + self._kwargs = dict( + as_gray=as_gray, is_gray=_palette_is_grayscale(self._im) + ) + # setting mode=None is not the same as just not providing it + if pilmode is not None: + self._kwargs["mode"] = pilmode + # Set length + self._length = 1 + if hasattr(self._im, "n_frames"): + self._length = self._im.n_frames + + def _get_file(self): + self._we_own_fp = False + return self.request.get_file() + + def _close(self): + save_pillow_close(self._im) + if self._we_own_fp: + self._fp.close() + # else: request object handles closing the _fp + + def _get_length(self): + return self._length + + def _seek(self, index): + try: + self._im.seek(index) + except EOFError: + raise IndexError("Could not seek to index %i" % index) + + def _get_data(self, index): + if index >= self._length: + raise IndexError("Image index %i > %i" % (index, self._length)) + i = self._im.tell() + if i > index: + self._seek(index) # just try + else: + while i < index: # some formats need to be read in sequence + i += 1 + self._seek(i) + if self._im.palette and self._im.palette.dirty: + self._im.palette.rawmode_saved = self._im.palette.rawmode + self._im.getdata()[0] + im = pil_get_frame(self._im, **self._kwargs) + return im, 
self._im.info + + def _get_meta_data(self, index): + if not (index is None or index == 0): + raise IndexError() + return self._im.info + + class Writer(Format.Writer): + def _open(self): + Image = self.format._init_pillow() + try: + self._save_func = Image.SAVE[self.format.plugin_id] + except KeyError: + raise RuntimeError("Format %s cannot write images." % self.format.name) + self._fp = self.request.get_file() + self._meta = {} + self._written = False + + def _close(self): + pass # request object handled closing _fp + + def _append_data(self, im, meta): + if self._written: + raise RuntimeError( + "Format %s only supports single images." % self.format.name + ) + # Pop unit dimension for grayscale images + if im.ndim == 3 and im.shape[-1] == 1: + im = im[:, :, 0] + self._written = True + self._meta.update(meta) + img = ndarray_to_pil( + im, self.format.plugin_id, self._meta.pop("prefer_uint8", True) + ) + if "bits" in self._meta: + img = img.quantize() # Make it a P image, so bits arg is used + img.save(self._fp, format=self.format.plugin_id, **self._meta) + save_pillow_close(img) + + def set_meta_data(self, meta): + self._meta.update(meta) + + +class PNGFormat(PillowFormat): + """See :mod:`imageio.plugins.pillow_legacy`""" + + class Reader(PillowFormat.Reader): + def _open(self, pilmode=None, as_gray=False, ignoregamma=True): + return PillowFormat.Reader._open(self, pilmode=pilmode, as_gray=as_gray) + + def _get_data(self, index): + im, info = PillowFormat.Reader._get_data(self, index) + if not self.request.kwargs.get("ignoregamma", True): + # The gamma value in the file represents the gamma factor for the + # hardware on the system where the file was created, and is meant + # to be able to match the colors with the system on which the + # image is shown. 
See also issue #366 + try: + gamma = float(info["gamma"]) + except (KeyError, ValueError): + pass + else: + scale = float(65536 if im.dtype == np.uint16 else 255) + gain = 1.0 + im[:] = ((im / scale) ** gamma) * scale * gain + 0.4999 + return im, info + + # -- + + class Writer(PillowFormat.Writer): + def _open(self, compression=None, quantize=None, interlaced=False, **kwargs): + # Better default for compression + kwargs["compress_level"] = kwargs.get("compress_level", 9) + + if compression is not None: + if compression < 0 or compression > 9: + raise ValueError("Invalid PNG compression level: %r" % compression) + kwargs["compress_level"] = compression + if quantize is not None: + for bits in range(1, 9): + if 2**bits == quantize: + break + else: + raise ValueError( + "PNG quantize must be power of two, " "not %r" % quantize + ) + kwargs["bits"] = bits + if interlaced: + logger.warning("PIL PNG writer cannot produce interlaced images.") + + ok_keys = ( + "optimize", + "transparency", + "dpi", + "pnginfo", + "bits", + "compress_level", + "icc_profile", + "dictionary", + "prefer_uint8", + ) + for key in kwargs: + if key not in ok_keys: + raise TypeError("Invalid arg for PNG writer: %r" % key) + + PillowFormat.Writer._open(self) + self._meta.update(kwargs) + + def _append_data(self, im, meta): + if str(im.dtype) == "uint16" and (im.ndim == 2 or im.shape[-1] == 1): + im = image_as_uint(im, bitdepth=16) + else: + im = image_as_uint(im, bitdepth=8) + PillowFormat.Writer._append_data(self, im, meta) + + +class JPEGFormat(PillowFormat): + """See :mod:`imageio.plugins.pillow_legacy`""" + + class Reader(PillowFormat.Reader): + def _open(self, pilmode=None, as_gray=False, exifrotate=True): + return PillowFormat.Reader._open(self, pilmode=pilmode, as_gray=as_gray) + + def _get_file(self): + # Pillow uses seek for JPG, so we cannot directly stream from web + if self.request.filename.startswith( + ("http://", "https://") + ) or ".zip/" in self.request.filename.replace("\\", "/"): 
+ self._we_own_fp = True + return open(self.request.get_local_filename(), "rb") + else: + self._we_own_fp = False + return self.request.get_file() + + def _get_data(self, index): + im, info = PillowFormat.Reader._get_data(self, index) + + # Handle exif + if "exif" in info: + from PIL.ExifTags import TAGS + + info["EXIF_MAIN"] = {} + for tag, value in self._im._getexif().items(): + decoded = TAGS.get(tag, tag) + info["EXIF_MAIN"][decoded] = value + + im = self._rotate(im, info) + return im, info + + def _rotate(self, im, meta): + """Use Orientation information from EXIF meta data to + orient the image correctly. Similar code as in FreeImage plugin. + """ + if self.request.kwargs.get("exifrotate", True): + try: + ori = meta["EXIF_MAIN"]["Orientation"] + except KeyError: # pragma: no cover + pass # Orientation not available + else: # pragma: no cover - we cannot touch all cases + # www.impulseadventure.com/photo/exif-orientation.html + if ori in [1, 2]: + pass + if ori in [3, 4]: + im = np.rot90(im, 2) + if ori in [5, 6]: + im = np.rot90(im, 3) + if ori in [7, 8]: + im = np.rot90(im) + if ori in [2, 4, 5, 7]: # Flipped cases (rare) + im = np.fliplr(im) + return im + + # -- + + class Writer(PillowFormat.Writer): + def _open(self, quality=75, progressive=False, optimize=False, **kwargs): + # The JPEG quality can be between 0 (worst) and 100 (best) + quality = int(quality) + if quality < 0 or quality > 100: + raise ValueError("JPEG quality should be between 0 and 100.") + + kwargs["quality"] = quality + kwargs["progressive"] = bool(progressive) + kwargs["optimize"] = bool(progressive) + + PillowFormat.Writer._open(self) + self._meta.update(kwargs) + + def _append_data(self, im, meta): + if im.ndim == 3 and im.shape[-1] == 4: + raise IOError("JPEG does not support alpha channel.") + im = image_as_uint(im, bitdepth=8) + PillowFormat.Writer._append_data(self, im, meta) + return + + +class JPEG2000Format(PillowFormat): + """See :mod:`imageio.plugins.pillow_legacy`""" + + 
class Reader(PillowFormat.Reader): + def _open(self, pilmode=None, as_gray=False): + return PillowFormat.Reader._open(self, pilmode=pilmode, as_gray=as_gray) + + def _get_file(self): + # Pillow uses seek for JPG, so we cannot directly stream from web + if self.request.filename.startswith( + ("http://", "https://") + ) or ".zip/" in self.request.filename.replace("\\", "/"): + self._we_own_fp = True + return open(self.request.get_local_filename(), "rb") + else: + self._we_own_fp = False + return self.request.get_file() + + def _get_data(self, index): + im, info = PillowFormat.Reader._get_data(self, index) + + # Handle exif + if "exif" in info: + from PIL.ExifTags import TAGS + + info["EXIF_MAIN"] = {} + for tag, value in self._im._getexif().items(): + decoded = TAGS.get(tag, tag) + info["EXIF_MAIN"][decoded] = value + + im = self._rotate(im, info) + return im, info + + def _rotate(self, im, meta): + """Use Orientation information from EXIF meta data to + orient the image correctly. Similar code as in FreeImage plugin. 
+ """ + if self.request.kwargs.get("exifrotate", True): + try: + ori = meta["EXIF_MAIN"]["Orientation"] + except KeyError: # pragma: no cover + pass # Orientation not available + else: # pragma: no cover - we cannot touch all cases + # www.impulseadventure.com/photo/exif-orientation.html + if ori in [1, 2]: + pass + if ori in [3, 4]: + im = np.rot90(im, 2) + if ori in [5, 6]: + im = np.rot90(im, 3) + if ori in [7, 8]: + im = np.rot90(im) + if ori in [2, 4, 5, 7]: # Flipped cases (rare) + im = np.fliplr(im) + return im + + # -- + + class Writer(PillowFormat.Writer): + def _open(self, quality_mode="rates", quality=5, **kwargs): + # Check quality - in Pillow it should be no higher than 95 + if quality_mode not in {"rates", "dB"}: + raise ValueError("Quality mode should be either 'rates' or 'dB'") + + quality = float(quality) + + if quality_mode == "rates" and (quality < 1 or quality > 1000): + raise ValueError( + "The quality value {} seems to be an invalid rate!".format(quality) + ) + elif quality_mode == "dB" and (quality < 15 or quality > 100): + raise ValueError( + "The quality value {} seems to be an invalid PSNR!".format(quality) + ) + + kwargs["quality_mode"] = quality_mode + kwargs["quality_layers"] = [quality] + + PillowFormat.Writer._open(self) + self._meta.update(kwargs) + + def _append_data(self, im, meta): + if im.ndim == 3 and im.shape[-1] == 4: + raise IOError( + "The current implementation of JPEG 2000 does not support alpha channel." + ) + im = image_as_uint(im, bitdepth=8) + PillowFormat.Writer._append_data(self, im, meta) + return + + +def save_pillow_close(im): + # see issue #216 and #300 + if hasattr(im, "close"): + if hasattr(getattr(im, "fp", None), "close"): + im.close() + + +# Func from skimage + +# This cells contains code from scikit-image, in particular from +# http://github.com/scikit-image/scikit-image/blob/master/ +# skimage/io/_plugins/pil_plugin.py +# The scikit-image license applies. 
+ + +def pil_try_read(im): + try: + # this will raise an IOError if the file is not readable + im.getdata()[0] + except IOError as e: + site = "http://pillow.readthedocs.io/en/latest/installation.html" + site += "#external-libraries" + pillow_error_message = str(e) + error_message = ( + 'Could not load "%s" \n' + 'Reason: "%s"\n' + "Please see documentation at: %s" + % (im.filename, pillow_error_message, site) + ) + raise ValueError(error_message) + + +def _palette_is_grayscale(pil_image): + if pil_image.mode != "P": + return False + elif pil_image.info.get("transparency", None): # see issue #475 + return False + # get palette as an array with R, G, B columns + # Note: starting in pillow 9.1 palettes may have less than 256 entries + palette = np.asarray(pil_image.getpalette()).reshape((-1, 3)) + # Not all palette colors are used; unused colors have junk values. + start, stop = pil_image.getextrema() + valid_palette = palette[start : stop + 1] + # Image is grayscale if channel differences (R - G and G - B) + # are all zero. + return np.allclose(np.diff(valid_palette), 0) + + +def pil_get_frame(im, is_gray=None, as_gray=None, mode=None, dtype=None): + """ + is_gray: Whether the image *is* gray (by inspecting its palette). + as_gray: Whether the resulting image must be converted to gaey. + mode: The mode to convert to. + """ + + if is_gray is None: + is_gray = _palette_is_grayscale(im) + + frame = im + + # Convert ... + if mode is not None: + # Mode is explicitly given ... + if mode != im.mode: + frame = im.convert(mode) + elif as_gray: + pass # don't do any auto-conversions (but do the explicit one above) + elif im.mode == "P" and is_gray: + # Paletted images that are already gray by their palette + # are converted so that the resulting numpy array is 2D. + frame = im.convert("L") + elif im.mode == "P": + # Paletted images are converted to RGB/RGBA. We jump some loops to make + # this work well. 
+ if im.info.get("transparency", None) is not None: + # Let Pillow apply the transparency, see issue #210 and #246 + frame = im.convert("RGBA") + elif im.palette.mode in ("RGB", "RGBA"): + # We can do this ourselves. Pillow seems to sometimes screw + # this up if a multi-gif has a palette for each frame ... + # Create palette array + p = np.frombuffer(im.palette.getdata()[1], np.uint8) + # Restore the raw mode that was saved to be used to parse the palette + if hasattr(im.palette, "rawmode_saved"): + im.palette.rawmode = im.palette.rawmode_saved + mode = im.palette.rawmode if im.palette.rawmode else im.palette.mode + nchannels = len(mode) + # Shape it. + p.shape = -1, nchannels + if p.shape[1] == 3 or (p.shape[1] == 4 and mode[-1] == "X"): + p = np.column_stack((p[:, :3], 255 * np.ones(p.shape[0], p.dtype))) + # Swap the axes if the mode is in BGR and not RGB + if mode.startswith("BGR"): + p = p[:, [2, 1, 0]] if p.shape[1] == 3 else p[:, [2, 1, 0, 3]] + # Apply palette + frame_paletted = np.array(im, np.uint8) + try: + frame = p[frame_paletted] + except Exception: + # Ok, let PIL do it. The introduction of the branch that + # tests `im.info['transparency']` should make this happen + # much less often, but let's keep it, to be safe. + frame = im.convert("RGBA") + else: + # Let Pillow do it. Unlinke skimage, we always convert + # to RGBA; palettes can be RGBA. + if True: # im.format == 'PNG' and 'transparency' in im.info: + frame = im.convert("RGBA") + else: + frame = im.convert("RGB") + elif "A" in im.mode: + frame = im.convert("RGBA") + elif im.mode == "CMYK": + frame = im.convert("RGB") + elif im.format == "GIF" and im.mode == "RGB": + # pillow9 returns RGBA images for subsequent frames so that it can deal + # with multi-frame GIF that use frame-level palettes and don't dispose + # all areas. + + # For backwards compatibility, we promote everything to RGBA. 
+ frame = im.convert("RGBA") + + # Apply a post-convert if necessary + if as_gray: + frame = frame.convert("F") # Scipy compat + elif not isinstance(frame, np.ndarray) and frame.mode == "1": + # Workaround for crash in PIL. When im is 1-bit, the call array(im) + # can cause a segfault, or generate garbage. See + # https://github.com/scipy/scipy/issues/2138 and + # https://github.com/python-pillow/Pillow/issues/350. + # + # This converts im from a 1-bit image to an 8-bit image. + frame = frame.convert("L") + + # Convert to numpy array + if im.mode.startswith("I;16"): + # e.g. in16 PNG's + shape = im.size + dtype = ">u2" if im.mode.endswith("B") else "= 0: + arr = arr.astype(np.uint8) + mode = mode_base = "L" + + else: + arr = image_as_uint(arr, bitdepth=16) + + else: + arr = image_as_uint(arr, bitdepth=8) + mode = "L" + mode_base = "L" + + if mode == "I;16" and int(getattr(Image, "__version__", "0").split(".")[0]) < 6: + # Pillow < v6.0.0 has limited support for the "I;16" mode, + # requiring us to fall back to this expensive workaround. + # tobytes actually creates a copy of the image, which is costly. + array_buffer = arr.tobytes() + if arr.ndim == 2: + im = Image.new(mode_base, arr.T.shape) + im.frombytes(array_buffer, "raw", mode) + else: + image_shape = (arr.shape[1], arr.shape[0]) + im = Image.frombytes(mode, image_shape, array_buffer) + return im + else: + return Image.fromarray(arr, mode) + + +# imported for backwards compatibility +from .pillowmulti import GIFFormat, TIFFFormat # noqa: E402, F401 diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/pillowmulti.py b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/pillowmulti.py new file mode 100644 index 0000000000000000000000000000000000000000..e75a18eada4beaf7e2b37dbaf3e244c0b7cc2886 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/pillowmulti.py @@ -0,0 +1,338 @@ +""" +PIL formats for multiple images. 
class GIFFormat(PillowFormat):
    """See :mod:`imageio.plugins.pillow_legacy`"""

    _modes = "iI"
    _description = "Static and animated gif (Pillow)"

    # GIF reader needs no modifications compared to base pillow reader

    class Writer(PillowFormat.Writer):  # pragma: no cover
        def _open(
            self,
            loop=0,
            duration=None,
            fps=10,
            palettesize=256,
            quantizer=0,
            subrectangles=False,
        ):
            """Validate the writer options and create the streaming GifWriter.

            Parameters
            ----------
            loop : int or float
                Number of loops. Values ``<= 0`` and ``inf`` are stored as 0.
            duration : float, list, tuple or None
                Frame duration. A list/tuple gives one value per frame. If
                None, ``1 / fps`` is used.
            fps : float
                Frames per second; only used when ``duration`` is None.
            palettesize : int
                Number of palette colors, 2..256. Values that are not a power
                of two are rounded up to the next power of two.
            quantizer : int or str
                Forwarded unchanged to ``GifWriter``.
            subrectangles : bool
                If true, dispose mode 1 is used, otherwise dispose mode 2.

            Raises
            ------
            ImportError
                If Pillow 10.1 or a later 10.x release is installed.
            ValueError
                If ``palettesize`` is outside 2..256.
            """
            from PIL import __version__ as pillow_version

            # Only major/minor are needed; ignoring the remaining components
            # keeps versions such as "10.1.0.dev0" from breaking the parse.
            major, minor = (int(x) for x in pillow_version.split(".")[:2])
            if major == 10 and minor >= 1:
                raise ImportError(
                    f"Pillow v{pillow_version} is not supported by ImageIO's legacy "
                    "pillow plugin when writing GIFs. Consider switching to the new "
                    "plugin or downgrading to `pillow<10.1.0`."
                )

            # Check palettesize
            palettesize = int(palettesize)
            if palettesize < 2 or palettesize > 256:
                raise ValueError("GIF quantize param must be 2..256")
            if palettesize not in [2, 4, 8, 16, 32, 64, 128, 256]:
                # Round the *requested* size up to the next power of two
                # (the previous code derived it from the constant 128).
                palettesize = 1 << (palettesize - 1).bit_length()
                logger.warning(
                    "Warning: palettesize (%r) modified to a factor of "
                    "two between 2-256.",
                    palettesize,
                )
            # Duration / fps
            if duration is None:
                self._duration = 1.0 / float(fps)
            elif isinstance(duration, (list, tuple)):
                self._duration = [float(d) for d in duration]
            else:
                self._duration = float(duration)
            # loop
            loop = float(loop)
            if loop <= 0 or loop == float("inf"):
                loop = 0
            loop = int(loop)
            # Subrectangles / dispose
            subrectangles = bool(subrectangles)
            self._dispose = 1 if subrectangles else 2
            # The "0" (median cut) quantizer is by far the best

            fp = self.request.get_file()
            self._writer = GifWriter(
                fp, subrectangles, loop, quantizer, int(palettesize)
            )

        def _close(self):
            """Finish the GIF by letting the writer emit its trailer."""
            self._writer.close()

        def _append_data(self, im, meta):
            """Convert ``im`` to 8-bit and hand it to the writer as one frame.

            ``meta`` is accepted for interface compatibility and not used here.
            """
            im = image_as_uint(im, bitdepth=8)
            # Drop a trailing singleton channel so the frame is plain 2D
            if im.ndim == 3 and im.shape[-1] == 1:
                im = im[:, :, 0]
            duration = self._duration
            if isinstance(duration, list):
                # Per-frame durations: reuse the last entry once the list runs out
                duration = duration[min(len(duration) - 1, self._writer._count)]
            dispose = self._dispose
            self._writer.add_image(im, duration, dispose)
def write_header(self, im, globalPalette, loop):
    """Write the file preamble to ``self.fp``.

    The preamble consists of, in this order: the animation header built by
    ``getheaderAnim(im)``, the given ``globalPalette`` bytes, and the
    application extension built by ``getAppExt(loop)``.
    """
    # Build every part first, then write them out in order
    preamble = (self.getheaderAnim(im), globalPalette, self.getAppExt(loop))
    for part in preamble:
        self.fp.write(part)
def getImageDescriptor(self, im, xy=None):
    """Build the image descriptor for a frame that carries a local palette.

    The result is the image separator byte, followed by the left and top
    offsets (``xy``, defaulting to the upper left corner) and the width and
    height taken from ``im.size``, each encoded as 2 bytes little-endian,
    followed by the packed field ``0x87``.

    Written by Ant1 on 2010-08-22
    Modified by Alex Robinson in January 2011 to implement subrectangles.
    """
    # By default use the full image, placed at the upper left
    if xy is None:
        xy = (0, 0)

    # left, top, width, height
    geometry = (xy[0], xy[1], im.size[0], im.size[1])
    encoded = b"".join(v.to_bytes(2, byteorder="little") for v in geometry)

    # 0x2c: image separator.
    # 0x87: packed field: local color table flag1, interlace0, sorted table0,
    # reserved00, lct size111=7=2^(7 + 1)=256.
    # The LZW minimum size code comes later, at the start of the image data.
    return b"\x2c" + encoded + b"\x87"
def getSubRectangle(self, im):
    """Calculate the minimal rectangle that need updating. Returns
    a two-element tuple containing the cropped image and an x-y tuple.

    Calculating the subrectangles takes extra time, obviously. However,
    if the image sizes were reduced, the actual writing of the GIF
    goes faster. In some cases applying this method produces a GIF faster.

    For the first image (``self._count == 0``) the image is returned
    unchanged with offset ``(0, 0)``. If nothing changed relative to the
    previous image, a 2x2 rectangle at the origin is returned.
    """

    # Cannot do subrectangle for first image
    if self._count == 0:
        return im, (0, 0)

    prev = self._previous_image

    # Get difference, sum over colors
    diff = np.abs(im - prev)
    if diff.ndim == 3:
        diff = diff.sum(2)
    # Indices of the columns / rows holding at least one changed pixel.
    # flatnonzero yields 1-D index arrays, so X[0] is a true scalar; calling
    # int() on an argwhere row (a 1-element array) is deprecated in NumPy.
    X = np.flatnonzero(diff.sum(0))
    Y = np.flatnonzero(diff.sum(1))
    # Get rect coordinates
    if X.size and Y.size:
        x0, x1 = int(X[0]), int(X[-1]) + 1
        y0, y1 = int(Y[0]), int(Y[-1]) + 1
    else:  # No change ... make it minimal
        x0, x1 = 0, 2
        y0, y1 = 0, 2

    return im[y0:y1, x0:x1], (x0, y0)
note:: + To use this plugin you need to have `PyAV <https://pypi.org/project/av/>`_ + installed:: + + pip install av + +This plugin wraps pyAV, a pythonic binding for the FFMPEG library. It is similar +to our FFMPEG plugin, has improved performance, features a robust interface, and +aims to supersede the FFMPEG plugin in the future. + + +Methods +------- +.. note:: + Check the respective function for a list of supported kwargs and detailed + documentation. + +.. autosummary:: + :toctree: + + PyAVPlugin.read + PyAVPlugin.iter + PyAVPlugin.write + PyAVPlugin.properties + PyAVPlugin.metadata + +Additional methods available inside the :func:`imopen <imageio.v3.imopen>` +context: + +.. autosummary:: + :toctree: + + PyAVPlugin.init_video_stream + PyAVPlugin.write_frame + PyAVPlugin.set_video_filter + PyAVPlugin.container_metadata + PyAVPlugin.video_stream_metadata + +Advanced API +------------ + +In addition to the default ImageIO v3 API this plugin exposes custom functions +that are specific to reading/writing video and its metadata. These are available +inside the :func:`imopen <imageio.v3.imopen>` context and allow fine-grained +control over how the video is processed. The functions are documented above and +below you can find a usage example:: + + import imageio.v3 as iio + + with iio.imopen("test.mp4", "w", plugin="pyav") as file: + file.init_video_stream("libx264") + file.container_metadata["comment"] = "This video was created using ImageIO." + + for _ in range(5): + for frame in iio.imiter("imageio:newtonscradle.gif"): + file.write_frame(frame) + + meta = iio.immeta("test.mp4", plugin="pyav") + assert meta["comment"] == "This video was created using ImageIO." + + + +Pixel Formats (Colorspaces) +--------------------------- + +By default, this plugin converts the video into 8-bit RGB (called ``rgb24`` in +ffmpeg). This is a useful behavior for many use-cases, but sometimes you may +want to use the video's native colorspace or you may wish to convert the video +into an entirely different colorspace.
This is controlled using the ``format`` +kwarg. You can use ``format=None`` to leave the image in its native colorspace +or specify any colorspace supported by FFMPEG as long as it is stridable, i.e., +as long as it can be represented by a single numpy array. Some useful choices +include: + +- rgb24 (default; 8-bit RGB) +- rgb48le (16-bit little-endian RGB) +- bgr24 (8-bit BGR; OpenCV's default colorspace) +- gray (8-bit grayscale) +- yuv444p (8-bit channel-first YUV) + +Further, FFMPEG maintains a list of available formats, albeit not as part of the +narrative docs. It can be `found here +<https://github.com/FFmpeg/FFmpeg/blob/master/libavutil/pixfmt.h>`_ (warning: C source +code). + +Filters +------- + +On top of providing basic read/write functionality, this plugin allows you to +use the full collection of `video filters available in FFMPEG +<https://ffmpeg.org/ffmpeg-filters.html>`_. This means that you +can apply extensive preprocessing to your video before retrieving it as a numpy +array or apply extensive post-processing before you encode your data. + +Filters come in two forms: sequences or graphs. Filter sequences are, as the +name suggests, sequences of filters that are applied one after the other. They +are specified using the ``filter_sequence`` kwarg. Filter graphs, on the other +hand, come in the form of a directed graph and are specified using the +``filter_graph`` kwarg. + +.. note:: + All filters are either sequences or graphs. If all you want is to apply a + single filter, you can do this by specifying a filter sequence with a single + entry. + +A ``filter_sequence`` is a list of filters, each defined through a 2-element +tuple of the form ``(filter_name, filter_parameters)``. The first element of the +tuple is the name of the filter. The second element holds the filter parameters, +which can be given either as a string or a dict. The string matches the same +format that you would use when specifying the filter using the ffmpeg +command-line tool and the dict has entries of the form ``parameter:value``.
For +example:: + + import imageio.v3 as iio + + # using a filter_parameters str + img1 = iio.imread( + "imageio:cockatoo.mp4", + plugin="pyav", + filter_sequence=[ + ("rotate", "45*PI/180") + ] + ) + + # using a filter_parameters dict + img2 = iio.imread( + "imageio:cockatoo.mp4", + plugin="pyav", + filter_sequence=[ + ("rotate", {"angle":"45*PI/180", "fillcolor":"AliceBlue"}) + ] + ) + +A ``filter_graph``, on the other hand, is specified using a ``(nodes, edges)`` +tuple. It is best explained using an example:: + + img = iio.imread( + "imageio:cockatoo.mp4", + plugin="pyav", + filter_graph=( + { + "split": ("split", ""), + "scale_overlay":("scale", "512:-1"), + "overlay":("overlay", "x=25:y=25:enable='between(t,1,8)'"), + }, + [ + ("video_in", "split", 0, 0), + ("split", "overlay", 0, 0), + ("split", "scale_overlay", 1, 0), + ("scale_overlay", "overlay", 0, 1), + ("overlay", "video_out", 0, 0), + ] + ) + ) + +The above transforms the video to have picture-in-picture of itself in the top +left corner. As you can see, nodes are specified using a dict which has names as +its keys and filter tuples as values; the same tuples as the ones used when +defining a filter sequence. Edges are a list of a 4-tuples of the form +``(node_out, node_in, output_idx, input_idx)`` and specify which two filters are +connected and which inputs/outputs should be used for this. + +Further, there are two special nodes in a filter graph: ``video_in`` and +``video_out``, which represent the graph's input and output respectively. These +names can not be chosen for other nodes (those nodes would simply be +overwritten), and for a graph to be valid there must be a path from the input to +the output and all nodes in the graph must be connected. + +While most graphs are quite simple, they can become very complex and we +recommend that you read through the `FFMPEG documentation +`_ and their +examples to better understand how to use them. 
def _get_frame_shape(frame: av.VideoFrame) -> Tuple[int, ...]:
    """Compute the frame's array shape

    Parameters
    ----------
    frame : av.VideoFrame
        A frame for which the resulting shape should be computed.

    Returns
    -------
    shape : Tuple[int, ...]
        A tuple describing the shape of the image data in the frame. It is
        ``(height, width)``, prefixed by the number of planes if there is
        more than one plane, and suffixed by the number of channels per
        plane if a plane holds more than one component.

    Raises
    ------
    IOError
        If the components differ in width or height, if a component's bit
        count is not a multiple of 8, or if the planes differ in line size.

    """

    components = frame.format.components
    widths = [component.width for component in components]
    heights = [component.height for component in components]
    bits = np.array([component.bits for component in components])
    line_sizes = [plane.line_size for plane in frame.planes]

    # Comparing a list against itself shifted by one is true as soon as two
    # neighbouring entries differ.
    subsampled_width = widths[:-1] != widths[1:]
    subsampled_height = heights[:-1] != heights[1:]
    unaligned_components = np.any(bits % 8 != 0) or (line_sizes[:-1] != line_sizes[1:])
    if subsampled_width or subsampled_height or unaligned_components:
        raise IOError(
            f"{frame.format.name} can't be expressed as a strided array. "
            "Use `format=` to select a format to convert into."
        )

    shape = [frame.height, frame.width]

    # ffmpeg doesn't have a notion of channel-first or channel-last formats
    # instead it stores frames in one or more planes which contain individual
    # components of a pixel depending on the pixel format. For channel-first
    # formats each component lives on a separate plane (n_planes) and for
    # channel-last formats all components are packed on a single plane
    # (n_channels)
    n_planes = max(component.plane for component in components) + 1
    if n_planes > 1:
        shape = [n_planes] + shape

    channels_per_plane = [0] * n_planes
    for component in components:
        channels_per_plane[component.plane] += 1
    n_channels = max(channels_per_plane)

    if n_channels > 1:
        shape = shape + [n_channels]

    return tuple(shape)
def __init__(self, request: Request, *, container: str = None, **kwargs) -> None:
    """Initialize a new Plugin Instance.

    See Plugin's docstring for detailed documentation.

    Raises
    ------
    InitializationError
        In read mode, if PyAV raises ``av.FFmpegError`` while opening the
        resource. In write mode, if no extension or format hint is
        available, or if PyAV raises ``ValueError`` while opening the
        output.

    Notes
    -----
    The implementation here stores the request as a local variable that is
    exposed using a @property below. If you inherit from PluginV3, remember
    to call ``super().__init__(request)``.

    """

    super().__init__(request)

    self._container = None
    self._video_stream = None
    self._video_filter = None

    if request.mode.io_mode == IOMode.read:
        self._next_idx = 0
        try:
            if request._uri_type == 5:  # 5 is the value of URI_HTTP
                # pyav should read from HTTP by itself. This enables reading
                # HTTP-based streams like DASH. Note that solving streams
                # like this is temporary until the new request object gets
                # implemented.
                self._container = av.open(request.raw_uri, **kwargs)
            else:
                self._container = av.open(request.get_file(), **kwargs)
            self._video_stream = self._container.streams.video[0]
            self._decoder = self._container.decode(video=0)
        except av.FFmpegError:
            if isinstance(request.raw_uri, bytes):
                # Don't dump the raw payload into the message; name it by
                # its placeholder instead.
                msg = "PyAV does not support these `<bytes>`"
            else:
                msg = f"PyAV does not support `{request.raw_uri}`"
            raise InitializationError(msg) from None
    else:
        self.frames_written = 0
        file_handle = self.request.get_file()
        filename = getattr(file_handle, "name", None)
        extension = self.request.extension or self.request.format_hint
        if extension is None:
            raise InitializationError("Can't determine output container to use.")

        # hacky, but beats running our own format selection logic
        # (since av_guess_format is not exposed)
        try:
            file_handle.name = filename or ("tmp" + extension)
        except AttributeError:
            pass  # read-only, nothing we can do

        try:
            self._container = av.open(
                file_handle, mode="w", format=container, **kwargs
            )
        except ValueError:
            # Same chaining style as the read branch above
            raise InitializationError(
                f"PyAV can not write to `{self.request.raw_uri}`"
            ) from None
+ thread_count : int + How many threads to use when decoding a frame. The default is 0, + which will set the number using ffmpeg's default, which is based on + the codec, number of available cores, threadding model, and other + considerations. + thread_type : str + The threading model to be used. One of + + - `"SLICE"`: threads assemble parts of the current frame + - `"FRAME"`: threads may assemble future frames + - None (default): Uses ``"FRAME"`` if ``index=...`` and ffmpeg's + default otherwise. + + + Returns + ------- + frame : np.ndarray + A numpy array containing loaded frame data. + + Notes + ----- + Accessing random frames repeatedly is costly (O(k), where k is the + average distance between two keyframes). You should do so only sparingly + if possible. In some cases, it can be faster to bulk-read the video (if + it fits into memory) and to then access the returned ndarray randomly. + + The current implementation may cause problems for b-frames, i.e., + bidirectionaly predicted pictures. I lack test videos to write unit + tests for this case. + + Reading from an index other than ``...``, i.e. reading a single frame, + currently doesn't support filters that introduce delays. 
+ + """ + + if index is ...: + props = self.properties(format=format) + uses_filter = ( + self._video_filter is not None + or filter_graph is not None + or filter_sequence is not None + ) + + self._container.seek(0) + if not uses_filter and props.shape[0] != 0: + frames = np.empty(props.shape, dtype=props.dtype) + for idx, frame in enumerate( + self.iter( + format=format, + filter_sequence=filter_sequence, + filter_graph=filter_graph, + thread_count=thread_count, + thread_type=thread_type or "FRAME", + ) + ): + frames[idx] = frame + else: + frames = np.stack( + [ + x + for x in self.iter( + format=format, + filter_sequence=filter_sequence, + filter_graph=filter_graph, + thread_count=thread_count, + thread_type=thread_type or "FRAME", + ) + ] + ) + + # reset stream container, because threading model can't change after + # first access + self._video_stream = self._container.streams.video[0] + + return frames + + if thread_type is not None and not ( + self._video_stream.thread_type == thread_type + or self._video_stream.thread_type.name == thread_type + ): + self._video_stream.thread_type = thread_type + + if ( + thread_count != 0 + and thread_count != self._video_stream.codec_context.thread_count + ): + # in FFMPEG thread_count == 0 means use the default count, which we + # change to mean don't change the thread count. + self._video_stream.codec_context.thread_count = thread_count + + if constant_framerate is None: + # "variable_fps" is now a flag (handle got removed). 
def iter(
    self,
    *,
    format: str = "rgb24",
    filter_sequence: List[Tuple[str, Union[str, dict]]] = None,
    filter_graph: Tuple[dict, List] = None,
    thread_count: int = 0,
    thread_type: str = None,
) -> Generator[np.ndarray, None, None]:
    """Yield frames from the video.

    Frames are taken from the plugin's current decoder position; every
    decoded frame advances the internal frame counter.

    Parameters
    ----------
    format : str
        Convert the data into the given format before returning it. If None,
        return the data in the encoded format if it can be expressed as a
        strided array; otherwise raise an Exception.
    filter_sequence : List[str, str, dict]
        If not None, apply the given sequence of FFmpeg filters to each
        ndimage. Check the (module-level) plugin docs for details and
        examples.
    filter_graph : (dict, List)
        If not None, apply the given graph of FFmpeg filters to each
        ndimage. The graph is given as a tuple of two dicts. The first dict
        contains a (named) set of nodes, and the second dict contains a set
        of edges between nodes of the previous dict. Check the (module-level)
        plugin docs for details and examples.
    thread_count : int
        How many threads to use when decoding a frame. The default is 0,
        which will set the number using ffmpeg's default, which is based on
        the codec, number of available cores, threadding model, and other
        considerations.
    thread_type : str
        The threading model to be used. One of

        - `"SLICE"` (default): threads assemble parts of the current frame
        - `"FRAME"`: threads may assemble future frames (faster for bulk reading)


    Yields
    ------
    frame : np.ndarray
        A (decoded) video frame.


    """

    # Unlike read(), both settings are always (re)assigned here
    self._video_stream.thread_type = thread_type or "SLICE"
    self._video_stream.codec_context.thread_count = thread_count

    self.set_video_filter(filter_sequence, filter_graph)

    for frame in self._decoder:
        self._next_idx += 1

        if self._video_filter is not None:
            try:
                frame = self._video_filter.send(frame)
            except StopIteration:
                # the filter is exhausted; stop decoding further frames
                break

            if frame is None:
                # the filter produced no output for this input frame
                continue

        yield self._unpack_frame(frame, format=format)

    # once decoding stops, yield whatever the filter still produces
    if self._video_filter is not None:
        for frame in self._video_filter:
            yield self._unpack_frame(frame, format=format)
def properties(self, index: int = ..., *, format: str = "rgb24") -> ImageProperties:
    """Standardized ndimage metadata.

    Parameters
    ----------
    index : int
        The index of the ndimage for which to return properties. If ``...``
        (Ellipsis, default), return the properties for the resulting batch
        of frames.
    format : str
        If not None (default: rgb24), convert the data into the given format
        before returning it. If None return the data in the encoded format
        if that can be expressed as a strided array; otherwise raise an
        Exception.

    Returns
    -------
    properties : ImageProperties
        A dataclass filled with standardized image metadata.

    Notes
    -----
    This function is efficient and won't process any pixel data.

    The provided metadata does not include modifications by any filters
    (through ``filter_sequence`` or ``filter_graph``).

    """

    codec_context = self._video_stream.codec_context

    # An empty frame of the target size/format is enough to derive shape
    # and dtype; no pixel data is decoded for this.
    template = av.VideoFrame(
        codec_context.width,
        codec_context.height,
        format or codec_context.pix_fmt,
    )

    batched = index is ...
    shape = _get_frame_shape(template)
    if batched:
        # prepend the number of frames reported by the stream
        shape = (self._video_stream.frames,) + shape

    return ImageProperties(
        shape=tuple(shape),
        dtype=_format_to_dtype(template.format),
        n_images=shape[0] if batched else None,
        is_batch=batched,
    )
+ constant_framerate : bool + If True assume the video's framerate is constant. This allows for + faster seeking inside the file. If False, the video is reset before + each read and searched from the beginning. If None (default), this + value will be read from the container format. + + Returns + ------- + metadata : dict + A dictionary filled with format-specific metadata fields and their + values. + + """ + + metadata = dict() + + if index is ...: + # useful flags defined on the container and/or video stream + metadata.update( + { + "video_format": self._video_stream.codec_context.pix_fmt, + "codec": self._video_stream.codec.name, + "long_codec": self._video_stream.codec.long_name, + "profile": self._video_stream.profile, + "fps": float(self._video_stream.guessed_rate), + } + ) + if self._video_stream.duration is not None: + duration = float( + self._video_stream.duration * self._video_stream.time_base + ) + metadata.update({"duration": duration}) + + metadata.update(self.container_metadata) + metadata.update(self.video_stream_metadata) + return metadata + + if constant_framerate is None: + # "variable_fps" is now a flag (handle got removed). 
Full list at + # https://pyav.org/docs/stable/api/container.html#module-av.format + variable_fps = bool(self._container.format.flags & 0x400) + constant_framerate = not variable_fps + + self._seek(index, constant_framerate=constant_framerate) + desired_frame = next(self._decoder) + self._next_idx += 1 + + # useful flags defined on the frame + metadata.update( + { + "key_frame": bool(desired_frame.key_frame), + "time": desired_frame.time, + "interlaced_frame": bool(desired_frame.interlaced_frame), + "frame_type": _get_frame_type(desired_frame.pict_type), + } + ) + + # side data + metadata.update( + {item.type.name: bytes(item) for item in desired_frame.side_data} + ) + + return metadata + + def close(self) -> None: + """Close the Video.""" + + is_write = self.request.mode.io_mode == IOMode.write + if is_write and self._video_stream is not None: + self._flush_writer() + + if self._video_stream is not None: + self._video_stream = None + + if self._container is not None: + self._container.close() + + self.request.finish() + + def __enter__(self) -> "PyAVPlugin": + return super().__enter__() + + # ------------------------------ + # Add-on Interface inside imopen + # ------------------------------ + + def init_video_stream( + self, + codec: str, + *, + fps: float = 24, + pixel_format: str = None, + max_keyframe_interval: int = None, + force_keyframes: bool = None, + ) -> None: + """Initialize a new video stream. + + This function adds a new video stream to the ImageResource using the + selected encoder (codec), framerate, and colorspace. + + Parameters + ---------- + codec : str + The codec to use, e.g. ``"h264"`` or ``"vp9"``. + fps : float + The desired framerate of the video stream (frames per second). + pixel_format : str + The pixel format to use while encoding frames. If None (default) use + the codec's default. + max_keyframe_interval : int + The maximum distance between two intra frames (I-frames). Also known + as GOP size. If unspecified use the codec's default. 
Note that not + every I-frame is a keyframe; see the notes for details. + force_keyframes : bool + If True, limit inter frames dependency to frames within the current + keyframe interval (GOP), i.e., force every I-frame to be a keyframe. + If unspecified, use the codec's default. + + Notes + ----- + You can usually leave ``max_keyframe_interval`` and ``force_keyframes`` + at their default values, unless you try to generate seek-optimized video + or have a similar specialist use-case. In this case, ``force_keyframes`` + controls the ability to seek to _every_ I-frame, and + ``max_keyframe_interval`` controls how close to a random frame you can + seek. Low values allow more fine-grained seek at the expense of + file-size (and thus I/O performance). + + """ + # It may introduce `OverflowError` if `fps` is float + # which is a legacy issue of `pyav`: https://github.com/PyAV-Org/PyAV/issues/242 + fps = Fraction.from_float(fps).limit_denominator(65535) + stream = self._container.add_stream(codec, fps) + stream.time_base = Fraction(1 / fps).limit_denominator(int(2**16 - 1)) + if pixel_format is not None: + stream.pix_fmt = pixel_format + if max_keyframe_interval is not None: + stream.gop_size = max_keyframe_interval + if force_keyframes is not None: + if force_keyframes: + stream.codec_context.flags |= Flags.closed_gop + else: + stream.codec_context.flags &= ~Flags.closed_gop + + self._video_stream = stream + + def write_frame(self, frame: np.ndarray, *, pixel_format: str = "rgb24") -> None: + """Add a frame to the video stream. + + This function appends a new frame to the video. It assumes that the + stream previously has been initialized. I.e., ``init_video_stream`` has + to be called before calling this function for the write to succeed. + + Parameters + ---------- + frame : np.ndarray + The image to be appended/written to the video stream. + pixel_format : str + The colorspace (pixel format) of the incoming frame. 
+ + Notes + ----- + Frames may be held in a buffer, e.g., by the filter pipeline used during + writing or by FFMPEG to batch them prior to encoding. Make sure to + ``.close()`` the plugin or to use a context manager to ensure that all + frames are written to the ImageResource. + + """ + + # manual packing of ndarray into frame + # (this should live in pyAV, but it doesn't support all the formats we + # want and PRs there are slow) + pixel_format = av.VideoFormat(pixel_format) + img_dtype = _format_to_dtype(pixel_format) + width = frame.shape[2 if pixel_format.is_planar else 1] + height = frame.shape[1 if pixel_format.is_planar else 0] + av_frame = av.VideoFrame(width, height, pixel_format.name) + if pixel_format.is_planar: + for idx, plane in enumerate(av_frame.planes): + plane_array = np.frombuffer(plane, dtype=img_dtype) + plane_array = as_strided( + plane_array, + shape=(plane.height, plane.width), + strides=(plane.line_size, img_dtype.itemsize), + ) + plane_array[...] = frame[idx] + else: + if pixel_format.name.startswith("bayer_"): + # ffmpeg doesn't describe bayer formats correctly + # see https://github.com/imageio/imageio/issues/761#issuecomment-1059318851 + # and following for details. + n_channels = 1 + else: + n_channels = len(pixel_format.components) + + plane = av_frame.planes[0] + plane_shape = (plane.height, plane.width) + plane_strides = (plane.line_size, n_channels * img_dtype.itemsize) + if n_channels > 1: + plane_shape += (n_channels,) + plane_strides += (img_dtype.itemsize,) + + plane_array = as_strided( + np.frombuffer(plane, dtype=img_dtype), + shape=plane_shape, + strides=plane_strides, + ) + plane_array[...] 
    def set_video_filter(
        self,
        filter_sequence: List[Tuple[str, Union[str, dict]]] = None,
        filter_graph: Tuple[dict, List] = None,
    ) -> None:
        """Set the filter(s) to use.

        This function creates a new FFMPEG filter graph to use when reading or
        writing video. In the case of reading, frames are passed through the
        filter graph before being returned and, in case of writing, frames are
        passed through the filter before being written to the video.

        The result is stored in ``self._video_filter`` as an already-started
        generator (co-routine): ``send(frame)`` pushes a frame into the graph
        and returns either a filtered frame or None if the graph needs more
        input first. Passing None for both arguments removes the filter.

        Parameters
        ----------
        filter_sequence : List[str, str, dict]
            If not None, apply the given sequence of FFmpeg filters to each
            ndimage. Each element is a ``(filter_name, argument)`` pair where
            the argument is either a string or a dict of keyword arguments.
            Check the (module-level) plugin docs for details and examples.
        filter_graph : (dict, List)
            If not None, apply the given graph of FFmpeg filters to each
            ndimage. The graph is given as a pair: the first item is a dict
            mapping node names to ``(filter_name, arguments)``, the second is
            a collection of ``(from_node, to_node, out_idx, in_idx)`` edges
            between nodes of that dict. The names ``"video_in"`` and
            ``"video_out"`` refer to the end of ``filter_sequence`` and the
            graph's sink respectively. Check the (module-level) plugin docs
            for details and examples.

        Notes
        -----
        Changing a filter graph with lag during reading or writing will
        currently cause frames in the filter queue to be lost.

        """

        if filter_sequence is None and filter_graph is None:
            # nothing to apply: disable filtering entirely
            self._video_filter = None
            return

        if filter_sequence is None:
            filter_sequence = list()

        node_descriptors: Dict[str, Tuple[str, Union[str, Dict]]]
        edges: List[Tuple[str, str, int, int]]
        if filter_graph is None:
            # no explicit graph: connect the end of the sequence to the sink
            node_descriptors, edges = dict(), [("video_in", "video_out", 0, 0)]
        else:
            node_descriptors, edges = filter_graph

        graph = av.filter.Graph()

        # chain the sequential filters behind the input buffer
        previous_node = graph.add_buffer(template=self._video_stream)
        for filter_name, argument in filter_sequence:
            if isinstance(argument, str):
                current_node = graph.add(filter_name, argument)
            else:
                current_node = graph.add(filter_name, **argument)
            previous_node.link_to(current_node)
            previous_node = current_node

        # "video_in" is the last node of the sequence (or the input buffer if
        # the sequence is empty); "video_out" is the sink frames are pulled from
        nodes = dict()
        nodes["video_in"] = previous_node
        nodes["video_out"] = graph.add("buffersink")
        for name, (filter_name, arguments) in node_descriptors.items():
            if isinstance(arguments, str):
                nodes[name] = graph.add(filter_name, arguments)
            else:
                nodes[name] = graph.add(filter_name, **arguments)

        for from_note, to_node, out_idx, in_idx in edges:
            nodes[from_note].link_to(nodes[to_node], out_idx, in_idx)

        graph.configure()

        def video_filter():
            # this starts a co-routine
            # frames are supplied by the caller via the generator's send()
            frame = yield None

            # send and receive frames in "parallel"
            while frame is not None:
                graph.push(frame)
                try:
                    frame = yield graph.pull()
                except av.error.BlockingIOError:
                    # filter has lag and needs more frames
                    frame = yield None
                except av.error.EOFError:
                    break

            try:
                # send EOF in av>=9.0
                graph.push(None)
            except ValueError:  # pragma: no cover
                # handle av<9.0
                pass

            # all frames have been sent, empty the filter
            while True:
                try:
                    yield graph.pull()
                except av.error.EOFError:
                    break  # EOF
                except av.error.BlockingIOError:  # pragma: no cover
                    # handle av<9.0
                    break

        self._video_filter = video_filter()
        # prime the co-routine so that it is waiting at the first ``yield``
        self._video_filter.send(None)
def container_metadata(self): + """Container-specific metadata. + + A dictionary containing metadata stored at the container level. + + """ + return self._container.metadata + + @property + def video_stream_metadata(self): + """Stream-specific metadata. + + A dictionary containing metadata stored at the stream level. + + """ + return self._video_stream.metadata + + # ------------------------------- + # Internals and private functions + # ------------------------------- + + def _unpack_frame(self, frame: av.VideoFrame, *, format: str = None) -> np.ndarray: + """Convert a av.VideoFrame into a ndarray + + Parameters + ---------- + frame : av.VideoFrame + The frame to unpack. + format : str + If not None, convert the frame to the given format before unpacking. + + """ + + if format is not None: + frame = frame.reformat(format=format) + + dtype = _format_to_dtype(frame.format) + shape = _get_frame_shape(frame) + + planes = list() + for idx in range(len(frame.planes)): + n_channels = sum( + [ + x.bits // (dtype.itemsize * 8) + for x in frame.format.components + if x.plane == idx + ] + ) + av_plane = frame.planes[idx] + plane_shape = (av_plane.height, av_plane.width) + plane_strides = (av_plane.line_size, n_channels * dtype.itemsize) + if n_channels > 1: + plane_shape += (n_channels,) + plane_strides += (dtype.itemsize,) + + np_plane = as_strided( + np.frombuffer(av_plane, dtype=dtype), + shape=plane_shape, + strides=plane_strides, + ) + planes.append(np_plane) + + if len(planes) > 1: + # Note: the planes *should* exist inside a contigous memory block + # somewhere inside av.Frame however pyAV does not appear to expose this, + # so we are forced to copy the planes individually instead of wrapping + # them :( + out = np.concatenate(planes).reshape(shape) + else: + out = planes[0] + + return out + + def _seek(self, index, *, constant_framerate: bool = True) -> Generator: + """Seeks to the frame at the given index.""" + + if index == self._next_idx: + return # fast path :) + 
+ # we must decode at least once before we seek otherwise the + # returned frames become corrupt. + if self._next_idx == 0: + next(self._decoder) + self._next_idx += 1 + + if index == self._next_idx: + return # fast path :) + + # remove this branch until I find a way to efficiently find the next + # keyframe. keeping this as a reminder + # if self._next_idx < index and index < self._next_keyframe_idx: + # frames_to_yield = index - self._next_idx + if not constant_framerate and index > self._next_idx: + frames_to_yield = index - self._next_idx + elif not constant_framerate: + # seek backwards and can't link idx and pts + self._container.seek(0) + self._decoder = self._container.decode(video=0) + self._next_idx = 0 + + frames_to_yield = index + else: + # we know that the time between consecutive frames is constant + # hence we can link index and pts + + # how many pts lie between two frames + sec_delta = 1 / self._video_stream.guessed_rate + pts_delta = sec_delta / self._video_stream.time_base + + index_pts = int(index * pts_delta) + + # this only seeks to the closed (preceeding) keyframe + self._container.seek(index_pts, stream=self._video_stream) + self._decoder = self._container.decode(video=0) + + # this may be made faster if we could get the keyframe's time without + # decoding it + keyframe = next(self._decoder) + keyframe_time = keyframe.pts * keyframe.time_base + keyframe_pts = int(keyframe_time / self._video_stream.time_base) + keyframe_index = keyframe_pts // pts_delta + + self._container.seek(index_pts, stream=self._video_stream) + self._next_idx = keyframe_index + + frames_to_yield = index - keyframe_index + + for _ in range(frames_to_yield): + next(self._decoder) + self._next_idx += 1 + + def _flush_writer(self): + """Flush the filter and encoder + + This will reset the filter to `None` and send EoF to the encoder, + i.e., after calling, no more frames may be written. 
+ + """ + + stream = self._video_stream + + if self._video_filter is not None: + # flush encoder + for av_frame in self._video_filter: + if stream.frames == 0: + stream.width = av_frame.width + stream.height = av_frame.height + for packet in stream.encode(av_frame): + self._container.mux(packet) + self._video_filter = None + + # flush stream + for packet in stream.encode(): + self._container.mux(packet) + self._video_stream = None diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/rawpy.py b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/rawpy.py new file mode 100644 index 0000000000000000000000000000000000000000..220ca937fb0789110d08a4e9faefca6ceec4c91e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/rawpy.py @@ -0,0 +1,191 @@ +"""Read/Write images using rawpy. + +rawpy is an easy-to-use Python wrapper for the LibRaw library. +It also contains some extra functionality for finding and repairing hot/dead pixels. +""" + +from typing import Any, Dict, Iterator, List, Optional, Tuple, Union +import rawpy +import numpy as np + +from ..core.request import URI_BYTES, InitializationError, IOMode, Request +from ..core.v3_plugin_api import ImageProperties, PluginV3 +from ..typing import ArrayLike + + +class RawPyPlugin(PluginV3): + """A class representing the rawpy plugin. + + Methods + ------- + + .. autosummary:: + :toctree: _plugins/rawpy + + RawPyPlugin.read + """ + + def __init__(self, request: Request) -> None: + """Instantiates a new rawpy plugin object + + Parameters + ---------- + request: Request + A request object representing the resource to be operated on. 
+ """ + + super().__init__(request) + + self._image_file = None + + if request.mode.io_mode == IOMode.read: + try: + self._image_file = rawpy.imread(request.get_file()) + except ( + rawpy.NotSupportedError, + rawpy.LibRawFileUnsupportedError, + rawpy.LibRawIOError, + ): + if request._uri_type == URI_BYTES: + raise InitializationError( + "RawPy can not read the provided bytes." + ) from None + else: + raise InitializationError( + f"RawPy can not read {request.raw_uri}." + ) from None + elif request.mode.io_mode == IOMode.write: + raise InitializationError("RawPy does not support writing.") from None + + def close(self) -> None: + if self._image_file: + self._image_file.close() + + self._request.finish() + + def read(self, *, index: int = 0, **kwargs) -> np.ndarray: + """Read Raw Image. + + Returns + ------- + nd_image: ndarray + The image data + """ + + nd_image: np.ndarray + + try: + nd_image = self._image_file.postprocess(**kwargs) + except Exception: + pass + + if index is Ellipsis: + nd_image = nd_image[None, ...] + + return nd_image + + def write(self, ndimage: Union[ArrayLike, List[ArrayLike]]) -> Optional[bytes]: + """RawPy does not support writing.""" + raise NotImplementedError() + + def iter(self) -> Iterator[np.ndarray]: + """Load the image. + + Returns + ------- + nd_image: ndarray + The image data + """ + + try: + yield self.read() + except Exception: + pass + + def metadata( + self, index: int = None, exclude_applied: bool = True + ) -> Dict[str, Any]: + """Read ndimage metadata. + + Parameters + ---------- + exclude_applied : bool + If True, exclude metadata fields that are applied to the image while + reading. For example, if the binary data contains a rotation flag, + the image is rotated by default and the rotation flag is excluded + from the metadata to avoid confusion. + + Returns + ------- + metadata : dict + A dictionary of format-specific metadata. 
+ + """ + + metadata = {} + + image_size = self._image_file.sizes + + metadata["black_level_per_channel"] = self._image_file.black_level_per_channel + metadata["camera_white_level_per_channel"] = ( + self._image_file.camera_white_level_per_channel + ) + metadata["color_desc"] = self._image_file.color_desc + metadata["color_matrix"] = self._image_file.color_matrix + metadata["daylight_whitebalance"] = self._image_file.daylight_whitebalance + metadata["dtype"] = self._image_file.raw_image.dtype + metadata["flip"] = image_size.flip + metadata["num_colors"] = self._image_file.num_colors + metadata["tone_curve"] = self._image_file.tone_curve + metadata["width"] = image_size.width + metadata["height"] = image_size.height + metadata["raw_width"] = image_size.raw_width + metadata["raw_height"] = image_size.raw_height + metadata["raw_shape"] = self._image_file.raw_image.shape + metadata["iwidth"] = image_size.iwidth + metadata["iheight"] = image_size.iheight + metadata["pixel_aspect"] = image_size.pixel_aspect + metadata["white_level"] = self._image_file.white_level + + if exclude_applied: + metadata.pop("black_level_per_channel", None) + metadata.pop("camera_white_level_per_channel", None) + metadata.pop("color_desc", None) + metadata.pop("color_matrix", None) + metadata.pop("daylight_whitebalance", None) + metadata.pop("dtype", None) + metadata.pop("flip", None) + metadata.pop("num_colors", None) + metadata.pop("tone_curve", None) + metadata.pop("raw_width", None) + metadata.pop("raw_height", None) + metadata.pop("raw_shape", None) + metadata.pop("iwidth", None) + metadata.pop("iheight", None) + metadata.pop("white_level", None) + + return metadata + + def properties(self, index: int = None) -> ImageProperties: + """Standardized ndimage metadata + + Returns + ------- + properties : ImageProperties + A dataclass filled with standardized image metadata. + + Notes + ----- + This does not decode pixel data and is fast for large images. 
def load_lib():
    """Import the ITK backend and remember its read/write functions.

    ``itk`` is tried first; if importing it (or looking up its functions)
    raises ``ImportError``, ``SimpleITK`` is tried instead. On success the
    module-level names ``_itk``, ``_read_function`` and ``_write_function``
    are bound to the backend that was found.

    Returns
    -------
    module
        The imported backend module (``itk`` or ``SimpleITK``).

    Raises
    ------
    ImportError
        If neither backend can be imported.
    """
    global _itk, _read_function, _write_function

    # preferred backend: the full ITK package
    try:
        import itk as _itk

        _read_function = _itk.imread
        _write_function = _itk.imwrite
        return _itk
    except ImportError:
        pass

    # fallback backend: SimpleITK
    try:
        import SimpleITK as _itk

        _read_function = _itk.ReadImage
        _write_function = _itk.WriteImage
        return _itk
    except ImportError:
        raise ImportError(
            "itk could not be found. "
            "Please try "
            "  python -m pip install itk "
            "or "
            "  python -m pip install simpleitk "
            "or refer to "
            "  https://itkpythonpackage.readthedocs.io/ "
            "for further instructions."
        )
# Extensions that are considered ITK-only; these are always claimed by this
# plugin. Every entry carries its leading dot because it is compared against
# ``request.extension``; entries without the dot can never match.
ITK_FORMATS = (
    ".gipl",
    ".ipl",
    ".mha",
    ".mhd",
    ".nhdr",
    ".nia",
    ".hdr",
    ".nrrd",
    ".nii",
    ".nii.gz",
    ".img",
    ".img.gz",
    ".vtk",
    ".hdf5",
    ".lsm",
    ".mnc",
    ".mnc2",
    ".mgh",
    ".pic",
)
# ITK-only formats plus common formats, which are only claimed when a backend
# is installed.
ALL_FORMATS = ITK_FORMATS + (
    ".bmp",
    ".jpeg",
    ".jpg",
    ".png",
    ".tiff",
    ".tif",
    ".dicom",
    ".dcm",
    ".gdcm",
)


class ItkFormat(Format):
    """See :mod:`imageio.plugins.simpleitk`"""

    def _can_read(self, request):
        # If the request is a format that only this plugin can handle,
        # we report that we can do it; a useful error will be raised
        # when simpleitk is not installed. For the more common formats
        # we only report that we can read if the library is installed.
        if request.extension in ITK_FORMATS:
            return True
        if has_module("itk.ImageIOBase") or has_module("SimpleITK"):
            return request.extension in ALL_FORMATS
        return False

    def _can_write(self, request):
        # Same policy as for reading.
        if request.extension in ITK_FORMATS:
            return True
        if has_module("itk.ImageIOBase") or has_module("SimpleITK"):
            return request.extension in ALL_FORMATS
        return False

    # -- reader

    class Reader(Format.Reader):
        def _open(self, pixel_type=None, fallback_only=None, **kwargs):
            # The backend is imported lazily on first use.
            if not _itk:
                load_lib()
            # Optional arguments are passed positionally, and only if given.
            args = ()
            if pixel_type is not None:
                args += (pixel_type,)
            if fallback_only is not None:
                args += (fallback_only,)
            self._img = _read_function(self.request.get_local_filename(), *args)

        def _get_length(self):
            # A file always holds exactly one image for this plugin.
            return 1

        def _close(self):
            pass

        def _get_data(self, index):
            # Get data
            if index != 0:
                error_msg = "Index out of range while reading from itk file"
                raise IndexError(error_msg)

            # Return array and empty meta data
            return _itk.GetArrayFromImage(self._img), {}

        def _get_meta_data(self, index):
            error_msg = "The itk plugin does not support meta data, currently."
            raise RuntimeError(error_msg)

    # -- writer
    class Writer(Format.Writer):
        def _open(self):
            # The backend is imported lazily on first use.
            if not _itk:
                load_lib()

        def _close(self):
            pass

        def _append_data(self, im, meta):
            # ``meta`` is not used; each call writes the given array to the
            # request's local file.
            _itk_img = _itk.GetImageFromArray(im)
            _write_function(_itk_img, self.request.get_local_filename())

        def set_meta_data(self, meta):
            error_msg = "The itk plugin does not support meta data, currently."
            raise RuntimeError(error_msg)
autosummary:: + :toctree: + + SpePlugin.read + SpePlugin.iter + SpePlugin.properties + SpePlugin.metadata + +""" + +from datetime import datetime +import logging +import os +from typing import ( + Any, + Callable, + Dict, + Iterator, + List, + Mapping, + Optional, + Sequence, + Tuple, + Union, +) +import warnings + +import numpy as np + +from ..core.request import Request, IOMode, InitializationError +from ..core.v3_plugin_api import PluginV3, ImageProperties + + +logger = logging.getLogger(__name__) + + +class Spec: + """SPE file specification data + + Tuples of (offset, datatype, count), where offset is the offset in the SPE + file and datatype is the datatype as used in `numpy.fromfile`() + + `data_start` is the offset of actual image data. + + `dtypes` translates SPE datatypes (0...4) to numpy ones, e. g. dtypes[0] + is dtype(" Tuple[int, int]: + """Get the version of SDT-control metadata encoded in the comments + + Parameters + ---------- + comments + List of SPE file comments, typically ``metadata["comments"]``. + + Returns + ------- + Major and minor version. ``-1, -1`` if detection failed. + """ + if comments[4][70:76] != "COMVER": + return -1, -1 + try: + return int(comments[4][76:78]), int(comments[4][78:80]) + except ValueError: + return -1, -1 + + @staticmethod + def parse_comments( + comments: Sequence[str], version: Tuple[int, int] + ) -> Dict[str, Any]: + """Extract SDT-control metadata from comments + + Parameters + ---------- + comments + List of SPE file comments, typically ``metadata["comments"]``. + version + Major and minor version of SDT-control metadata format + + Returns + ------- + Dict of metadata + """ + sdt_md = {} + for minor in range(version[1] + 1): + # Metadata with same major version is backwards compatible. + # Fields are specified incrementally in `comment_fields`. + # E.g. if the file has version 5.01, `comment_fields[5, 0]` and + # `comment_fields[5, 1]` need to be decoded. 
+ try: + cmt = __class__.comment_fields[version[0], minor] + except KeyError: + continue + for name, spec in cmt.items(): + try: + v = spec.cvt(comments[spec.n][spec.slice]) + if spec.scale is not None: + v *= spec.scale + sdt_md[name] = v + except Exception as e: + warnings.warn( + f"Failed to decode SDT-control metadata field `{name}`: {e}" + ) + sdt_md[name] = None + if version not in __class__.comment_fields: + supported_ver = ", ".join( + map(lambda x: f"{x[0]}.{x[1]:02}", __class__.comment_fields) + ) + warnings.warn( + f"Unsupported SDT-control metadata version {version[0]}.{version[1]:02}. " + f"Only versions {supported_ver} are supported. " + "Some or all SDT-control metadata may be missing." + ) + comment = comments[0] + comments[2] + sdt_md["comment"] = comment.strip() + return sdt_md + + @staticmethod + def get_datetime(date: str, time: str) -> Union[datetime, None]: + """Turn date and time saved by SDT-control into proper datetime object + + Parameters + ---------- + date + SPE file date, typically ``metadata["date"]``. + time + SPE file date, typically ``metadata["time_local"]``. + + Returns + ------- + File's datetime if parsing was succsessful, else None. + """ + try: + month = __class__.months[date[2:5]] + return datetime( + int(date[5:9]), + month, + int(date[0:2]), + int(time[0:2]), + int(time[2:4]), + int(time[4:6]), + ) + except Exception as e: + logger.info(f"Failed to decode date from SDT-control metadata: {e}.") + + @staticmethod + def extract_metadata(meta: Mapping, char_encoding: str = "latin1"): + """Extract SDT-control metadata from SPE metadata + + SDT-control stores some metadata in comments and other fields. + Extract them and remove unused entries. + + Parameters + ---------- + meta + SPE file metadata. Modified in place. + char_encoding + Character encoding used to decode strings in the metadata. 
class SpePlugin(PluginV3):
    """Read-only plugin for SPE image files.

    Files whose header version is below 3 keep their metadata in the binary
    header; files with header version 3 or above carry an XML footer whose
    offset is stored in the header.
    """

    def __init__(
        self,
        request: Request,
        check_filesize: bool = True,
        char_encoding: Optional[str] = None,
        sdt_meta: Optional[bool] = None,
    ) -> None:
        """Instantiate a new SPE file plugin object

        Parameters
        ----------
        request : Request
            A request object representing the resource to be operated on.
        check_filesize : bool
            If True, compute the number of frames from the filesize, compare it
            to the frame count in the file header, and raise a warning if the
            counts don't match. (Certain software may create files with
            an incorrect frame count in the header.) On mismatch the smaller
            of the two counts is used.
        char_encoding : str
            Deprecated. Exists for backwards compatibility; use ``char_encoding`` of
            ``metadata`` instead.
        sdt_meta : bool
            Deprecated. Exists for backwards compatibility; use ``sdt_control`` of
            ``metadata`` instead.

        Raises
        ------
        InitializationError
            If the request is opened for writing, or if the file header
            cannot be read.

        """

        super().__init__(request)
        if request.mode.io_mode == IOMode.write:
            raise InitializationError("cannot write SPE files")

        if char_encoding is not None:
            warnings.warn(
                "Passing `char_encoding` to the constructor is deprecated. "
                "Use `char_encoding` parameter of the `metadata()` method "
                "instead.",
                DeprecationWarning,
            )
        self._char_encoding = char_encoding
        if sdt_meta is not None:
            warnings.warn(
                "Passing `sdt_meta` to the constructor is deprecated. "
                "Use `sdt_control` parameter of the `metadata()` method "
                "instead.",
                DeprecationWarning,
            )
        self._sdt_meta = sdt_meta

        self._file = self.request.get_file()

        try:
            # Spec.basic contains no string, no need to worry about character
            # encoding.
            info = self._parse_header(Spec.basic, "latin1")
            self._file_header_ver = info["file_header_ver"]
            self._dtype = Spec.dtypes[info["datatype"]]
            self._shape = (info["ydim"], info["xdim"])
            self._len = info["NumFrames"]

            if check_filesize:
                # Some software writes incorrect `NumFrames` metadata.
                # To determine the number of frames, check the size of the data
                # segment -- until the end of the file for SPE<3, until the
                # xml footer for SPE>=3.
                if info["file_header_ver"] >= 3:
                    data_end = info["xml_footer_offset"]
                else:
                    self._file.seek(0, os.SEEK_END)
                    data_end = self._file.tell()
                frame_bytes = self._shape[0] * self._shape[1] * self._dtype.itemsize
                n_frames = (data_end - Spec.data_start) // frame_bytes
                if n_frames != self._len:
                    warnings.warn(
                        f"The file header of {self.request.filename} claims there are "
                        f"{self._len} frames, but there are actually {n_frames} frames."
                    )
                    # Never read past the data segment or past the header count.
                    self._len = min(n_frames, self._len)
            self._file.seek(Spec.data_start)
        except Exception as err:
            # Chain explicitly so the underlying parse/IO problem stays visible.
            raise InitializationError(
                "SPE plugin cannot read the provided file."
            ) from err

    def read(self, *, index: int = ...) -> np.ndarray:
        """Read a frame or all frames from the file

        Parameters
        ----------
        index : int
            Select the index-th frame from the file. If index is `...`,
            select all frames and stack them along a new axis.

        Returns
        -------
        A Numpy array of pixel values.

        Raises
        ------
        IndexError
            If ``index`` is negative or not smaller than the number of frames.

        """

        if index is Ellipsis:
            read_offset = Spec.data_start
            count = self._shape[0] * self._shape[1] * self._len
            out_shape = (self._len, *self._shape)
        elif index < 0:
            raise IndexError(f"Index `{index}` is smaller than 0.")
        elif index >= self._len:
            raise IndexError(
                f"Index `{index}` exceeds the number of frames stored in this file (`{self._len}`)."
            )
        else:
            read_offset = (
                Spec.data_start
                + index * self._shape[0] * self._shape[1] * self._dtype.itemsize
            )
            count = self._shape[0] * self._shape[1]
            out_shape = self._shape

        self._file.seek(read_offset)
        data = np.fromfile(self._file, dtype=self._dtype, count=count)
        return data.reshape(out_shape)

    def iter(self) -> Iterator[np.ndarray]:
        """Iterate over the frames in the file

        Yields
        ------
        A Numpy array of pixel values.
        """

        return (self.read(index=i) for i in range(self._len))

    def metadata(
        self,
        index: int = ...,
        exclude_applied: bool = True,
        char_encoding: str = "latin1",
        sdt_control: bool = True,
    ) -> Dict[str, Any]:
        """SPE specific metadata.

        Parameters
        ----------
        index : int
            Ignored as SPE files only store global metadata.
        exclude_applied : bool
            Ignored. Exists for API compatibility.
        char_encoding : str
            The encoding to use when parsing strings.
        sdt_control : bool
            If `True`, decode special metadata written by the
            SDT-control software if present.

        Returns
        -------
        metadata : dict
            Key-value pairs of metadata.

        Notes
        -----
        SPE v3 stores metadata as XML, whereas SPE v2 uses a binary format.

        If the deprecated ``char_encoding`` or ``sdt_meta`` arguments were
        passed to the constructor, they take precedence over the
        corresponding arguments of this method for SPE v2 files.

        .. rubric:: Supported SPE v2 Metadata fields

        ROIs : list of dict
            Regions of interest used for recording images. Each dict has the
            "top_left" key containing x and y coordinates of the top left corner,
            the "bottom_right" key with x and y coordinates of the bottom right
            corner, and the "bin" key with number of binned pixels in x and y
            directions.
        comments : list of str
            The SPE format allows for 5 comment strings of 80 characters each.
        controller_version : int
            Hardware version
        logic_output : int
            Definition of output BNC
        amp_hi_cap_low_noise : int
            Amp switching mode
        mode : int
            Timing mode
        exp_sec : float
            Alternative exposure in seconds
        date : str
            Date string
        detector_temp : float
            Detector temperature
        detector_type : int
            CCD / diode array type
        st_diode : int
            Trigger diode
        delay_time : float
            Used with async mode
        shutter_control : int
            Normal, disabled open, or disabled closed
        absorb_live : bool
            on / off
        absorb_mode : int
            Reference strip or file
        can_do_virtual_chip : bool
            True or False whether chip can do virtual chip
        threshold_min_live : bool
            on / off
        threshold_min_val : float
            Threshold minimum value
        threshold_max_live : bool
            on / off
        threshold_max_val : float
            Threshold maximum value
        time_local : str
            Experiment local time
        time_utc : str
            Experiment UTC time
        adc_offset : int
            ADC offset
        adc_rate : int
            ADC rate
        adc_type : int
            ADC type
        adc_resolution : int
            ADC resolution
        adc_bit_adjust : int
            ADC bit adjust
        gain : int
            gain
        sw_version : str
            Version of software which created this file
        spare_4 : bytes
            Reserved space
        readout_time : float
            Experiment readout time
        type : str
            Controller type. ``None`` if the stored value does not map to a
            known controller.
        clockspeed_us : float
            Vertical clock speed in microseconds
        readout_mode : ["full frame", "frame transfer", "kinetics", ""]
            Readout mode. Empty string means that this was not set by the
            Software. ``None`` if the stored value does not map to a known
            readout mode.
        window_size : int
            Window size for Kinetics mode
        file_header_ver : float
            File header version
        chip_size : [int, int]
            x and y dimensions of the camera chip
        virt_chip_size : [int, int]
            Virtual chip x and y dimensions
        pre_pixels : [int, int]
            Pre pixels in x and y dimensions
        post_pixels : [int, int]
            Post pixels in x and y dimensions
        geometric : list of {"rotate", "reverse", "flip"}
            Geometric operations
        sdt_major_version : int
            (only for files created by SDT-control)
            Major version of SDT-control software
        sdt_minor_version : int
            (only for files created by SDT-control)
            Minor version of SDT-control software
        sdt_controller_name : str
            (only for files created by SDT-control)
            Controller name
        exposure_time : float
            (only for files created by SDT-control)
            Exposure time in seconds
        color_code : str
            (only for files created by SDT-control)
            Color channels used
        detection_channels : int
            (only for files created by SDT-control)
            Number of channels
        background_subtraction : bool
            (only for files created by SDT-control)
            Whether background subtraction was turned on
        em_active : bool
            (only for files created by SDT-control)
            Whether EM was turned on
        em_gain : int
            (only for files created by SDT-control)
            EM gain
        modulation_active : bool
            (only for files created by SDT-control)
            Whether laser modulation (“attenuate”) was turned on
        pixel_size : float
            (only for files created by SDT-control)
            Camera pixel size
        sequence_type : str
            (only for files created by SDT-control)
            Type of sequence (standard, TOCCSL, arbitrary, …)
        grid : float
            (only for files created by SDT-control)
            Sequence time unit (“grid size”) in seconds
        n_macro : int
            (only for files created by SDT-control)
            Number of macro loops
        delay_macro : float
            (only for files created by SDT-control)
            Time between macro loops in seconds
        n_mini : int
            (only for files created by SDT-control)
            Number of mini loops
        delay_mini : float
            (only for files created by SDT-control)
            Time between mini loops in seconds
        n_micro : int
            (only for files created by SDT-control)
            Number of micro loops
        delay_micro : float
            (only for files created by SDT-control)
            Time between micro loops in seconds
        n_subpics : int
            (only for files created by SDT-control)
            Number of sub-pictures
        delay_shutter : float
            (only for files created by SDT-control)
            Camera shutter delay in seconds
        delay_prebleach : float
            (only for files created by SDT-control)
            Pre-bleach delay in seconds
        bleach_time : float
            (only for files created by SDT-control)
            Bleaching time in seconds
        recovery_time : float
            (only for files created by SDT-control)
            Recovery time in seconds
        comment : str
            (only for files created by SDT-control)
            User-entered comment. This replaces the "comments" field.
        datetime : datetime.datetime
            (only for files created by SDT-control)
            Combines the "date" and "time_local" keys. The latter two plus
            "time_utc" are removed.
        modulation_script : str
            (only for files created by SDT-control)
            Laser modulation script. Replaces the "spare_4" key.
        bleach_piezo_active : bool
            (only for files created by SDT-control)
            Whether piezo for bleaching was enabled
        """

        if self._file_header_ver < 3:
            # Deprecated constructor arguments override the method arguments.
            if self._char_encoding is not None:
                char_encoding = self._char_encoding
            if self._sdt_meta is not None:
                sdt_control = self._sdt_meta
            return self._metadata_pre_v3(char_encoding, sdt_control)
        return self._metadata_post_v3()

    def _metadata_pre_v3(self, char_encoding: str, sdt_control: bool) -> Dict[str, Any]:
        """Extract metadata from SPE v2 files

        Parameters
        ----------
        char_encoding
            String character encoding
        sdt_control
            If `True`, try to decode special metadata written by the
            SDT-control software.

        Returns
        -------
        dict mapping metadata names to values.

        """

        m = self._parse_header(Spec.metadata, char_encoding)

        # A missing or non-positive ROI count falls back to the first ROI entry.
        nr = m.pop("NumROI", None)
        if nr is None or nr < 1:
            nr = 1
        m["ROIs"] = roi_array_to_dict(m["ROIs"][:nr])

        # chip sizes
        m["chip_size"] = [m.pop(k, None) for k in ("xDimDet", "yDimDet")]
        m["virt_chip_size"] = [m.pop(k, None) for k in ("VChipXdim", "VChipYdim")]
        m["pre_pixels"] = [m.pop(k, None) for k in ("XPrePixels", "YPrePixels")]
        m["post_pixels"] = [m.pop(k, None) for k in ("XPostPixels", "YPostPixels")]

        # convert comments from numpy.str_ to str
        m["comments"] = [str(c) for c in m["comments"]]

        # geometric operations are stored as a bit field
        flags = m.pop("geometric", 0)
        m["geometric"] = [
            name
            for bit, name in ((1, "rotate"), (2, "reverse"), (4, "flip"))
            if flags & bit
        ]

        # Make some additional information more human-readable.
        # Stored values are 1-based indices into the lookup lists.
        t = m["type"]
        if 1 <= t <= len(Spec.controllers):
            m["type"] = Spec.controllers[t - 1]
        else:
            m["type"] = None
        r = m["readout_mode"]
        if 1 <= r <= len(Spec.readout_modes):
            m["readout_mode"] = Spec.readout_modes[r - 1]
        else:
            m["readout_mode"] = None

        # bools
        for k in (
            "absorb_live",
            "can_do_virtual_chip",
            "threshold_min_live",
            "threshold_max_live",
        ):
            m[k] = bool(m[k])

        # Extract SDT-control metadata if desired
        if sdt_control:
            SDTControlSpec.extract_metadata(m, char_encoding)

        return m

    def _metadata_post_v3(self) -> Dict[str, Any]:
        """Extract XML metadata from SPE v3 files

        Returns
        -------
        dict with key `"__xml"`, whose value is the XML metadata
        """

        info = self._parse_header(Spec.basic, "latin1")
        self._file.seek(info["xml_footer_offset"])
        xml = self._file.read()
        return {"__xml": xml}

    def properties(self, index: int = ...) -> ImageProperties:
        """Standardized ndimage metadata.

        Parameters
        ----------
        index : int
            If the index is an integer, select the index-th frame and return
            its properties. If index is an Ellipsis (...), return the
            properties of all frames in the file stacked along a new batch
            dimension.

        Returns
        -------
        properties : ImageProperties
            A dataclass filled with standardized image metadata.
        """

        if index is Ellipsis:
            return ImageProperties(
                shape=(self._len, *self._shape),
                dtype=self._dtype,
                n_images=self._len,
                is_batch=True,
            )
        return ImageProperties(shape=self._shape, dtype=self._dtype, is_batch=False)

    def _parse_header(
        self, spec: Mapping[str, Tuple], char_encoding: str
    ) -> Dict[str, Any]:
        """Get information from SPE file header

        Parameters
        ----------
        spec
            Maps header entry name to its location, data type description and
            optionally number of entries. See :py:attr:`Spec.basic` and
            :py:attr:`Spec.metadata`.
        char_encoding
            String character encoding

        Returns
        -------
        Dict mapping header entry name to its value
        """

        ret = {}
        # Decode each string from the numpy array read by np.fromfile
        decode = np.vectorize(lambda x: x.decode(char_encoding))

        for name, sp in spec.items():
            self._file.seek(sp[0])
            cnt = 1 if len(sp) < 3 else sp[2]
            v = np.fromfile(self._file, dtype=sp[1], count=cnt)
            if v.dtype.kind == "S" and name not in Spec.no_decode:
                # A decoding failure is not fatal: warn and keep the raw bytes.
                try:
                    v = decode(v)
                except Exception:
                    warnings.warn(
                        f'Failed to decode "{name}" metadata '
                        "string. Check `char_encoding` parameter."
                    )

            try:
                # For convenience, if the array contains only one single
                # entry, return this entry itself.
                v = v.item()
            except ValueError:
                v = np.squeeze(v)
            ret[name] = v
        return ret
def roi_array_to_dict(a: np.ndarray) -> List[Dict[str, List[int]]]:
    """Convert the `ROIs` structured arrays to :py:class:`dict`

    Parameters
    ----------
    a
        Structured array containing ROI data. It must provide the fields
        "startx", "starty", "endx", "endy", "groupx" and "groupy".

    Returns
    -------
    One dict per ROI. Keys are "top_left", "bottom_right", and "bin",
    values are two-element lists of Python ints whose first element is the
    x axis value and the second element is the y axis value.
    """

    # Select the fields explicitly so the unpacking order below does not
    # depend on the field order of the incoming dtype.
    fields = ["startx", "starty", "endx", "endy", "groupx", "groupy"]
    return [
        {
            "top_left": [int(sx), int(sy)],
            "bottom_right": [int(ex), int(ey)],
            "bin": [int(gx), int(gy)],
        }
        for sx, sy, ex, ey, gx, gy in a[fields]
    ]
class SWFFormat(Format):
    """See :mod:`imageio.plugins.swf`"""

    def _can_read(self, request):
        """Return True if the first bytes carry an SWF signature."""
        signature = request.firstbytes[0:3].decode("ascii", "ignore")
        return signature in ("FWS", "CWS")

    def _can_write(self, request):
        """Return True if the request's extension belongs to this format."""
        return request.extension in self.extensions

    # -- reader

    class Reader(Format.Reader):
        def _open(self, loop=False):
            """Open the file, skip the header and index the image tags.

            Parameters
            ----------
            loop : bool
                If True, indices beyond the last frame wrap around
                (non-streaming mode only).
            """
            if not _swf:
                load_lib()

            self._arg_loop = bool(loop)

            self._fp = self.request.get_file()

            # Check file ...
            tmp = self.request.firstbytes[0:3].decode("ascii", "ignore")
            if tmp == "FWS":
                pass  # OK
            elif tmp == "CWS":
                # Compressed, we need to decompress
                bb = self._fp.read()
                bb = bb[:8] + zlib.decompress(bb[8:])
                # Wrap up in a file object
                self._fp = BytesIO(bb)
            else:
                raise IOError("This does not look like a valid SWF file")

            # Skip first bytes. This also tests support for seeking ...
            try:
                self._fp.seek(8)
                self._streaming_mode = False
            except Exception:
                self._streaming_mode = True
                self._fp_read(8)

            # Skip header
            # Note that the number of frames is there, which we could
            # potentially use, but the number of frames does not necessarily
            # correspond to the number of images.
            nbits = _swf.bits2int(self._fp_read(1), 5)
            nbits = 5 + nbits * 4
            # Number of bytes needed to hold nbits bits (rounded up).
            Lrect = (nbits + 7) // 8
            self._fp_read(Lrect + 3)

            # Now the rest is basically tags ...
            self._imlocs = []  # tuple (loc, sze, T, L1)
            if not self._streaming_mode:
                # Collect locations of frame, while skipping through the data
                # This does not read any of the tag *data*.
                try:
                    while True:
                        isimage, sze, T, L1 = self._read_one_tag()
                        loc = self._fp.tell()
                        if isimage:
                            # Still need to check if the format is right
                            fmt = ord(self._fp_read(3)[2:])
                            if fmt == 5:  # RGB or RGBA lossless
                                self._imlocs.append((loc, sze, T, L1))
                        self._fp.seek(loc + sze)  # Skip over tag
                except IndexError:
                    pass  # done reading

        def _fp_read(self, n):
            """Read ``n`` bytes from the underlying file object."""
            return read_n_bytes(self._fp, n)

        def _close(self):
            pass

        def _get_length(self):
            """Return the number of indexed images (inf when streaming)."""
            if self._streaming_mode:
                return np.inf
            else:
                return len(self._imlocs)

        def _get_data(self, index):
            """Return ``(image, meta)`` for the requested frame.

            In streaming mode the index is only checked for being
            non-negative; the next readable image in the stream is returned.

            Raises
            ------
            IndexError
                If ``index`` is negative or out of bounds, or when the end
                of the movie is reached while streaming.
            """
            # Check index
            if index < 0:
                raise IndexError("Index in swf file must be >= 0")
            if not self._streaming_mode:
                if self._arg_loop and self._imlocs:
                    index = index % len(self._imlocs)
                if index >= len(self._imlocs):
                    raise IndexError("Index out of bounds")

            if self._streaming_mode:
                # Walk over tags until we find an image
                while True:
                    isimage, sze, T, L1 = self._read_one_tag()
                    bb = self._fp_read(sze)  # always read data
                    if isimage:
                        im = _swf.read_pixels(bb, 0, T, L1)  # can be None
                        if im is not None:
                            return im, {}

            else:
                # Go to corresponding location, read data, and convert to image
                loc, sze, T, L1 = self._imlocs[index]
                self._fp.seek(loc)
                bb = self._fp_read(sze)
                # Read_pixels should return ndarray, since we checked format
                im = _swf.read_pixels(bb, 0, T, L1)
                return im, {}

        def _read_one_tag(self):
            """Read one tag header and classify it.

            Returns
            -------
            tuple ``(isimage, size, T, L1)`` where ``isimage`` is True for a
            tag type that can be read as an image and False for any other
            tag, and ``size`` is the number of data bytes following the
            6-byte head.

            Raises
            ------
            IndexError
                When the end of the movie is reached.
            RuntimeError
                When the tag length is invalid.
            """

            # Get head
            head = self._fp_read(6)
            if not head:  # pragma: no cover
                raise IndexError("Reached end of swf movie")

            # Determine type and length
            T, L1, L2 = _swf.get_type_and_len(head)
            if not L2:  # pragma: no cover
                raise RuntimeError("Invalid tag length, could not proceed")

            # Tag data is not read here; only its size is reported.
            isimage = False
            sze = L2 - 6

            # Parse tag
            if T == 0:
                raise IndexError("Reached end of swf movie")
            elif T in [20, 36]:
                isimage = True
            elif T in [6, 21, 35, 90]:  # pragma: no cover
                logger.warning("Ignoring JPEG image: cannot read JPEG.")
            else:
                pass  # Not an image tag

            return isimage, sze, T, L1

        def _get_meta_data(self, index):
            return {}  # This format does not support meta data

    # -- writer

    class Writer(Format.Writer):
        def _open(self, fps=12, loop=True, html=False, compress=False):
            """Prepare writing.

            Parameters
            ----------
            fps : int
                The speed to play the animation.
            loop : bool
                If False, a "stop" action tag is written at the end.
            html : bool
                If True and the output is a file on the file system, write
                an html file next to it.
            compress : bool
                If True, the body is zlib-compressed when closing.
            """
            if not _swf:
                load_lib()

            self._arg_fps = int(fps)
            self._arg_loop = bool(loop)
            self._arg_html = bool(html)
            self._arg_compress = bool(compress)

            self._fp = self.request.get_file()
            self._framecounter = 0
            self._framesize = (100, 100)

            # For compress, we use an in-memory file object
            if self._arg_compress:
                self._fp_real = self._fp
                self._fp = BytesIO()

        def _close(self):
            """Finish the file: patch frame count and size, optionally
            compress the body and write the html page."""
            self._complete()
            # Get size of (uncompressed) file
            sze = self._fp.tell()
            # set nframes, this is in the potentially compressed region
            self._fp.seek(self._location_to_save_nframes)
            self._fp.write(_swf.int2uint16(self._framecounter))
            # Compress body?
            if self._arg_compress:
                bb = self._fp.getvalue()
                self._fp = self._fp_real
                self._fp.write(bb[:8])
                self._fp.write(zlib.compress(bb[8:]))
                # NOTE(review): this stores the compressed size in the header;
                # confirm against the SWF spec whether FileLength should be
                # the uncompressed length for CWS files.
                sze = self._fp.tell()  # renew sze value
            # set size
            self._fp.seek(4)
            self._fp.write(_swf.int2uint32(sze))
            self._fp = None  # Disable

            # Write html?
            if self._arg_html and os.path.isfile(self.request.filename):
                dirname, fname = os.path.split(self.request.filename)
                # Strip the extension whatever its length.
                stem = os.path.splitext(fname)[0]
                filename = os.path.join(dirname, stem + ".html")
                w, h = self._framesize
                html = HTML % (fname, w, h, fname)
                with open(filename, "wb") as f:
                    f.write(html.encode("utf-8"))

        def _write_header(self, framesize, fps):
            """Write the file header and the initial tags.

            Called as soon as we know framesize; when we get first frame.
            """
            self._framesize = framesize
            bb = b""
            # "F" for uncompressed, "C" for compressed
            bb += "FC"[self._arg_compress].encode("ascii")
            bb += "WS".encode("ascii")  # signature bytes
            bb += _swf.int2uint8(8)  # version
            bb += "0000".encode("ascii")  # FileLength (leave open for now)
            bb += (
                _swf.Tag().make_rect_record(0, framesize[0], 0, framesize[1]).tobytes()
            )
            bb += _swf.int2uint8(0) + _swf.int2uint8(fps)  # FrameRate
            self._location_to_save_nframes = len(bb)
            bb += "00".encode("ascii")  # nframes (leave open for now)
            self._fp.write(bb)

            # Write some initial tags
            taglist = _swf.FileAttributesTag(), _swf.SetBackgroundTag(0, 0, 0)
            for tag in taglist:
                self._fp.write(tag.get_tag())

        def _complete(self):
            """Write the closing tags (and a header if no frame was added)."""
            # What if no images were saved?
            if not self._framecounter:
                self._write_header((10, 10), self._arg_fps)
            # Write stop tag if we do not loop
            if not self._arg_loop:
                self._fp.write(_swf.DoActionTag("stop").get_tag())
            # finish with end tag
            self._fp.write("\x00\x00".encode("ascii"))

        def _append_data(self, im, meta):
            """Append one frame; ``meta`` is ignored."""
            # Correct shape and type
            if im.ndim == 3 and im.shape[-1] == 1:
                im = im[:, :, 0]
            im = image_as_uint(im, bitdepth=8)
            # Get frame size
            wh = im.shape[1], im.shape[0]
            # Write header on first frame
            isfirstframe = self._framecounter == 0
            if isfirstframe:
                self._write_header(wh, self._arg_fps)
            # Create tags
            bm = _swf.BitmapTag(im)
            sh = _swf.ShapeTag(bm.id, (0, 0), wh)
            po = _swf.PlaceObjectTag(1, sh.id, move=(not isfirstframe))
            sf = _swf.ShowFrameTag()
            # Write tags
            for tag in [bm, sh, po, sf]:
                self._fp.write(tag.get_tag())
            self._framecounter += 1

        def set_meta_data(self, meta):
            pass
+ Currently applies to OME-TIFF only. +multifile_close : bool + If True (default), keep the handles of other files in multifile + series closed. This is inefficient when few files refer to + many pages. If False, the C runtime may run out of resources. + +Parameters for saving +--------------------- +bigtiff : bool + If True, the BigTIFF format is used. +byteorder : {'<', '>'} + The endianness of the data in the file. + By default this is the system's native byte order. +software : str + Name of the software used to create the image. + Saved with the first page only. + +Metadata for reading +-------------------- +planar_configuration : {'contig', 'planar'} + Specifies if samples are stored contiguous or in separate planes. + By default this setting is inferred from the data shape. + 'contig': last dimension contains samples. + 'planar': third last dimension contains samples. +resolution_unit : int + The resolution unit stored in the TIFF tag. Usually 1 means no/unknown unit, + 2 means dpi (inch), 3 means dpc (centimeter). +resolution : (float, float, str) + A tuple formatted as (X_resolution, Y_resolution, unit). The unit is a + string representing one of the following units:: + + NONE # No unit or unit unknown + INCH # dpi + CENTIMETER # dpc + MILLIMETER + MICROMETER + +compression : int + Value indicating the compression algorithm used, e.g. 5 is LZW, + 7 is JPEG, 8 is deflate. + If 1, data are uncompressed. +predictor : int + Value 2 indicates horizontal differencing was used before compression, + while 3 indicates floating point horizontal differencing. + If 1, no prediction scheme was used before compression. +orientation : {'top_left', 'bottom_right', ...} + Orientation of image array. +is_rgb : bool + True if page contains a RGB image. +is_contig : bool + True if page contains a contiguous image. +is_tiled : bool + True if page contains tiled image. +is_palette : bool + True if page contains a palette-colored image and not OME or STK.
+is_reduced : bool + True if page is a reduced image of another image. +is_shaped : bool + True if page contains shape in image_description tag. +is_fluoview : bool + True if page contains FluoView MM_STAMP tag. +is_nih : bool + True if page contains NIH image header. +is_micromanager : bool + True if page contains Micro-Manager metadata. +is_ome : bool + True if page contains OME-XML in image_description tag. +is_sgi : bool + True if page contains SGI image and tile depth tags. +is_mdgel : bool + True if page contains md_file_tag tag. +is_mediacy : bool + True if page contains Media Cybernetics Id tag. +is_stk : bool + True if page contains UIC2Tag tag. +is_lsm : bool + True if page contains LSM CZ_LSM_INFO tag. +description : str + Image description +description1 : str + Additional description +is_imagej : None or str + ImageJ metadata +software : str + Software used to create the TIFF file +datetime : datetime.datetime + Creation date and time + +Metadata for writing +-------------------- +photometric : {'minisblack', 'miniswhite', 'rgb'} + The color space of the image data. + By default this setting is inferred from the data shape. +planarconfig : {'contig', 'planar'} + Specifies if samples are stored contiguous or in separate planes. + By default this setting is inferred from the data shape. + 'contig': last dimension contains samples. + 'planar': third last dimension contains samples. +resolution : (float, float) or ((int, int), (int, int)) + X and Y resolution in dots per inch as float or rational numbers. +description : str + The subject of the image. Saved with the first page only. +compress : int + Values from 0 to 9 controlling the level of zlib (deflate) compression. + If 0, data are written uncompressed (default). +compression : str, (int, int) + Compression scheme used while writing the image. If omitted (default) the + image is not compressed. Compression cannot be used to write contiguous + series.
Compressors may require certain data shapes, types or value ranges. + For example, JPEG compression requires grayscale or RGB(A), uint8 or 12-bit + uint16. JPEG compression is experimental. JPEG markers and TIFF tags may not + match. Only a limited set of compression schemes are implemented. 'ZLIB' is + short for ADOBE_DEFLATE. The value is written to the Compression tag. +compressionargs: + Extra arguments passed to compression codec, e.g., compression level. Refer + to the Imagecodecs implementation for supported arguments. +predictor : bool + If True, horizontal differencing is applied before compression. + Note that using an int literal 1 actually means no prediction scheme + will be used. +volume : bool + If True, volume data are stored in one tile (if applicable) using + the SGI image_depth and tile_depth tags. + Image width and depth must be multiple of 16. + Few software can read this format, e.g. MeVisLab. +writeshape : bool + If True, write the data shape to the image_description tag + if necessary and no other description is given. +extratags: sequence of tuples + Additional tags as [(code, dtype, count, value, writeonce)]. + + code : int + The TIFF tag Id. + dtype : str + Data type of items in 'value' in Python struct format. + One of B, s, H, I, 2I, b, h, i, f, d, Q, or q. + count : int + Number of data values. Not used for string values. + value : sequence + 'Count' values compatible with 'dtype'. + writeonce : bool + If True, the tag is written to the first page only. + +Notes +----- +Global metadata is stored with the first frame in a TIFF file. +Thus calling :py:meth:`Format.Writer.set_meta_data` after the first frame +was written has no effect. Also, global metadata is ignored if metadata is +provided via the `meta` argument of :py:meth:`Format.Writer.append_data`. + +If you have installed tifffile as a Python package, imageio will attempt +to use that as backend instead of the bundled backend. 
Doing so can +provide access to new performance improvements and bug fixes. + +""" + +import datetime + +from ..core import Format +from ..core.request import URI_BYTES, URI_FILE + +import numpy as np +import warnings + + +try: + import tifffile as _tifffile +except ImportError: + warnings.warn( + "ImageIO's vendored tifffile backend is deprecated and will be" + " removed in ImageIO v3. Install the tifffile directly:" + " `pip install imageio[tifffile]`", + DeprecationWarning, + ) + from . import _tifffile + + +TIFF_FORMATS = (".tif", ".tiff", ".stk", ".lsm") +WRITE_METADATA_KEYS = ( + "photometric", + "planarconfig", + "resolution", + "description", + "compress", + "compression", + "compressionargs", + "predictor", + "volume", + "writeshape", + "extratags", + "datetime", +) +READ_METADATA_KEYS = ( + "planar_configuration", + "is_fluoview", + "is_nih", + "is_contig", + "is_micromanager", + "is_ome", + "is_lsm", + "is_palette", + "is_reduced", + "is_rgb", + "is_sgi", + "is_shaped", + "is_stk", + "is_tiled", + "is_mdgel", + "resolution_unit", + "compression", + "predictor", + "is_mediacy", + "orientation", + "description", + "description1", + "is_imagej", + "software", +) + + +class TiffFormat(Format): + """Provides support for a wide range of Tiff images using the tifffile + backend. + + Images that contain multiple pages can be read using ``imageio.mimread()`` + to read the individual pages, or ``imageio.volread()`` to obtain a + single (higher dimensional) array. + + Note that global metadata is stored with the first frame in a TIFF file. + Thus calling :py:meth:`Format.Writer.set_meta_data` after the first frame + was written has no effect. Also, global metadata is ignored if metadata is + provided via the `meta` argument of :py:meth:`Format.Writer.append_data`. + + If you have installed tifffile as a Python package, imageio will attempt + to use that as backend instead of the bundled backend. 
Doing so can + provide access to new performance improvements and bug fixes. + + Parameters for reading + ---------------------- + offset : int + Optional start position of embedded file. By default this is + the current file position. + size : int + Optional size of embedded file. By default this is the number + of bytes from the 'offset' to the end of the file. + multifile : bool + If True (default), series may include pages from multiple files. + Currently applies to OME-TIFF only. + multifile_close : bool + If True (default), keep the handles of other files in multifile + series closed. This is inefficient when few files refer to + many pages. If False, the C runtime may run out of resources. + + Parameters for saving + --------------------- + bigtiff : bool + If True, the BigTIFF format is used. + byteorder : {'<', '>'} + The endianness of the data in the file. + By default this is the system's native byte order. + software : str + Name of the software used to create the image. + Saved with the first page only. + + Metadata for reading + -------------------- + planar_configuration : {'contig', 'planar'} + Specifies if samples are stored contiguous or in separate planes. + By default this setting is inferred from the data shape. + 'contig': last dimension contains samples. + 'planar': third last dimension contains samples. + resolution_unit : (float, float) or ((int, int), (int, int)) + X and Y resolution in dots per inch as float or rational numbers. + compression : int + Value indicating the compression algorithm used, e.g. 5 is LZW, + 7 is JPEG, 8 is deflate. + If 1, data are uncompressed. + predictor : int + Value 2 indicates horizontal differencing was used before compression, + while 3 indicates floating point horizontal differencing. + If 1, no prediction scheme was used before compression. + orientation : {'top_left', 'bottom_right', ...} + Oriented of image array. + is_rgb : bool + True if page contains a RGB image. 
+ is_contig : bool + True if page contains a contiguous image. + is_tiled : bool + True if page contains tiled image. + is_palette : bool + True if page contains a palette-colored image and not OME or STK. + is_reduced : bool + True if page is a reduced image of another image. + is_shaped : bool + True if page contains shape in image_description tag. + is_fluoview : bool + True if page contains FluoView MM_STAMP tag. + is_nih : bool + True if page contains NIH image header. + is_micromanager : bool + True if page contains Micro-Manager metadata. + is_ome : bool + True if page contains OME-XML in image_description tag. + is_sgi : bool + True if page contains SGI image and tile depth tags. + is_stk : bool + True if page contains UIC2Tag tag. + is_mdgel : bool + True if page contains md_file_tag tag. + is_mediacy : bool + True if page contains Media Cybernetics Id tag. + is_stk : bool + True if page contains UIC2Tag tag. + is_lsm : bool + True if page contains LSM CZ_LSM_INFO tag. + description : str + Image description + description1 : str + Additional description + is_imagej : None or str + ImageJ metadata + software : str + Software used to create the TIFF file + datetime : datetime.datetime + Creation date and time + + Metadata for writing + -------------------- + photometric : {'minisblack', 'miniswhite', 'rgb'} + The color space of the image data. + By default this setting is inferred from the data shape. + planarconfig : {'contig', 'planar'} + Specifies if samples are stored contiguous or in separate planes. + By default this setting is inferred from the data shape. + 'contig': last dimension contains samples. + 'planar': third last dimension contains samples. + resolution : (float, float) or ((int, int), (int, int)) + X and Y resolution in dots per inch as float or rational numbers. + description : str + The subject of the image. Saved with the first page only. + compress : int + Values from 0 to 9 controlling the level of zlib (deflate) compression. 
+ If 0, data are written uncompressed (default). + predictor : bool + If True, horizontal differencing is applied before compression. + Note that using an int literal 1 actually means no prediction scheme + will be used. + volume : bool + If True, volume data are stored in one tile (if applicable) using + the SGI image_depth and tile_depth tags. + Image width and depth must be multiple of 16. + Few software can read this format, e.g. MeVisLab. + writeshape : bool + If True, write the data shape to the image_description tag + if necessary and no other description is given. + extratags: sequence of tuples + Additional tags as [(code, dtype, count, value, writeonce)]. + + code : int + The TIFF tag Id. + dtype : str + Data type of items in 'value' in Python struct format. + One of B, s, H, I, 2I, b, h, i, f, d, Q, or q. + count : int + Number of data values. Not used for string values. + value : sequence + 'Count' values compatible with 'dtype'. + writeonce : bool + If True, the tag is written to the first page only. 
+ """ + + def _can_read(self, request): + try: + _tifffile.TiffFile(request.get_file(), **request.kwargs) + except ValueError: + # vendored backend raises value exception + return False + except _tifffile.TiffFileError: # pragma: no-cover + # current version raises custom exception + return False + finally: + request.get_file().seek(0) + + return True + + def _can_write(self, request): + if request._uri_type in [URI_FILE, URI_BYTES]: + pass # special URI + elif request.extension not in self.extensions: + return False + + try: + _tifffile.TiffWriter(request.get_file(), **request.kwargs) + except ValueError: + # vendored backend raises value exception + return False + except _tifffile.TiffFileError: # pragma: no-cover + # current version raises custom exception + return False + finally: + request.get_file().seek(0) + return True + + # -- reader + + class Reader(Format.Reader): + def _open(self, **kwargs): + # Allow loading from http; tifffile uses seek, so download first + if self.request.filename.startswith(("http://", "https://")): + self._f = f = open(self.request.get_local_filename(), "rb") + else: + self._f = None + f = self.request.get_file() + self._tf = _tifffile.TiffFile(f, **kwargs) + + def _close(self): + self._tf.close() + if self._f is not None: + self._f.close() + + def _get_length(self): + return len(self._tf.series) + + def _get_data(self, index): + if index < 0 or index >= self._get_length(): + raise IndexError("Index out of range while reading from tiff file") + + im = self._tf.asarray(series=index) + meta = self._get_meta_data(index) + + return im, meta + + def _get_meta_data(self, index): + meta = {} + page = self._tf.pages[index or 0] + for key in READ_METADATA_KEYS: + try: + meta[key] = getattr(page, key) + except Exception: + pass + + # tifffile <= 0.12.1 use datetime, newer use DateTime + for key in ("datetime", "DateTime"): + try: + meta["datetime"] = datetime.datetime.strptime( + page.tags[key].value, "%Y:%m:%d %H:%M:%S" + ) + break + except 
Exception: + pass + + if 296 in page.tags: + meta["resolution_unit"] = page.tags[296].value.value + + if 282 in page.tags and 283 in page.tags and 296 in page.tags: + resolution_x = page.tags[282].value + resolution_y = page.tags[283].value + if resolution_x[1] == 0 or resolution_y[1] == 0: + warnings.warn( + "Ignoring resolution metadata, " + "because at least one direction has a 0 denominator.", + RuntimeWarning, + ) + else: + meta["resolution"] = ( + resolution_x[0] / resolution_x[1], + resolution_y[0] / resolution_y[1], + page.tags[296].value.name, + ) + + return meta + + # -- writer + class Writer(Format.Writer): + def _open(self, bigtiff=None, byteorder=None, software=None): + try: + self._tf = _tifffile.TiffWriter( + self.request.get_file(), + bigtiff=bigtiff, + byteorder=byteorder, + software=software, + ) + self._software = None + except TypeError: + # In tifffile >= 0.15, the `software` arg is passed to + # TiffWriter.save + self._tf = _tifffile.TiffWriter( + self.request.get_file(), bigtiff=bigtiff, byteorder=byteorder + ) + self._software = software + + self._meta = {} + self._frames_written = 0 + + def _close(self): + self._tf.close() + + def _append_data(self, im, meta): + if meta is not None: + meta = self._sanitize_meta(meta) + else: + # Use global metadata for first frame + meta = self._meta if self._frames_written == 0 else {} + if self._software is not None and self._frames_written == 0: + meta["software"] = self._software + # No need to check self.request.mode; tifffile figures out whether + # this is a single page, or all page data at once. 
+ try: + # TiffWriter.save has been deprecated in version 2020.9.30 + write_meth = self._tf.write + except AttributeError: + write_meth = self._tf.save + write_meth(np.asanyarray(im), contiguous=False, **meta) + self._frames_written += 1 + + @staticmethod + def _sanitize_meta(meta): + ret = {} + for key, value in meta.items(): + if key in WRITE_METADATA_KEYS: + # Special case of previously read `predictor` int value + # 1(=NONE) translation to False expected by TiffWriter.save + if key == "predictor" and not isinstance(value, bool): + ret[key] = value > 1 + elif key == "compress" and value != 0: + warnings.warn( + "The use of `compress` is deprecated. Use `compression` and `compressionargs` instead.", + DeprecationWarning, + ) + + if _tifffile.__version__ < "2022": + ret["compression"] = (8, value) + else: + ret["compression"] = "zlib" + ret["compressionargs"] = {"level": value} + else: + ret[key] = value + return ret + + def set_meta_data(self, meta): + self._meta = self._sanitize_meta(meta) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/tifffile_v3.py b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/tifffile_v3.py new file mode 100644 index 0000000000000000000000000000000000000000..054eaf1a50ceae3c056b625682af4f68ba8d3f9d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio/plugins/tifffile_v3.py @@ -0,0 +1,413 @@ +"""Read/Write TIFF files using tifffile. + +.. note:: + To use this plugin you need to have `tifffile + `_ installed:: + + pip install tifffile + +This plugin wraps tifffile, a powerful library to manipulate TIFF files. It +superseeds our previous tifffile plugin and aims to expose all the features of +tifffile. + +The plugin treats individual TIFF series as ndimages. 
A series is a sequence of +TIFF pages that, when combined describe a meaningful unit, e.g., a volumetric +image (where each slice is stored on an individual page) or a multi-color +staining picture (where each stain is stored on an individual page). Different +TIFF flavors/variants use series in different ways and, as such, the resulting +reading behavior may vary depending on the program used while creating a +particular TIFF file. + +Methods +------- +.. note:: + Check the respective function for a list of supported kwargs and detailed + documentation. + +.. autosummary:: + :toctree: + + TifffilePlugin.read + TifffilePlugin.iter + TifffilePlugin.write + TifffilePlugin.properties + TifffilePlugin.metadata + +Additional methods available inside the :func:`imopen ` +context: + +.. autosummary:: + :toctree: + + TifffilePlugin.iter_pages + +""" + +from io import BytesIO +from typing import Any, Dict, Optional, cast +import warnings + +import numpy as np +import tifffile + +from ..core.request import URI_BYTES, InitializationError, Request +from ..core.v3_plugin_api import ImageProperties, PluginV3 +from ..typing import ArrayLike + + +def _get_resolution(page: tifffile.TiffPage) -> Dict[str, Any]: + metadata = {} + + try: + metadata["resolution_unit"] = page.tags[296].value.value + except KeyError: + # tag 296 missing + return metadata + + try: + resolution_x = page.tags[282].value + resolution_y = page.tags[283].value + + metadata["resolution"] = ( + resolution_x[0] / resolution_x[1], + resolution_y[0] / resolution_y[1], + ) + except KeyError: + # tag 282 or 283 missing + pass + except ZeroDivisionError: + warnings.warn( + "Ignoring resolution metadata because at least one direction has a 0 " + "denominator.", + RuntimeWarning, + ) + + return metadata + + +class TifffilePlugin(PluginV3): + """Support for tifffile as backend. + + Parameters + ---------- + request : iio.Request + A request object that represents the users intent. 
It provides a + standard interface for a plugin to access the various ImageResources. + Check the docs for details. + kwargs : Any + Additional kwargs are forwarded to tifffile's constructor, i.e. + to ``TiffFile`` for reading or ``TiffWriter`` for writing. + + """ + + def __init__(self, request: Request, **kwargs) -> None: + super().__init__(request) + self._fh = None + + if request.mode.io_mode == "r": + try: + self._fh = tifffile.TiffFile(request.get_file(), **kwargs) + except tifffile.tifffile.TiffFileError: + raise InitializationError("Tifffile can not read this file.") + else: + self._fh = tifffile.TiffWriter(request.get_file(), **kwargs) + + # --------------------- + # Standard V3 Interface + # --------------------- + + def read(self, *, index: int = None, page: int = None, **kwargs) -> np.ndarray: + """Read a ndimage or page. + + The ndimage returned depends on the value of both ``index`` and + ``page``. ``index`` selects the series to read and ``page`` allows + selecting a single page from the selected series. If ``index=None``, + ``page`` is understood as a flat index, i.e., the selection ignores + individual series inside the file. If both ``index`` and ``page`` are + ``None``, then all the series are read and returned as a batch. + + Parameters + ---------- + index : int + If ``int``, select the ndimage (series) located at that index inside + the file and return ``page`` from it. If ``None`` and ``page`` is + ``int`` read the page located at that (flat) index inside the file. + If ``None`` and ``page=None``, read all ndimages from the file and + return them as a batch. + page : int + If ``None`` return the full selected ndimage. If ``int``, read the + page at the selected index and return it. + kwargs : Any + Additional kwargs are forwarded to TiffFile's ``as_array`` method. + + Returns + ------- + ndarray : np.ndarray + The decoded ndimage or page. 
+ """ + + if "key" not in kwargs: + kwargs["key"] = page + elif page is not None: + raise ValueError("Can't use `page` and `key` at the same time.") + + # set plugin default for ``index`` + if index is not None and "series" in kwargs: + raise ValueError("Can't use `series` and `index` at the same time.") + elif "series" in kwargs: + index = kwargs.pop("series") + elif index is not None: + pass + else: + index = 0 + + if index is Ellipsis and page is None: + # read all series in the file and return them as a batch + ndimage = np.stack([x for x in self.iter(**kwargs)]) + else: + index = None if index is Ellipsis else index + ndimage = self._fh.asarray(series=index, **kwargs) + + return ndimage + + def iter(self, **kwargs) -> np.ndarray: + """Yield ndimages from the TIFF. + + Parameters + ---------- + kwargs : Any + Additional kwargs are forwarded to the TiffPageSeries' ``as_array`` + method. + + Yields + ------ + ndimage : np.ndarray + A decoded ndimage. + """ + + for sequence in self._fh.series: + yield sequence.asarray(**kwargs) + + def write( + self, ndimage: ArrayLike, *, is_batch: bool = False, **kwargs + ) -> Optional[bytes]: + """Save a ndimage as TIFF. + + Parameters + ---------- + ndimage : ArrayLike + The ndimage to encode and write to the ImageResource. + is_batch : bool + If True, the first dimension of the given ndimage is treated as a + batch dimension and each element will create a new series. + kwargs : Any + Additional kwargs are forwarded to TiffWriter's ``write`` method. + + Returns + ------- + encoded_image : bytes + If the ImageResource is ``""``, return the encoded bytes. + Otherwise write returns None. + + Notes + ----- + Incremental writing is supported. Subsequent calls to ``write`` will + create new series unless ``contiguous=True`` is used, in which case the + call to write will append to the current series. 
+ + """ + + if not is_batch: + ndimage = np.asarray(ndimage)[None, :] + + for image in ndimage: + self._fh.write(image, **kwargs) + + if self._request._uri_type == URI_BYTES: + self._fh.close() + file = cast(BytesIO, self._request.get_file()) + return file.getvalue() + + def metadata( + self, *, index: int = Ellipsis, page: int = None, exclude_applied: bool = True + ) -> Dict[str, Any]: + """Format-Specific TIFF metadata. + + The metadata returned depends on the value of both ``index`` and + ``page``. ``index`` selects a series and ``page`` allows selecting a + single page from the selected series. If ``index=Ellipsis``, ``page`` is + understood as a flat index, i.e., the selection ignores individual + series inside the file. If ``index=Ellipsis`` and ``page=None`` then + global (file-level) metadata is returned. + + Parameters + ---------- + index : int + Select the series of which to extract metadata from. If Ellipsis, treat + page as a flat index into the file's pages. + page : int + If not None, select the page of which to extract metadata from. If + None, read series-level metadata or, if ``index=...`` global, + file-level metadata. + exclude_applied : bool + For API compatibility. Currently ignored. + + Returns + ------- + metadata : dict + A dictionary with information regarding the tiff flavor (file-level) + or tiff tags (page-level). + """ + + if index is not Ellipsis and page is not None: + target = self._fh.series[index].pages[page] + elif index is not Ellipsis and page is None: + # This is based on my understanding that series-level metadata is + # stored in the first TIFF page. 
+ target = self._fh.series[index].pages[0] + elif index is Ellipsis and page is not None: + target = self._fh.pages[page] + else: + target = None + + metadata = {} + if target is None: + # return file-level metadata + metadata["byteorder"] = self._fh.byteorder + + for flag in tifffile.TIFF.FILE_FLAGS: + flag_value = getattr(self._fh, "is_" + flag) + metadata["is_" + flag] = flag_value + + if flag_value and hasattr(self._fh, flag + "_metadata"): + flavor_metadata = getattr(self._fh, flag + "_metadata") + if isinstance(flavor_metadata, tuple): + metadata.update(flavor_metadata[0]) + else: + metadata.update(flavor_metadata) + else: + # tifffile may return a TiffFrame instead of a page + target = target.keyframe + + metadata.update({tag.name: tag.value for tag in target.tags}) + metadata.update( + { + "planar_configuration": target.planarconfig, + "compression": target.compression, + "predictor": target.predictor, + "orientation": None, # TODO + "description1": target.description1, + "description": target.description, + "software": target.software, + **_get_resolution(target), + "datetime": target.datetime, + } + ) + + return metadata + + def properties(self, *, index: int = None, page: int = None) -> ImageProperties: + """Standardized metadata. + + The properties returned depend on the value of both ``index`` and + ``page``. ``index`` selects a series and ``page`` allows selecting a + single page from the selected series. If ``index=Ellipsis``, ``page`` is + understood as a flat index, i.e., the selection ignores individual + series inside the file. If ``index=Ellipsis`` and ``page=None`` then + global (file-level) properties are returned. If ``index=Ellipsis`` + and ``page=...``, file-level properties for the flattened index are + returned. + + Parameters + ---------- + index : int + If ``int``, select the ndimage (series) located at that index inside + the file. 
If ``Ellipsis`` and ``page`` is ``int`` extract the + properties of the page located at that (flat) index inside the file. + If ``Ellipsis`` and ``page=None``, return the properties for the + batch of all ndimages in the file. + page : int + If ``None`` return the properties of the full ndimage. If ``...`` + return the properties of the flattened index. If ``int``, + return the properties of the page at the selected index only. + + Returns + ------- + image_properties : ImageProperties + The standardized metadata (properties) of the selected ndimage or series. + + """ + index = index or 0 + page_idx = 0 if page in (None, Ellipsis) else page + + if index is Ellipsis: + target_page = self._fh.pages[page_idx] + else: + target_page = self._fh.series[index].pages[page_idx] + + if index is Ellipsis and page is None: + n_series = len(self._fh.series) + props = ImageProperties( + shape=(n_series, *target_page.shape), + dtype=target_page.dtype, + n_images=n_series, + is_batch=True, + spacing=_get_resolution(target_page).get("resolution"), + ) + elif index is Ellipsis and page is Ellipsis: + n_pages = len(self._fh.pages) + props = ImageProperties( + shape=(n_pages, *target_page.shape), + dtype=target_page.dtype, + n_images=n_pages, + is_batch=True, + spacing=_get_resolution(target_page).get("resolution"), + ) + else: + props = ImageProperties( + shape=target_page.shape, + dtype=target_page.dtype, + is_batch=False, + spacing=_get_resolution(target_page).get("resolution"), + ) + + return props + + def close(self) -> None: + if self._fh is not None: + self._fh.close() + + super().close() + + # ------------------------------ + # Add-on Interface inside imopen + # ------------------------------ + + def iter_pages(self, index=..., **kwargs): + """Yield pages from a TIFF file. + + This generator walks over the flat index of the pages inside an + ImageResource and yields them in order. + + Parameters + ---------- + index : int + The index of the series to yield pages from. 
If Ellipsis, walk over + the file's flat index (and ignore individual series). + kwargs : Any + Additional kwargs are passed to TiffPage's ``as_array`` method. + + Yields + ------ + page : np.ndarray + A page stored inside the TIFF file. + + """ + + if index is Ellipsis: + pages = self._fh.pages + else: + pages = self._fh.series[index] + + for page in pages: + yield page.asarray(**kwargs) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio_ffmpeg-0.6.0.dist-info/INSTALLER b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio_ffmpeg-0.6.0.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio_ffmpeg-0.6.0.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio_ffmpeg-0.6.0.dist-info/LICENSE b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio_ffmpeg-0.6.0.dist-info/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..6d27cf66867f1884302d04b05ccdf6f94b124bb9 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio_ffmpeg-0.6.0.dist-info/LICENSE @@ -0,0 +1,25 @@ +BSD 2-Clause License + +Copyright (c) 2019-2025, imageio +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio_ffmpeg-0.6.0.dist-info/METADATA b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio_ffmpeg-0.6.0.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..17450fac499e5602479294f9f1941e921434cca5 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio_ffmpeg-0.6.0.dist-info/METADATA @@ -0,0 +1,39 @@ +Metadata-Version: 2.1 +Name: imageio-ffmpeg +Version: 0.6.0 +Summary: FFMPEG wrapper for Python +Home-page: https://github.com/imageio/imageio-ffmpeg +Download-URL: http://pypi.python.org/pypi/imageio-ffmpeg +Author: imageio contributors +Author-email: almar.klein@gmail.com +License: BSD-2-Clause +Keywords: video ffmpeg +Platform: any +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Science/Research +Classifier: Intended Audience :: Education +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: BSD License +Classifier: Operating System :: MacOS :: MacOS X +Classifier: Operating System :: Microsoft :: Windows +Classifier: Operating System :: POSIX +Classifier: Programming Language :: Python +Classifier: Programming Language :: 
Python :: 3 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: Programming Language :: Python :: 3.13 +Provides: imageio_ffmpeg +Requires-Python: >=3.9 +License-File: LICENSE + +FFMPEG wrapper for Python. + +Note that the platform-specific wheels contain the binary executable +of ffmpeg, which makes this package around 60 MiB in size. +I guess that's the cost for being able to read/write video files. + +For Linux users: the above is not the case when installing via your +Linux package manager (if that is possible), because this package would +simply depend on ffmpeg in that case. diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio_ffmpeg-0.6.0.dist-info/RECORD b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio_ffmpeg-0.6.0.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..e1952002e0207385dd81bc0c117dedf7c163c2b9 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio_ffmpeg-0.6.0.dist-info/RECORD @@ -0,0 +1,20 @@ +imageio_ffmpeg-0.6.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +imageio_ffmpeg-0.6.0.dist-info/LICENSE,sha256=7Kc0IdDJBakulSpVPU1YkZxaUHwsLHbjwM-bpAjCmeQ,1317 +imageio_ffmpeg-0.6.0.dist-info/METADATA,sha256=yWl7N5QZpkBJ2Z4iSMTLqWPgO1ytHcZeEKiH0dp2Ue4,1528 +imageio_ffmpeg-0.6.0.dist-info/RECORD,, +imageio_ffmpeg-0.6.0.dist-info/WHEEL,sha256=VQVGmkIKlT8O316bjXskIlqD0l4jxlF8LAsrbEaXTdM,107 +imageio_ffmpeg-0.6.0.dist-info/top_level.txt,sha256=ODQYUYYbtj9I1SjASEMY7h8Q8haLrmMNURSCn0FEB18,15 +imageio_ffmpeg/__init__.py,sha256=Qa_CMd_spCLGkvmiGbUXeLdWDFxN_6CXv5L4CHLR_Yo,227 +imageio_ffmpeg/__pycache__/__init__.cpython-312.pyc,, +imageio_ffmpeg/__pycache__/_definitions.cpython-312.pyc,, +imageio_ffmpeg/__pycache__/_io.cpython-312.pyc,, +imageio_ffmpeg/__pycache__/_parsing.cpython-312.pyc,, 
+imageio_ffmpeg/__pycache__/_utils.cpython-312.pyc,, +imageio_ffmpeg/_definitions.py,sha256=Ugb5bLRbwnxOMU05wD5NO0XfF96v89aIHp9FcC0WnjI,2041 +imageio_ffmpeg/_io.py,sha256=GgBt0X5cVP_GaitAtttVhuduWY4coV8wHjtwyGtD3-k,27048 +imageio_ffmpeg/_parsing.py,sha256=UUtYPybzv9vQ4sKPwYlRTyaP_k9koKXfl-zI6Rm4Qpo,6840 +imageio_ffmpeg/_utils.py,sha256=eVF4ny9zP3jPfeYIwl6VfV9T1v34S6yURWC_RiGUNF8,3833 +imageio_ffmpeg/binaries/README.md,sha256=sNNt-xuh6lyoc6b228wilbenlErqh1Bx2RpqrRhw530,45 +imageio_ffmpeg/binaries/__init__.py,sha256=zjnRZoM6bQUBYOI30HpzIp4ck07bOy4oxXB-bwN3NW8,45 +imageio_ffmpeg/binaries/__pycache__/__init__.cpython-312.pyc,, +imageio_ffmpeg/binaries/ffmpeg-linux-x86_64-v7.0.2,sha256=5-f7MEd_cX5vVfkYCnA4bGJnfvik1NGl2Uj0CYqj65k,79826272 diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio_ffmpeg-0.6.0.dist-info/WHEEL b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio_ffmpeg-0.6.0.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..7677231d93732ed4f304ed0ca4bc0daf725b8b4e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio_ffmpeg-0.6.0.dist-info/WHEEL @@ -0,0 +1,4 @@ +Wheel-Version: 1.0 +Generator: setuptools (74.1.2) +Root-Is-Purelib: true +Tag: py3-none-manylinux2014_x86_64 diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio_ffmpeg-0.6.0.dist-info/top_level.txt b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio_ffmpeg-0.6.0.dist-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..8adec99ed476b9cbd472361d84ddb1224676762d --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio_ffmpeg-0.6.0.dist-info/top_level.txt @@ -0,0 +1 @@ +imageio_ffmpeg diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio_ffmpeg/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio_ffmpeg/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b79d867e2e6508a97ea4d27d8481afd561b1dd69 --- /dev/null +++ 
b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio_ffmpeg/__init__.py @@ -0,0 +1,8 @@ +""" imageio_ffmpeg, FFMPEG wrapper for Python. +""" + +# flake8: noqa + +from ._definitions import __version__ +from ._io import count_frames_and_secs, read_frames, write_frames +from ._utils import get_ffmpeg_exe, get_ffmpeg_version diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio_ffmpeg/_definitions.py b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio_ffmpeg/_definitions.py new file mode 100644 index 0000000000000000000000000000000000000000..d93d7587a1b5a1845e1a0632c6823a96ab54d28b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio_ffmpeg/_definitions.py @@ -0,0 +1,69 @@ +import sys +import platform + + +__version__ = "0.6.0" + + +def get_platform(): + # get_os_string and get_arch are taken from wgpu-py + return _get_os_string() + "-" + _get_arch() + + +def _get_os_string(): + if sys.platform.startswith("win"): + return "windows" + elif sys.platform.startswith("darwin"): + return "macos" + elif sys.platform.startswith("linux"): + return "linux" + else: + return sys.platform + + +def _get_arch(): + # See e.g.: https://stackoverflow.com/questions/45124888 + is_64_bit = sys.maxsize > 2**32 + machine = platform.machine() + + if machine == "armv7l": + # Raspberry pi + detected_arch = "armv7" + elif is_64_bit and machine.startswith(("arm", "aarch64")): + # Includes MacOS M1, arm linux, ... + detected_arch = "aarch64" + elif is_64_bit: + detected_arch = "x86_64" + else: + detected_arch = "i686" + return detected_arch + + +# The Linux static builds (https://johnvansickle.com/ffmpeg/) are build +# for Linux kernels 3.2.0 and up (at the time of writing, ffmpeg v7.0.2). +# This corresponds to Ubuntu 12.04 / Debian 7. I'm not entirely sure' +# what manylinux matches that, but I think manylinux2014 should be safe. 
+ + +# Platform string -> ffmpeg filename +FNAME_PER_PLATFORM = { + "macos-aarch64": "ffmpeg-macos-aarch64-v7.1", + "macos-x86_64": "ffmpeg-macos-x86_64-v7.1", # 10.9+ + "windows-x86_64": "ffmpeg-win-x86_64-v7.1.exe", + "windows-i686": "ffmpeg-win32-v4.2.2.exe", # Windows 7+ + "linux-aarch64": "ffmpeg-linux-aarch64-v7.0.2", # Kernel 3.2.0+ + "linux-x86_64": "ffmpeg-linux-x86_64-v7.0.2", +} + +osxplats = "macosx_10_9_intel.macosx_10_9_x86_64" +osxarmplats = "macosx_11_0_arm64" + +# Wheel tag -> platform string +WHEEL_BUILDS = { + "py3-none-manylinux2014_x86_64": "linux-x86_64", + "py3-none-manylinux2014_aarch64": "linux-aarch64", + "py3-none-" + osxplats: "macos-x86_64", + "py3-none-" + osxarmplats: "macos-aarch64", + "py3-none-win32": "windows-i686", + "py3-none-win_amd64": "windows-x86_64", +} diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/imageio_ffmpeg/_io.py b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio_ffmpeg/_io.py new file mode 100644 index 0000000000000000000000000000000000000000..faf1ee89ea5bca973e946114a18ef5701a82f42f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/imageio_ffmpeg/_io.py @@ -0,0 +1,693 @@ +import pathlib +import subprocess +import sys +import time +from collections import defaultdict +from functools import lru_cache + +from ._parsing import LogCatcher, cvsecs, parse_ffmpeg_header +from ._utils import _popen_kwargs, get_ffmpeg_exe, logger + +ISWIN = sys.platform.startswith("win") + +h264_encoder_preference = defaultdict(lambda: -1) +# The libx264 was the default encoder for a longe time with imageio +h264_encoder_preference["libx264"] = 100 + +# Encoder with the nvidia graphics card dedicated hardware +h264_encoder_preference["h264_nvenc"] = 90 +# Deprecated names for the same encoder +h264_encoder_preference["nvenc_h264"] = 90 +h264_encoder_preference["nvenc"] = 90 + +# vaapi provides hardware encoding with intel integrated graphics chipsets +h264_encoder_preference["h264_vaapi"] = 80 + +# openh264 is 
cisco's open source encoder +h264_encoder_preference["libopenh264"] = 70 + +h264_encoder_preference["libx264rgb"] = 50 + + +def ffmpeg_test_encoder(encoder): + # Use the null streams to validate if we can encode anything + # https://trac.ffmpeg.org/wiki/Null + cmd = [ + get_ffmpeg_exe(), + "-hide_banner", + "-f", + "lavfi", + "-i", + "nullsrc=s=256x256:d=8", + "-vcodec", + encoder, + "-f", + "null", + "-", + ] + p = subprocess.run( + cmd, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + return p.returncode == 0 + + +def get_compiled_h264_encoders(): + cmd = [get_ffmpeg_exe(), "-hide_banner", "-encoders"] + p = subprocess.run( + cmd, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + stdout = p.stdout.decode().replace("\r", "") + # 2022/04/08: hmaarrfk + # I couldn't find a good way to get the list of available encoders from + # the ffmpeg command + # The ffmpeg command return a table that looks like + # Notice the leading space at the very beginning + # On ubuntu with libffmpeg-nvenc-dev we get + # $ ffmpeg -hide_banner -encoders | grep -i h.264 + # + # Encoders: + # V..... = Video + # A..... = Audio + # S..... = Subtitle + # .F.... = Frame-level multithreading + # ..S... = Slice-level multithreading + # ...X.. = Codec is experimental + # ....B. = Supports draw_horiz_band + # .....D = Supports direct rendering method 1 + # ------ + # V..... libx264 libx264 H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (codec h264) + # V..... libx264rgb libx264 H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 RGB (codec h264) + # V....D h264_nvenc NVIDIA NVENC H.264 encoder (codec h264) + # V..... h264_omx OpenMAX IL H.264 video encoder (codec h264) + # V..... h264_qsv H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (Intel Quick Sync Video acceleration) (codec h264) + # V..... h264_v4l2m2m V4L2 mem2mem H.264 encoder wrapper (codec h264) + # V....D h264_vaapi H.264/AVC (VAAPI) (codec h264) + # V..... 
def count_frames_and_secs(path):
    """
    Get the number of frames and number of seconds for the given video
    file. Note that this operation can be quite slow for large files.

    The file is run through ffmpeg with a null filter and null output, and
    the last progress line that starts with "frame=" is parsed.

    Disclaimer: I've seen this produce different results from actually reading
    the frames with older versions of ffmpeg (2.x). Therefore I cannot say
    with 100% certainty that the returned values are always exact.

    Parameters:
        path (str or pathlib.Path): the video file to inspect.

    Returns a tuple (nframes, nsecs). Either element is None when the
    corresponding field is absent from the parsed progress line.

    Raises TypeError for a path that is not a str / pathlib path, and
    RuntimeError when ffmpeg fails or prints no progress line.
    """
    # https://stackoverflow.com/questions/2017843/fetch-frame-count-with-ffmpeg

    if isinstance(path, pathlib.PurePath):
        path = str(path)
    if not isinstance(path, str):
        raise TypeError("Video path must be a string or pathlib.Path.")

    decode_to_null = ["-map", "0:v:0", "-vf", "null", "-f", "null", "-"]
    cmd = [get_ffmpeg_exe(), "-i", path] + decode_to_null
    try:
        out = subprocess.check_output(cmd, stderr=subprocess.STDOUT, **_popen_kwargs())
    except subprocess.CalledProcessError as err:
        out = err.output.decode(errors="ignore")
        raise RuntimeError(
            "FFMPEG call failed with {}:\n{}".format(err.returncode, out)
        )

    # Note that other than with the subprocess calls below, ffmpeg wont hang here.
    # Worst case Python will stop/crash and ffmpeg will continue running until done.

    def token_after(text, key):
        # First whitespace-delimited token following "<key>", or None if absent.
        pos = text.find(key)
        if pos < 0:
            return None
        value = text[pos:].split("=", 1)[-1]
        return value.lstrip().split(" ", 1)[0].strip()

    # Walk backwards: the last progress line carries the final totals.
    for raw in reversed(out.splitlines()):
        if not raw.startswith(b"frame="):
            continue
        text = raw.decode(errors="ignore")
        frame_token = token_after(text, "frame=")
        time_token = token_after(text, "time=")
        nframes = None if frame_token is None else int(frame_token)
        nsecs = None if time_token is None else cvsecs(*time_token.split(":"))
        return nframes, nsecs

    raise RuntimeError("Could not get number of frames")  # pragma: no cover
+ + After that, it yields frames until the end of the video is reached. Each + frame is a bytes object. + + This function makes no assumptions about the number of frames in + the data. For one because this is hard to predict exactly, but also + because it may depend on the provided output_params. If you want + to know the number of frames in a video file, use count_frames_and_secs(). + It is also possible to estimate the number of frames from the fps and + duration, but note that even if both numbers are present, the resulting + value is not always correct. + + Example: + + gen = read_frames(path) + meta = gen.__next__() + for frame in gen: + print(len(frame)) + + Parameters: + path (str): the filename of the file to read from. + pix_fmt (str): the pixel format of the frames to be read. + The default is "rgb24" (frames are uint8 RGB images). + input_params (list): Additional ffmpeg input command line parameters. + output_params (list): Additional ffmpeg output command line parameters. + bits_per_pixel (int): The number of bits per pixel in the output frames. + This depends on the given pix_fmt. Default is 24 (RGB) + bpp (int): DEPRECATED, USE bits_per_pixel INSTEAD. The number of bytes per pixel in the output frames. + This depends on the given pix_fmt. Some pixel formats like yuv420p have 12 bits per pixel + and cannot be set in bytes as integer. For this reason the bpp argument is deprecated. + """ + + # ----- Input args + + if isinstance(path, pathlib.PurePath): + path = str(path) + if not isinstance(path, str): + raise TypeError("Video path must be a string or pathlib.Path.") + # Note: Dont check whether it exists. The source could be e.g. a camera. 
+ + pix_fmt = pix_fmt or "rgb24" + bpp = bpp or 3 + bits_per_pixel = bits_per_pixel or bpp * 8 + input_params = input_params or [] + output_params = output_params or [] + + assert isinstance(pix_fmt, str), "pix_fmt must be a string" + assert isinstance(bits_per_pixel, int), "bpp and bits_per_pixel must be an int" + assert isinstance(input_params, list), "input_params must be a list" + assert isinstance(output_params, list), "output_params must be a list" + + # ----- Prepare + + pre_output_params = ["-pix_fmt", pix_fmt, "-vcodec", "rawvideo", "-f", "image2pipe"] + + cmd = [get_ffmpeg_exe()] + cmd += input_params + ["-i", path] + cmd += pre_output_params + output_params + ["-"] + + process = subprocess.Popen( + cmd, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + **_popen_kwargs(prevent_sigint=True) + ) + + log_catcher = LogCatcher(process.stderr) + + # Init policy by which to terminate ffmpeg. May be set to "kill" later. + stop_policy = "timeout" # not wait; ffmpeg should be able to quit quickly + + # Enter try block directly after opening the process. + # We terminate ffmpeg in the finally clause. + # Generators are automatically closed when they get deleted, + # so the finally block is guaranteed to run. + try: + # ----- Load meta data + + # Wait for the log catcher to get the meta information + etime = time.time() + 10.0 + while log_catcher.is_alive() and not log_catcher.header and time.time() < etime: + time.sleep(0.01) + + # Check whether we have the information + if not log_catcher.header: + err2 = log_catcher.get_text(0.2) + fmt = "Could not load meta information\n=== stderr ===\n{}" + raise IOError(fmt.format(err2)) + elif "No such file or directory" in log_catcher.header: + raise IOError("{} not found! 
Wrong path?".format(path)) + + meta = parse_ffmpeg_header(log_catcher.header) + yield meta + + # ----- Read frames + + width, height = meta["size"] + framesize_bits = width * height * bits_per_pixel + framesize_bytes = framesize_bits / 8 + assert ( + framesize_bytes.is_integer() + ), "incorrect bits_per_pixel, framesize in bytes must be an int" + framesize_bytes = int(framesize_bytes) + framenr = 0 + + while True: + framenr += 1 + try: + bb = bytes() + while len(bb) < framesize_bytes: + extra_bytes = process.stdout.read(framesize_bytes - len(bb)) + if not extra_bytes: + if len(bb) == 0: + return + else: + raise RuntimeError( + "End of file reached before full frame could be read." + ) + bb += extra_bytes + yield bb + except Exception as err: + err1 = str(err) + err2 = log_catcher.get_text(0.4) + fmt = "Could not read frame {}:\n{}\n=== stderr ===\n{}" + raise RuntimeError(fmt.format(framenr, err1, err2)) + + except GeneratorExit: + # Note that GeneratorExit does not inherit from Exception but BaseException + pass + + except Exception: + # Normal exceptions fall through + raise + + except BaseException: + # Detect KeyboardInterrupt / SystemExit: don't wait for ffmpeg to quit + stop_policy = "kill" + raise + + finally: + # Stop the LogCatcher thread, which reads from stderr. + log_catcher.stop_me() + + # Make sure that ffmpeg is terminated. + if process.poll() is None: + # Ask ffmpeg to quit + try: + # I read somewhere that modern ffmpeg on Linux prefers a + # "ctrl-c", but tests so far suggests sending q is more robust. + # > p.send_signal(signal.SIGINT) + # Sending q via communicate works, but can hang (see #17) + # > p.communicate(b"q") + # So let's do similar to what communicate does, but without + # reading stdout (which may block). It looks like only closing + # stdout is enough (tried Windows+Linux), but let's play safe. + # Found that writing to stdin can cause "Invalid argument" on + # Windows # and "Broken Pipe" on Unix. 
def write_frames(
    path,
    size,
    pix_fmt_in="rgb24",
    pix_fmt_out="yuv420p",
    fps=16,
    quality=5,
    bitrate=None,
    codec=None,
    macro_block_size=16,
    ffmpeg_log_level="warning",
    ffmpeg_timeout=None,
    input_params=None,
    output_params=None,
    audio_path=None,
    audio_codec=None,
):
    """
    Create a generator to write frames (bytes objects) into a video file.

    The frames are written by using the generator's `send()` method. Frames
    can be anything that can be written to a file. Typically these are
    bytes objects, but c-contiguous Numpy arrays also work.

    Example:

        gen = write_frames(path, size)
        gen.send(None)  # seed the generator
        for frame in frames:
            gen.send(frame)
        gen.close()  # don't forget this

    Parameters:
        path (str): the filename to write to.
        size (tuple): the width and height of the frames.
        pix_fmt_in (str): the pixel format of incoming frames.
            E.g. "gray", "gray8a", "rgb24", or "rgba". Default "rgb24".
        pix_fmt_out (str): the pixel format to store frames. Default "yuv420p".
        fps (float): The frames per second. Default 16.
        quality (float): A measure for quality between 1 and 10. Default 5.
            Ignored if bitrate is given. Pass None to add no quality flags.
        bitrate (str): The bitrate, e.g. "192k". The defaults are pretty good.
        codec (str): The codec. Default "libx264" for .mp4 (if available from
            the ffmpeg executable) or "msmpeg4" for .wmv.
        macro_block_size (int): You probably want to align the size of frames
            to this value to avoid image resizing. Default 16. Can be set
            to 1 to avoid block alignment, though this is not recommended.
        ffmpeg_log_level (str): The ffmpeg logging level. Default "warning".
        ffmpeg_timeout (float): Timeout in seconds to wait for ffmpeg process
            to finish. Value of 0 or None will wait forever (default). The time that
            ffmpeg needs depends on CPU speed, compression, and frame size.
        input_params (list): Additional ffmpeg input command line parameters.
        output_params (list): Additional ffmpeg output command line parameters.
            The list passed in is not modified.
        audio_path (str): A input file path for encoding with an audio stream.
            Default None, no audio.
        audio_codec (str): The audio codec to use if audio_path is provided.
            "copy" will try to use audio_path's audio codec without re-encoding.
            Default None, but some formats must have certain codecs specified.
    """

    # ----- Input args

    if isinstance(path, pathlib.PurePath):
        path = str(path)
    if not isinstance(path, str):
        raise TypeError("Video path must be a string or pathlib.Path.")

    # The pix_fmt_out yuv420p is the best for the output to work in
    # QuickTime and most other players. These players only support
    # the YUV planar color space with 4:2:0 chroma subsampling for
    # H.264 video. Otherwise, depending on the source, ffmpeg may
    # output to a pixel format that may be incompatible with these
    # players. See https://trac.ffmpeg.org/wiki/Encode/H.264#Encodingfordumbplayers

    pix_fmt_in = pix_fmt_in or "rgb24"
    pix_fmt_out = pix_fmt_out or "yuv420p"
    fps = fps or 16
    # bitrate, codec, macro_block_size can all be None or ...
    macro_block_size = macro_block_size or 16
    ffmpeg_log_level = ffmpeg_log_level or "warning"
    input_params = input_params or []
    output_params = output_params or []
    ffmpeg_timeout = ffmpeg_timeout or 0

    floatish = float, int
    if isinstance(size, (tuple, list)):
        assert len(size) == 2, "size must be a 2-tuple"
        assert isinstance(size[0], int) and isinstance(
            size[1], int
        ), "size must be ints"
        sizestr = "{:d}x{:d}".format(*size)
    # elif isinstance(size, str):
    #     assert "x" in size, "size as string must have format NxM"
    #     sizestr = size
    else:
        assert False, "size must be str or tuple"
    assert isinstance(pix_fmt_in, str), "pix_fmt_in must be str"
    assert isinstance(pix_fmt_out, str), "pix_fmt_out must be str"
    assert isinstance(fps, floatish), "fps must be float"
    if quality is not None:
        assert isinstance(quality, floatish), "quality must be float"
        assert 1 <= quality <= 10, "quality must be between 1 and 10 inclusive"
    assert isinstance(macro_block_size, int), "macro_block_size must be int"
    assert isinstance(ffmpeg_log_level, str), "ffmpeg_log_level must be str"
    assert isinstance(ffmpeg_timeout, floatish), "ffmpeg_timeout must be float"
    assert isinstance(input_params, list), "input_params must be a list"
    assert isinstance(output_params, list), "output_params must be a list"

    # Work on a private copy: the audio options below are appended in place,
    # which would otherwise modify the list object owned by the caller.
    output_params = list(output_params)

    # ----- Prepare

    # Get parameters
    if not codec:
        if path.lower().endswith(".wmv"):
            # This is a safer default codec on windows to get videos that
            # will play in powerpoint and other apps. H264 is not always
            # available on windows.
            codec = "msmpeg4"
        else:
            codec = get_first_available_h264_encoder()

    audio_params = ["-an"]
    if audio_path is not None and not path.lower().endswith(".gif"):
        audio_params = ["-i", audio_path]
        if audio_codec is not None:
            output_params += ["-acodec", audio_codec]
        output_params += ["-map", "0:v:0", "-map", "1:a:0"]

    # Get command
    cmd = [
        get_ffmpeg_exe(),
        "-y",
        "-f",
        "rawvideo",
        "-vcodec",
        "rawvideo",
        "-s",
        sizestr,
    ]
    cmd += ["-pix_fmt", pix_fmt_in, "-r", "{:.02f}".format(fps)] + input_params
    cmd += ["-i", "-"] + audio_params
    cmd += ["-vcodec", codec, "-pix_fmt", pix_fmt_out]

    # Add fixed bitrate or variable bitrate compression flags
    if bitrate is not None:
        cmd += ["-b:v", str(bitrate)]
    elif quality is not None:  # If None, then we don't add anything
        quality = 1 - quality / 10.0
        if codec == "libx264":
            # crf ranges 0 to 51, 51 being worst.
            quality = int(quality * 51)
            cmd += ["-crf", str(quality)]  # for h264
        else:  # Many codecs accept q:v
            # q:v range can vary, 1-31, 31 being worst
            # But q:v does not always have the same range.
            # May need a way to find range for any codec.
            quality = int(quality * 30) + 1
            cmd += ["-qscale:v", str(quality)]  # for others

    # Note, for most codecs, the image dimensions must be divisible by
    # 16 the default for the macro_block_size is 16. Check if image is
    # divisible, if not have ffmpeg upsize to nearest size and warn
    # user they should correct input image if this is not desired.
    if macro_block_size > 1:
        if size[0] % macro_block_size > 0 or size[1] % macro_block_size > 0:
            out_w = size[0]
            out_h = size[1]
            if size[0] % macro_block_size > 0:
                out_w += macro_block_size - (size[0] % macro_block_size)
            if size[1] % macro_block_size > 0:
                out_h += macro_block_size - (size[1] % macro_block_size)
            cmd += ["-vf", "scale={}:{}".format(out_w, out_h)]
            logger.warning(
                "IMAGEIO FFMPEG_WRITER WARNING: input image is not"
                " divisible by macro_block_size={}, resizing from {} "
                "to {} to ensure video compatibility with most codecs "
                "and players. To prevent resizing, make your input "
                "image divisible by the macro_block_size or set the "
                "macro_block_size to 1 (risking incompatibility).".format(
                    macro_block_size, size[:2], (out_w, out_h)
                )
            )

    # Rather than redirect stderr to a pipe, just set minimal
    # output from ffmpeg by default. That way if there are warnings
    # the user will see them.
    cmd += ["-v", ffmpeg_log_level]
    cmd += output_params
    cmd.append(path)
    cmd_str = " ".join(cmd)
    if any(
        [level in ffmpeg_log_level for level in ("info", "verbose", "debug", "trace")]
    ):
        logger.info("RUNNING FFMPEG COMMAND: " + cmd_str)

    # Launch process
    p = subprocess.Popen(
        cmd,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=None,
        **_popen_kwargs(prevent_sigint=True)
    )

    # Note that directing stderr to a pipe on windows will cause ffmpeg
    # to hang if the buffer is not periodically cleared using
    # StreamCatcher or other means.
    # Setting bufsize to 0 or a small value does not seem to have much effect
    # (tried on Windows and Linux). I suspect that ffmpeg buffers
    # multiple frames (before encoding in a batch).

    # Init policy by which to terminate ffmpeg. May be set to "kill" later.
    stop_policy = "timeout"
    if not ffmpeg_timeout:
        stop_policy = "wait"

    # ----- Write frames

    # Enter try block directly after opening the process.
    # We terminate ffmpeg in the finally clause.
    # Generators are automatically closed when they get deleted,
    # so the finally block is guaranteed to run.
    try:
        # Just keep going until the generator.close() is called (raises GeneratorExit).
        # This could also happen when the generator is deleted somehow.
        nframes = 0
        while True:
            # Get frame
            bb = yield

            # framesize = size[0] * size[1] * depth * bpp
            # assert isinstance(bb, bytes), "Frame must be send as bytes"
            # assert len(bb) == framesize, "Frame must have width*height*depth*bpp bytes"
            # Actually, we accept anything that can be written to file.
            # This e.g. allows writing numpy arrays without having to make a copy ...

            # Write
            try:
                p.stdin.write(bb)
            except Exception as err:
                # Show the command and stderr from pipe
                msg = (
                    "{0:}\n\nFFMPEG COMMAND:\n{1:}\n\nFFMPEG STDERR "
                    "OUTPUT:\n".format(err, cmd_str)
                )
                raise IOError(msg)

            nframes += 1

    except GeneratorExit:
        # Note that GeneratorExit does not inherit from Exception but BaseException
        # Detect premature closing
        if nframes == 0:
            logger.warning("No frames have been written; the written video is invalid.")

    except Exception:
        # Normal exceptions fall through
        raise

    except BaseException:
        # Detect KeyboardInterrupt / SystemExit: don't wait for ffmpeg to quit
        stop_policy = "kill"
        raise

    finally:
        # Make sure that ffmpeg is terminated.
        if p.poll() is None:
            # Tell ffmpeg that we're done
            try:
                p.stdin.close()
            except Exception as err:  # pragma: no cover
                logger.warning("Error while attempting stop ffmpeg (w): " + str(err))

            if stop_policy == "timeout":
                # Wait until timeout, produce a warning and kill if it still exists
                try:
                    etime = time.time() + ffmpeg_timeout
                    while (time.time() < etime) and p.poll() is None:
                        time.sleep(0.01)
                finally:
                    if p.poll() is None:  # pragma: no cover
                        logger.warning(
                            "We had to kill ffmpeg to stop it. "
                            + "Consider increasing ffmpeg_timeout, "
                            + "or setting it to zero (no timeout)."
                        )
                        p.kill()

            elif stop_policy == "wait":
                # Wait forever, kill it if we're interrupted
                try:
                    while p.poll() is None:
                        time.sleep(0.01)
                finally:  # the above can raise e.g. by ctrl-c or systemexit
                    if p.poll() is None:  # pragma: no cover
                        p.kill()

            else:  # stop_policy == "kill":
                # Just kill it
                p.kill()
        # Just to be safe, wrap in try/except
        try:
            p.stdout.close()
        except Exception:
            pass
def get_output_video_line(lines):
    """Find the video stream line in the output section of ffmpeg's log.

    `lines` is an iterable of bytes lines. Everything up to the first
    line that starts with b"Output " (ignoring leading whitespace) is
    skipped; after that, the first line that starts with b"Stream " and
    contains b" Video:" is returned unchanged (leading whitespace kept).
    Returns None when no such line is present.
    """
    seen_output = False
    for raw in lines:
        stripped = raw.lstrip()
        if stripped.startswith(b"Output "):
            seen_output = True
            continue
        if not seen_output:
            continue
        if stripped.startswith(b"Stream ") and b" Video:" in stripped:
            return raw
    return None
def parse_ffmpeg_header(text):
    """Parse the header that ffmpeg prints on stderr into a metadata dict.

    Parameters:
        text (str): the header text. The first two lines are used for the
            version string, and at least one "Stream ... Video: ..." line
            must be present.

    Returns a dict with the keys:

    * ffmpeg_version: version text from the first line plus the second line.
    * codec: first word after "Video: " on the first video stream line.
    * pix_fmt: second comma-separated field of that line (commas inside
      parentheses are not treated as separators).
    * audio_codec: only present when an audio stream line exists.
    * fps: float, or 0 if no "<number> fps" is found.
    * source_size: (w, h) from the first video stream line.
    * size: (w, h) from the last video stream line (the pipe output).
    * rotate: int from the "rotate" metadata, or 0.
    * duration: seconds, or 0 if it could not be detected.

    A missing "Duration: " line is treated the same as an unparsable
    duration (0) instead of raising IndexError.
    """
    lines = text.splitlines()
    meta = {}

    # meta["header"] = text  # Can enable this for debugging

    # Get version
    ver = lines[0].split("version", 1)[-1].split("Copyright")[0]
    meta["ffmpeg_version"] = ver.strip() + " " + lines[1].strip()

    # get the output lines that speak about video
    videolines = [
        ln for ln in lines if ln.lstrip().startswith("Stream ") and " Video: " in ln
    ]
    source_line = videolines[0]
    output_line = videolines[-1]  # Pipe output

    # Codec and pix_fmt hint
    video_desc = source_line.split("Video: ", 1)[-1]
    meta["codec"] = video_desc.lstrip().split(" ", 1)[0].strip()
    meta["pix_fmt"] = re.split(
        # use a negative lookahead regexp to ignore commas that are contained
        # within a parenthesis
        # this helps consider a pix_fmt of the kind
        #     yuv420p(tv, progressive)
        # as what it is, instead of erroneously reporting as
        #     yuv420p(tv
        r",\s*(?![^()]*\))",
        video_desc,
    )[1].strip()

    # get the output line that speaks about audio
    audiolines = [
        ln for ln in lines if ln.lstrip().startswith("Stream ") and " Audio: " in ln
    ]
    if audiolines:
        audio_desc = audiolines[0].split("Audio: ", 1)[-1]
        meta["audio_codec"] = audio_desc.lstrip().split(" ", 1)[0].strip()

    # get the frame rate.
    # matches can be empty, see #171, assume nframes = inf
    # the regexp omits values of "1k tbr" which seems a specific edge-case #262
    # it seems that tbr is generally to be preferred #262
    fps = 0
    matches = re.findall(r" ([0-9]+\.?[0-9]*) (fps)", source_line)
    if matches:
        fps = float(matches[0][0].strip())
    meta["fps"] = fps

    def frame_size(line):
        # Size has the form 460x320 (w x h). Digits are required on both
        # sides of the "x" so that a bare " x " cannot be picked up and
        # then fail in int().
        match = re.search(r" [0-9]+x[0-9]+(,| )", line)
        parts = line[match.start() : match.end() - 1].split("x")
        return tuple(map(int, parts))

    # size of the original stream, and of what we receive through the pipe
    meta["source_size"] = frame_size(source_line)
    meta["size"] = frame_size(output_line)

    # Check the two sizes
    if meta["source_size"] != meta["size"]:
        logger.warning(
            "The frame size for reading {} is "
            "different from the source frame size {}.".format(
                meta["size"], meta["source_size"]
            )
        )

    # get the rotate metadata
    reo_rotate = re.compile(r"rotate\s+:\s([0-9]+)")
    match = reo_rotate.search(text)
    rotate = 0
    if match is not None:
        rotate = match.groups()[0]
    meta["rotate"] = int(rotate)

    # get duration (in seconds); stays 0 when there is no Duration line or
    # when it holds no HH:MM:SS.ss value (e.g. "Duration: N/A")
    duration = 0
    duration_lines = [ln for ln in lines if "Duration: " in ln]
    if duration_lines:
        line = duration_lines[0]
        match = re.search(r" [0-9][0-9]:[0-9][0-9]:[0-9][0-9]\.[0-9][0-9]", line)
        if match is not None:
            hms = line[match.start() + 1 : match.end()].split(":")
            duration = cvsecs(*hms)
    meta["duration"] = duration

    return meta
def get_ffmpeg_exe():
    """
    Get the ffmpeg executable file.

    The IMAGEIO_FFMPEG_EXE environment variable wins when it is set to a
    non-empty value; it is returned as-is, without being tested, because
    the user was explicit. Otherwise the result of the auto-detection in
    `_get_ffmpeg_exe()` is used. A RuntimeError is raised if no valid
    ffmpeg could be found.
    """
    # 1. Try environment variable. - Dont test it: the user is explicit here!
    override = os.getenv("IMAGEIO_FFMPEG_EXE", None)

    # Auto-detect only when there is no explicit override
    located = override if override else _get_ffmpeg_exe()

    if not located:
        # Nothing was found
        raise RuntimeError(
            "No ffmpeg exe could be found. Install ffmpeg on your system, "
            "or set the IMAGEIO_FFMPEG_EXE environment variable."
        )
    return located
def _popen_kwargs(prevent_sigint=False):
    """Build the platform-specific keyword arguments for subprocess calls.

    Returns a dict with the keys "startupinfo", "creationflags" and
    "preexec_fn" (in that order):

    * On Windows, startupinfo is a STARTUPINFO with STARTF_USESHOWWINDOW
      set, which stops the executable from flashing (see #22).
    * With `prevent_sigint`, propagation of sigint is prevented (see #4,
      https://stackoverflow.com/questions/5045771): creationflags becomes
      0x00000200 on Windows, and preexec_fn becomes os.setpgrp elsewhere.
    * Setting the IMAGEIO_FFMPEG_NO_PREVENT_SIGINT environment variable to
      anything other than "", "0", "false" or "no" (case-insensitive)
      leaves preexec_fn as None, to work around a strange hang on
      fork() (see #58).
    """
    on_windows = sys.platform.startswith("win")
    kwargs = {"startupinfo": None, "creationflags": 0, "preexec_fn": None}

    if on_windows:
        info = subprocess.STARTUPINFO()
        info.dwFlags |= subprocess.STARTF_USESHOWWINDOW
        kwargs["startupinfo"] = info

    opt_out = os.getenv("IMAGEIO_FFMPEG_NO_PREVENT_SIGINT", "").lower()
    opted_out = opt_out not in ("", "0", "false", "no")

    if prevent_sigint:
        if on_windows:
            kwargs["creationflags"] = 0x00000200
        elif not opted_out:
            kwargs["preexec_fn"] = os.setpgrp  # the _pre_exec does not seem to work

    return kwargs
+ """ + exe = get_ffmpeg_exe() + line = subprocess.check_output([exe, "-version"], **_popen_kwargs()).split( + b"\n", 1 + )[0] + line = line.decode(errors="ignore").strip() + version = line.split("version", 1)[-1].lstrip().split(" ", 1)[0].strip() + return version diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3ea37578d0bc12ef92b08b38de7aecb70b95ef9f Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/__pycache__/__init__.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/__pycache__/_adapters.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/__pycache__/_adapters.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b0fad5a8eafae2379aa63ab0a20fa8272c74039e Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/__pycache__/_adapters.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/__pycache__/_collections.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/__pycache__/_collections.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..71b29107fd21bc0b8d24e9107170d6a81734a9ea Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/__pycache__/_collections.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/__pycache__/_compat.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/__pycache__/_compat.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f42eaeff6dac244c91afb571fa3f02aab5afbab0 
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/__pycache__/_compat.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/__pycache__/_functools.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/__pycache__/_functools.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e3f4596199481937e529c48e15438cf9e156d12f Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/__pycache__/_functools.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/__pycache__/_itertools.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/__pycache__/_itertools.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7052058c57590301e7e37a18d28d4ec56137c0c2 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/__pycache__/_itertools.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/__pycache__/_meta.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/__pycache__/_meta.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4740503dbd1178dceb8d9648939e572d5b9972db Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/__pycache__/_meta.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/__pycache__/_text.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/__pycache__/_text.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c6944d93eadd5d51978a3d564f12d1f7db476ebb Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/__pycache__/_text.cpython-312.pyc differ diff --git 
a/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/__pycache__/diagnose.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/__pycache__/diagnose.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..037ca3cbf44b171b215fbd4be60b9410aaeea3e8 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/__pycache__/diagnose.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/compat/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/compat/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/compat/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/compat/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f0fcb35e7094ec130785b321d6be726c8d3802ee Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/compat/__pycache__/__init__.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/compat/__pycache__/py311.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/compat/__pycache__/py311.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a806afa6f9a8917ab59a071edb0a42664541d7f8 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/compat/__pycache__/py311.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/compat/__pycache__/py39.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/compat/__pycache__/py39.cpython-312.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..bb90ce65467110ea9c232616b639ace84a5ed5f2 Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/compat/__pycache__/py39.cpython-312.pyc differ diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/compat/py311.py b/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/compat/py311.py new file mode 100644 index 0000000000000000000000000000000000000000..3a5327436f9b1d9eae371e321c491a270634b3cf --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/compat/py311.py @@ -0,0 +1,22 @@ +import os +import pathlib +import sys +import types + + +def wrap(path): # pragma: no cover + """ + Workaround for https://github.com/python/cpython/issues/84538 + to add backward compatibility for walk_up=True. + An example affected package is dask-labextension, which uses + jupyter-packaging to install JupyterLab javascript files outside + of site-packages. + """ + + def relative_to(root, *, walk_up=False): + return pathlib.Path(os.path.relpath(path, root)) + + return types.SimpleNamespace(relative_to=relative_to) + + +relative_fix = wrap if sys.version_info < (3, 12) else lambda x: x diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/compat/py39.py b/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/compat/py39.py new file mode 100644 index 0000000000000000000000000000000000000000..1f15bd97e6aa028d3e86734dd08c0eb5c06d79bc --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/importlib_metadata/compat/py39.py @@ -0,0 +1,36 @@ +""" +Compatibility layer with Python 3.8/3.9 +""" + +from typing import TYPE_CHECKING, Any, Optional + +if TYPE_CHECKING: # pragma: no cover + # Prevent circular imports on runtime. + from .. 
import Distribution, EntryPoint +else: + Distribution = EntryPoint = Any + + +def normalized_name(dist: Distribution) -> Optional[str]: + """ + Honor name normalization for distributions that don't provide ``_normalized_name``. + """ + try: + return dist._normalized_name + except AttributeError: + from .. import Prepared # -> delay to prevent circular imports. + + return Prepared.normalize(getattr(dist, "name", None) or dist.metadata['Name']) + + +def ep_matches(ep: EntryPoint, **params) -> bool: + """ + Workaround for ``EntryPoint`` objects without the ``matches`` method. + """ + try: + return ep.matches(**params) + except AttributeError: + from .. import EntryPoint # -> delay to prevent circular imports. + + # Reconstruct the EntryPoint object to make sure it is compatible. + return EntryPoint(ep.name, ep.value, ep.group).matches(**params) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/jinja2/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/jinja2/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1a423a3eac16ac550e8e4008d7f5d79401b50e0f --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/jinja2/__init__.py @@ -0,0 +1,38 @@ +"""Jinja is a template engine written in pure Python. It provides a +non-XML syntax that supports inline expressions and an optional +sandboxed environment. 
+""" + +from .bccache import BytecodeCache as BytecodeCache +from .bccache import FileSystemBytecodeCache as FileSystemBytecodeCache +from .bccache import MemcachedBytecodeCache as MemcachedBytecodeCache +from .environment import Environment as Environment +from .environment import Template as Template +from .exceptions import TemplateAssertionError as TemplateAssertionError +from .exceptions import TemplateError as TemplateError +from .exceptions import TemplateNotFound as TemplateNotFound +from .exceptions import TemplateRuntimeError as TemplateRuntimeError +from .exceptions import TemplatesNotFound as TemplatesNotFound +from .exceptions import TemplateSyntaxError as TemplateSyntaxError +from .exceptions import UndefinedError as UndefinedError +from .loaders import BaseLoader as BaseLoader +from .loaders import ChoiceLoader as ChoiceLoader +from .loaders import DictLoader as DictLoader +from .loaders import FileSystemLoader as FileSystemLoader +from .loaders import FunctionLoader as FunctionLoader +from .loaders import ModuleLoader as ModuleLoader +from .loaders import PackageLoader as PackageLoader +from .loaders import PrefixLoader as PrefixLoader +from .runtime import ChainableUndefined as ChainableUndefined +from .runtime import DebugUndefined as DebugUndefined +from .runtime import make_logging_undefined as make_logging_undefined +from .runtime import StrictUndefined as StrictUndefined +from .runtime import Undefined as Undefined +from .utils import clear_caches as clear_caches +from .utils import is_undefined as is_undefined +from .utils import pass_context as pass_context +from .utils import pass_environment as pass_environment +from .utils import pass_eval_context as pass_eval_context +from .utils import select_autoescape as select_autoescape + +__version__ = "3.1.6" diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/jinja2/_identifier.py b/URSA/.venv_ursa/lib/python3.12/site-packages/jinja2/_identifier.py new file mode 100644 index 
0000000000000000000000000000000000000000..928c1503c7d414a8a86bbf5a82c68d42cb089bd2 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/jinja2/_identifier.py @@ -0,0 +1,6 @@ +import re + +# generated by scripts/generate_identifier_pattern.py +pattern = re.compile( + r"[\w·̀-ͯ·҃-֑҇-ׇֽֿׁׂׅׄؐ-ًؚ-ٰٟۖ-ۜ۟-۪ۤۧۨ-ܑۭܰ-݊ަ-ް߫-߽߳ࠖ-࠙ࠛ-ࠣࠥ-ࠧࠩ-࡙࠭-࡛࣓-ࣣ࣡-ःऺ-़ा-ॏ॑-ॗॢॣঁ-ঃ়া-ৄেৈো-্ৗৢৣ৾ਁ-ਃ਼ਾ-ੂੇੈੋ-੍ੑੰੱੵઁ-ઃ઼ા-ૅે-ૉો-્ૢૣૺ-૿ଁ-ଃ଼ା-ୄେୈୋ-୍ୖୗୢୣஂா-ூெ-ைொ-்ௗఀ-ఄా-ౄె-ైొ-్ౕౖౢౣಁ-ಃ಼ಾ-ೄೆ-ೈೊ-್ೕೖೢೣഀ-ഃ഻഼ാ-ൄെ-ൈൊ-്ൗൢൣංඃ්ා-ුූෘ-ෟෲෳัิ-ฺ็-๎ັິ-ູົຼ່-ໍ༹༘༙༵༷༾༿ཱ-྄྆྇ྍ-ྗྙ-ྼ࿆ါ-ှၖ-ၙၞ-ၠၢ-ၤၧ-ၭၱ-ၴႂ-ႍႏႚ-ႝ፝-፟ᜒ-᜔ᜲ-᜴ᝒᝓᝲᝳ឴-៓៝᠋-᠍ᢅᢆᢩᤠ-ᤫᤰ-᤻ᨗ-ᨛᩕ-ᩞ᩠-᩿᩼᪰-᪽ᬀ-ᬄ᬴-᭄᭫-᭳ᮀ-ᮂᮡ-ᮭ᯦-᯳ᰤ-᰷᳐-᳔᳒-᳨᳭ᳲ-᳴᳷-᳹᷀-᷹᷻-᷿‿⁀⁔⃐-⃥⃜⃡-⃰℘℮⳯-⵿⳱ⷠ-〪ⷿ-゙゚〯꙯ꙴ-꙽ꚞꚟ꛰꛱ꠂ꠆ꠋꠣ-ꠧꢀꢁꢴ-ꣅ꣠-꣱ꣿꤦ-꤭ꥇ-꥓ꦀ-ꦃ꦳-꧀ꧥꨩ-ꨶꩃꩌꩍꩻ-ꩽꪰꪲ-ꪴꪷꪸꪾ꪿꫁ꫫ-ꫯꫵ꫶ꯣ-ꯪ꯬꯭ﬞ︀-️︠-︯︳︴﹍-﹏_𐇽𐋠𐍶-𐍺𐨁-𐨃𐨅𐨆𐨌-𐨏𐨸-𐨿𐨺𐫦𐫥𐴤-𐽆𐴧-𐽐𑀀-𑀂𑀸-𑁆𑁿-𑂂𑂰-𑂺𑄀-𑄂𑄧-𑄴𑅅𑅆𑅳𑆀-𑆂𑆳-𑇀𑇉-𑇌𑈬-𑈷𑈾𑋟-𑋪𑌀-𑌃𑌻𑌼𑌾-𑍄𑍇𑍈𑍋-𑍍𑍗𑍢𑍣𑍦-𑍬𑍰-𑍴𑐵-𑑆𑑞𑒰-𑓃𑖯-𑖵𑖸-𑗀𑗜𑗝𑘰-𑙀𑚫-𑚷𑜝-𑜫𑠬-𑠺𑨁-𑨊𑨳-𑨹𑨻-𑨾𑩇𑩑-𑩛𑪊-𑪙𑰯-𑰶𑰸-𑰿𑲒-𑲧𑲩-𑲶𑴱-𑴶𑴺𑴼𑴽𑴿-𑵅𑵇𑶊-𑶎𑶐𑶑𑶓-𑶗𑻳-𑻶𖫰-𖫴𖬰-𖬶𖽑-𖽾𖾏-𖾒𛲝𛲞𝅥-𝅩𝅭-𝅲𝅻-𝆂𝆅-𝆋𝆪-𝆭𝉂-𝉄𝨀-𝨶𝨻-𝩬𝩵𝪄𝪛-𝪟𝪡-𝪯𞀀-𞀆𞀈-𞀘𞀛-𞀡𞀣𞀤𞀦-𞣐𞀪-𞣖𞥄-𞥊󠄀-󠇯]+" # noqa: B950 +) diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/jinja2/async_utils.py b/URSA/.venv_ursa/lib/python3.12/site-packages/jinja2/async_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..f0c140205c50a3df9863ce1ab610b0c62a483f1b --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/jinja2/async_utils.py @@ -0,0 +1,99 @@ +import inspect +import typing as t +from functools import WRAPPER_ASSIGNMENTS +from functools import wraps + +from .utils import _PassArg +from .utils import pass_eval_context + +if t.TYPE_CHECKING: + import typing_extensions as te + +V = t.TypeVar("V") + + +def async_variant(normal_func): # type: ignore + def decorator(async_func): # type: ignore + pass_arg = _PassArg.from_obj(normal_func) + need_eval_context = pass_arg is None + + if pass_arg is _PassArg.environment: + + def is_async(args: t.Any) -> bool: + return t.cast(bool, args[0].is_async) + + else: + + def is_async(args: t.Any) -> bool: + return t.cast(bool, 
args[0].environment.is_async) + + # Take the doc and annotations from the sync function, but the + # name from the async function. Pallets-Sphinx-Themes + # build_function_directive expects __wrapped__ to point to the + # sync function. + async_func_attrs = ("__module__", "__name__", "__qualname__") + normal_func_attrs = tuple(set(WRAPPER_ASSIGNMENTS).difference(async_func_attrs)) + + @wraps(normal_func, assigned=normal_func_attrs) + @wraps(async_func, assigned=async_func_attrs, updated=()) + def wrapper(*args, **kwargs): # type: ignore + b = is_async(args) + + if need_eval_context: + args = args[1:] + + if b: + return async_func(*args, **kwargs) + + return normal_func(*args, **kwargs) + + if need_eval_context: + wrapper = pass_eval_context(wrapper) + + wrapper.jinja_async_variant = True # type: ignore[attr-defined] + return wrapper + + return decorator + + +_common_primitives = {int, float, bool, str, list, dict, tuple, type(None)} + + +async def auto_await(value: t.Union[t.Awaitable["V"], "V"]) -> "V": + # Avoid a costly call to isawaitable + if type(value) in _common_primitives: + return t.cast("V", value) + + if inspect.isawaitable(value): + return await t.cast("t.Awaitable[V]", value) + + return value + + +class _IteratorToAsyncIterator(t.Generic[V]): + def __init__(self, iterator: "t.Iterator[V]"): + self._iterator = iterator + + def __aiter__(self) -> "te.Self": + return self + + async def __anext__(self) -> V: + try: + return next(self._iterator) + except StopIteration as e: + raise StopAsyncIteration(e.value) from e + + +def auto_aiter( + iterable: "t.Union[t.AsyncIterable[V], t.Iterable[V]]", +) -> "t.AsyncIterator[V]": + if hasattr(iterable, "__aiter__"): + return iterable.__aiter__() + else: + return _IteratorToAsyncIterator(iter(iterable)) + + +async def auto_to_list( + value: "t.Union[t.AsyncIterable[V], t.Iterable[V]]", +) -> t.List["V"]: + return [x async for x in auto_aiter(value)] diff --git 
a/URSA/.venv_ursa/lib/python3.12/site-packages/jinja2/bccache.py b/URSA/.venv_ursa/lib/python3.12/site-packages/jinja2/bccache.py new file mode 100644 index 0000000000000000000000000000000000000000..ada8b099ff251ea9c6da4c42e1383f37e359f06a --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/jinja2/bccache.py @@ -0,0 +1,408 @@ +"""The optional bytecode cache system. This is useful if you have very +complex template situations and the compilation of all those templates +slows down your application too much. + +Situations where this is useful are often forking web applications that +are initialized on the first request. +""" + +import errno +import fnmatch +import marshal +import os +import pickle +import stat +import sys +import tempfile +import typing as t +from hashlib import sha1 +from io import BytesIO +from types import CodeType + +if t.TYPE_CHECKING: + import typing_extensions as te + + from .environment import Environment + + class _MemcachedClient(te.Protocol): + def get(self, key: str) -> bytes: ... + + def set( + self, key: str, value: bytes, timeout: t.Optional[int] = None + ) -> None: ... + + +bc_version = 5 +# Magic bytes to identify Jinja bytecode cache files. Contains the +# Python major and minor version to avoid loading incompatible bytecode +# if a project upgrades its Python version. +bc_magic = ( + b"j2" + + pickle.dumps(bc_version, 2) + + pickle.dumps((sys.version_info[0] << 24) | sys.version_info[1], 2) +) + + +class Bucket: + """Buckets are used to store the bytecode for one template. It's created + and initialized by the bytecode cache and passed to the loading functions. + + The buckets get an internal checksum from the cache assigned and use this + to automatically reject outdated cache material. Individual bytecode + cache subclasses don't have to care about cache invalidation. 
+ """ + + def __init__(self, environment: "Environment", key: str, checksum: str) -> None: + self.environment = environment + self.key = key + self.checksum = checksum + self.reset() + + def reset(self) -> None: + """Resets the bucket (unloads the bytecode).""" + self.code: t.Optional[CodeType] = None + + def load_bytecode(self, f: t.BinaryIO) -> None: + """Loads bytecode from a file or file like object.""" + # make sure the magic header is correct + magic = f.read(len(bc_magic)) + if magic != bc_magic: + self.reset() + return + # the source code of the file changed, we need to reload + checksum = pickle.load(f) + if self.checksum != checksum: + self.reset() + return + # if marshal_load fails then we need to reload + try: + self.code = marshal.load(f) + except (EOFError, ValueError, TypeError): + self.reset() + return + + def write_bytecode(self, f: t.IO[bytes]) -> None: + """Dump the bytecode into the file or file like object passed.""" + if self.code is None: + raise TypeError("can't write empty bucket") + f.write(bc_magic) + pickle.dump(self.checksum, f, 2) + marshal.dump(self.code, f) + + def bytecode_from_string(self, string: bytes) -> None: + """Load bytecode from bytes.""" + self.load_bytecode(BytesIO(string)) + + def bytecode_to_string(self) -> bytes: + """Return the bytecode as bytes.""" + out = BytesIO() + self.write_bytecode(out) + return out.getvalue() + + +class BytecodeCache: + """To implement your own bytecode cache you have to subclass this class + and override :meth:`load_bytecode` and :meth:`dump_bytecode`. Both of + these methods are passed a :class:`~jinja2.bccache.Bucket`. 
+ + A very basic bytecode cache that saves the bytecode on the file system:: + + from os import path + + class MyCache(BytecodeCache): + + def __init__(self, directory): + self.directory = directory + + def load_bytecode(self, bucket): + filename = path.join(self.directory, bucket.key) + if path.exists(filename): + with open(filename, 'rb') as f: + bucket.load_bytecode(f) + + def dump_bytecode(self, bucket): + filename = path.join(self.directory, bucket.key) + with open(filename, 'wb') as f: + bucket.write_bytecode(f) + + A more advanced version of a filesystem based bytecode cache is part of + Jinja. + """ + + def load_bytecode(self, bucket: Bucket) -> None: + """Subclasses have to override this method to load bytecode into a + bucket. If they are not able to find code in the cache for the + bucket, it must not do anything. + """ + raise NotImplementedError() + + def dump_bytecode(self, bucket: Bucket) -> None: + """Subclasses have to override this method to write the bytecode + from a bucket back to the cache. If it unable to do so it must not + fail silently but raise an exception. + """ + raise NotImplementedError() + + def clear(self) -> None: + """Clears the cache. This method is not used by Jinja but should be + implemented to allow applications to clear the bytecode cache used + by a particular environment. + """ + + def get_cache_key( + self, name: str, filename: t.Optional[t.Union[str]] = None + ) -> str: + """Returns the unique hash key for this template name.""" + hash = sha1(name.encode("utf-8")) + + if filename is not None: + hash.update(f"|{filename}".encode()) + + return hash.hexdigest() + + def get_source_checksum(self, source: str) -> str: + """Returns a checksum for the source.""" + return sha1(source.encode("utf-8")).hexdigest() + + def get_bucket( + self, + environment: "Environment", + name: str, + filename: t.Optional[str], + source: str, + ) -> Bucket: + """Return a cache bucket for the given template. 
All arguments are + mandatory but filename may be `None`. + """ + key = self.get_cache_key(name, filename) + checksum = self.get_source_checksum(source) + bucket = Bucket(environment, key, checksum) + self.load_bytecode(bucket) + return bucket + + def set_bucket(self, bucket: Bucket) -> None: + """Put the bucket into the cache.""" + self.dump_bytecode(bucket) + + +class FileSystemBytecodeCache(BytecodeCache): + """A bytecode cache that stores bytecode on the filesystem. It accepts + two arguments: The directory where the cache items are stored and a + pattern string that is used to build the filename. + + If no directory is specified a default cache directory is selected. On + Windows the user's temp directory is used, on UNIX systems a directory + is created for the user in the system temp directory. + + The pattern can be used to have multiple separate caches operate on the + same directory. The default pattern is ``'__jinja2_%s.cache'``. ``%s`` + is replaced with the cache key. + + >>> bcc = FileSystemBytecodeCache('/tmp/jinja_cache', '%s.cache') + + This bytecode cache supports clearing of the cache using the clear method. + """ + + def __init__( + self, directory: t.Optional[str] = None, pattern: str = "__jinja2_%s.cache" + ) -> None: + if directory is None: + directory = self._get_default_cache_dir() + self.directory = directory + self.pattern = pattern + + def _get_default_cache_dir(self) -> str: + def _unsafe_dir() -> "te.NoReturn": + raise RuntimeError( + "Cannot determine safe temp directory. You " + "need to explicitly provide one." + ) + + tmpdir = tempfile.gettempdir() + + # On windows the temporary directory is used specific unless + # explicitly forced otherwise. We can just use that. 
+ if os.name == "nt": + return tmpdir + if not hasattr(os, "getuid"): + _unsafe_dir() + + dirname = f"_jinja2-cache-{os.getuid()}" + actual_dir = os.path.join(tmpdir, dirname) + + try: + os.mkdir(actual_dir, stat.S_IRWXU) + except OSError as e: + if e.errno != errno.EEXIST: + raise + try: + os.chmod(actual_dir, stat.S_IRWXU) + actual_dir_stat = os.lstat(actual_dir) + if ( + actual_dir_stat.st_uid != os.getuid() + or not stat.S_ISDIR(actual_dir_stat.st_mode) + or stat.S_IMODE(actual_dir_stat.st_mode) != stat.S_IRWXU + ): + _unsafe_dir() + except OSError as e: + if e.errno != errno.EEXIST: + raise + + actual_dir_stat = os.lstat(actual_dir) + if ( + actual_dir_stat.st_uid != os.getuid() + or not stat.S_ISDIR(actual_dir_stat.st_mode) + or stat.S_IMODE(actual_dir_stat.st_mode) != stat.S_IRWXU + ): + _unsafe_dir() + + return actual_dir + + def _get_cache_filename(self, bucket: Bucket) -> str: + return os.path.join(self.directory, self.pattern % (bucket.key,)) + + def load_bytecode(self, bucket: Bucket) -> None: + filename = self._get_cache_filename(bucket) + + # Don't test for existence before opening the file, since the + # file could disappear after the test before the open. + try: + f = open(filename, "rb") + except (FileNotFoundError, IsADirectoryError, PermissionError): + # PermissionError can occur on Windows when an operation is + # in progress, such as calling clear(). + return + + with f: + bucket.load_bytecode(f) + + def dump_bytecode(self, bucket: Bucket) -> None: + # Write to a temporary file, then rename to the real name after + # writing. This avoids another process reading the file before + # it is fully written. + name = self._get_cache_filename(bucket) + f = tempfile.NamedTemporaryFile( + mode="wb", + dir=os.path.dirname(name), + prefix=os.path.basename(name), + suffix=".tmp", + delete=False, + ) + + def remove_silent() -> None: + try: + os.remove(f.name) + except OSError: + # Another process may have called clear(). 
On Windows, + # another program may be holding the file open. + pass + + try: + with f: + bucket.write_bytecode(f) + except BaseException: + remove_silent() + raise + + try: + os.replace(f.name, name) + except OSError: + # Another process may have called clear(). On Windows, + # another program may be holding the file open. + remove_silent() + except BaseException: + remove_silent() + raise + + def clear(self) -> None: + # imported lazily here because google app-engine doesn't support + # write access on the file system and the function does not exist + # normally. + from os import remove + + files = fnmatch.filter(os.listdir(self.directory), self.pattern % ("*",)) + for filename in files: + try: + remove(os.path.join(self.directory, filename)) + except OSError: + pass + + +class MemcachedBytecodeCache(BytecodeCache): + """This class implements a bytecode cache that uses a memcache cache for + storing the information. It does not enforce a specific memcache library + (tummy's memcache or cmemcache) but will accept any class that provides + the minimal interface required. + + Libraries compatible with this class: + + - `cachelib `_ + - `python-memcached `_ + + (Unfortunately the django cache interface is not compatible because it + does not support storing binary data, only text. You can however pass + the underlying cache client to the bytecode cache which is available + as `django.core.cache.cache._client`.) + + The minimal interface for the client passed to the constructor is this: + + .. class:: MinimalClientInterface + + .. method:: set(key, value[, timeout]) + + Stores the bytecode in the cache. `value` is a string and + `timeout` the timeout of the key. If timeout is not provided + a default timeout or no timeout should be assumed, if it's + provided it's an integer with the number of seconds the cache + item should exist. + + .. method:: get(key) + + Returns the value for the cache key. 
If the item does not + exist in the cache the return value must be `None`. + + The other arguments to the constructor are the prefix for all keys that + is added before the actual cache key and the timeout for the bytecode in + the cache system. We recommend a high (or no) timeout. + + This bytecode cache does not support clearing of used items in the cache. + The clear method is a no-operation function. + + .. versionadded:: 2.7 + Added support for ignoring memcache errors through the + `ignore_memcache_errors` parameter. + """ + + def __init__( + self, + client: "_MemcachedClient", + prefix: str = "jinja2/bytecode/", + timeout: t.Optional[int] = None, + ignore_memcache_errors: bool = True, + ): + self.client = client + self.prefix = prefix + self.timeout = timeout + self.ignore_memcache_errors = ignore_memcache_errors + + def load_bytecode(self, bucket: Bucket) -> None: + try: + code = self.client.get(self.prefix + bucket.key) + except Exception: + if not self.ignore_memcache_errors: + raise + else: + bucket.bytecode_from_string(code) + + def dump_bytecode(self, bucket: Bucket) -> None: + key = self.prefix + bucket.key + value = bucket.bytecode_to_string() + + try: + if self.timeout is not None: + self.client.set(key, value, self.timeout) + else: + self.client.set(key, value) + except Exception: + if not self.ignore_memcache_errors: + raise diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/jinja2/compiler.py b/URSA/.venv_ursa/lib/python3.12/site-packages/jinja2/compiler.py new file mode 100644 index 0000000000000000000000000000000000000000..a4ff6a1b11af3e1a868d1a74c48d842390259b43 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/jinja2/compiler.py @@ -0,0 +1,1998 @@ +"""Compiles nodes from the parser into Python code.""" + +import typing as t +from contextlib import contextmanager +from functools import update_wrapper +from io import StringIO +from itertools import chain +from keyword import iskeyword as is_python_keyword + +from 
markupsafe import escape +from markupsafe import Markup + +from . import nodes +from .exceptions import TemplateAssertionError +from .idtracking import Symbols +from .idtracking import VAR_LOAD_ALIAS +from .idtracking import VAR_LOAD_PARAMETER +from .idtracking import VAR_LOAD_RESOLVE +from .idtracking import VAR_LOAD_UNDEFINED +from .nodes import EvalContext +from .optimizer import Optimizer +from .utils import _PassArg +from .utils import concat +from .visitor import NodeVisitor + +if t.TYPE_CHECKING: + import typing_extensions as te + + from .environment import Environment + +F = t.TypeVar("F", bound=t.Callable[..., t.Any]) + +operators = { + "eq": "==", + "ne": "!=", + "gt": ">", + "gteq": ">=", + "lt": "<", + "lteq": "<=", + "in": "in", + "notin": "not in", +} + + +def optimizeconst(f: F) -> F: + def new_func( + self: "CodeGenerator", node: nodes.Expr, frame: "Frame", **kwargs: t.Any + ) -> t.Any: + # Only optimize if the frame is not volatile + if self.optimizer is not None and not frame.eval_ctx.volatile: + new_node = self.optimizer.visit(node, frame.eval_ctx) + + if new_node != node: + return self.visit(new_node, frame) + + return f(self, node, frame, **kwargs) + + return update_wrapper(new_func, f) # type: ignore[return-value] + + +def _make_binop(op: str) -> t.Callable[["CodeGenerator", nodes.BinExpr, "Frame"], None]: + @optimizeconst + def visitor(self: "CodeGenerator", node: nodes.BinExpr, frame: Frame) -> None: + if ( + self.environment.sandboxed and op in self.environment.intercepted_binops # type: ignore + ): + self.write(f"environment.call_binop(context, {op!r}, ") + self.visit(node.left, frame) + self.write(", ") + self.visit(node.right, frame) + else: + self.write("(") + self.visit(node.left, frame) + self.write(f" {op} ") + self.visit(node.right, frame) + + self.write(")") + + return visitor + + +def _make_unop( + op: str, +) -> t.Callable[["CodeGenerator", nodes.UnaryExpr, "Frame"], None]: + @optimizeconst + def visitor(self: "CodeGenerator", 
node: nodes.UnaryExpr, frame: Frame) -> None: + if ( + self.environment.sandboxed and op in self.environment.intercepted_unops # type: ignore + ): + self.write(f"environment.call_unop(context, {op!r}, ") + self.visit(node.node, frame) + else: + self.write("(" + op) + self.visit(node.node, frame) + + self.write(")") + + return visitor + + +def generate( + node: nodes.Template, + environment: "Environment", + name: t.Optional[str], + filename: t.Optional[str], + stream: t.Optional[t.TextIO] = None, + defer_init: bool = False, + optimized: bool = True, +) -> t.Optional[str]: + """Generate the python source for a node tree.""" + if not isinstance(node, nodes.Template): + raise TypeError("Can't compile non template nodes") + + generator = environment.code_generator_class( + environment, name, filename, stream, defer_init, optimized + ) + generator.visit(node) + + if stream is None: + return generator.stream.getvalue() # type: ignore + + return None + + +def has_safe_repr(value: t.Any) -> bool: + """Does the node have a safe representation?""" + if value is None or value is NotImplemented or value is Ellipsis: + return True + + if type(value) in {bool, int, float, complex, range, str, Markup}: + return True + + if type(value) in {tuple, list, set, frozenset}: + return all(has_safe_repr(v) for v in value) + + if type(value) is dict: # noqa E721 + return all(has_safe_repr(k) and has_safe_repr(v) for k, v in value.items()) + + return False + + +def find_undeclared( + nodes: t.Iterable[nodes.Node], names: t.Iterable[str] +) -> t.Set[str]: + """Check if the names passed are accessed undeclared. The return value + is a set of all the undeclared names from the sequence of names found. 
+ """ + visitor = UndeclaredNameVisitor(names) + try: + for node in nodes: + visitor.visit(node) + except VisitorExit: + pass + return visitor.undeclared + + +class MacroRef: + def __init__(self, node: t.Union[nodes.Macro, nodes.CallBlock]) -> None: + self.node = node + self.accesses_caller = False + self.accesses_kwargs = False + self.accesses_varargs = False + + +class Frame: + """Holds compile time information for us.""" + + def __init__( + self, + eval_ctx: EvalContext, + parent: t.Optional["Frame"] = None, + level: t.Optional[int] = None, + ) -> None: + self.eval_ctx = eval_ctx + + # the parent of this frame + self.parent = parent + + if parent is None: + self.symbols = Symbols(level=level) + + # in some dynamic inheritance situations the compiler needs to add + # write tests around output statements. + self.require_output_check = False + + # inside some tags we are using a buffer rather than yield statements. + # this for example affects {% filter %} or {% macro %}. If a frame + # is buffered this variable points to the name of the list used as + # buffer. + self.buffer: t.Optional[str] = None + + # the name of the block we're in, otherwise None. + self.block: t.Optional[str] = None + + else: + self.symbols = Symbols(parent.symbols, level=level) + self.require_output_check = parent.require_output_check + self.buffer = parent.buffer + self.block = parent.block + + # a toplevel frame is the root + soft frames such as if conditions. + self.toplevel = False + + # the root frame is basically just the outermost frame, so no if + # conditions. This information is used to optimize inheritance + # situations. + self.rootlevel = False + + # variables set inside of loops and blocks should not affect outer frames, + # but they still needs to be kept track of as part of the active context. 
+ self.loop_frame = False + self.block_frame = False + + # track whether the frame is being used in an if-statement or conditional + # expression as it determines which errors should be raised during runtime + # or compile time. + self.soft_frame = False + + def copy(self) -> "te.Self": + """Create a copy of the current one.""" + rv = object.__new__(self.__class__) + rv.__dict__.update(self.__dict__) + rv.symbols = self.symbols.copy() + return rv + + def inner(self, isolated: bool = False) -> "Frame": + """Return an inner frame.""" + if isolated: + return Frame(self.eval_ctx, level=self.symbols.level + 1) + return Frame(self.eval_ctx, self) + + def soft(self) -> "te.Self": + """Return a soft frame. A soft frame may not be modified as + standalone thing as it shares the resources with the frame it + was created of, but it's not a rootlevel frame any longer. + + This is only used to implement if-statements and conditional + expressions. + """ + rv = self.copy() + rv.rootlevel = False + rv.soft_frame = True + return rv + + __copy__ = copy + + +class VisitorExit(RuntimeError): + """Exception used by the `UndeclaredNameVisitor` to signal a stop.""" + + +class DependencyFinderVisitor(NodeVisitor): + """A visitor that collects filter and test calls.""" + + def __init__(self) -> None: + self.filters: t.Set[str] = set() + self.tests: t.Set[str] = set() + + def visit_Filter(self, node: nodes.Filter) -> None: + self.generic_visit(node) + self.filters.add(node.name) + + def visit_Test(self, node: nodes.Test) -> None: + self.generic_visit(node) + self.tests.add(node.name) + + def visit_Block(self, node: nodes.Block) -> None: + """Stop visiting at blocks.""" + + +class UndeclaredNameVisitor(NodeVisitor): + """A visitor that checks if a name is accessed without being + declared. This is different from the frame visitor as it will + not stop at closure frames. 
+ """ + + def __init__(self, names: t.Iterable[str]) -> None: + self.names = set(names) + self.undeclared: t.Set[str] = set() + + def visit_Name(self, node: nodes.Name) -> None: + if node.ctx == "load" and node.name in self.names: + self.undeclared.add(node.name) + if self.undeclared == self.names: + raise VisitorExit() + else: + self.names.discard(node.name) + + def visit_Block(self, node: nodes.Block) -> None: + """Stop visiting a blocks.""" + + +class CompilerExit(Exception): + """Raised if the compiler encountered a situation where it just + doesn't make sense to further process the code. Any block that + raises such an exception is not further processed. + """ + + +class CodeGenerator(NodeVisitor): + def __init__( + self, + environment: "Environment", + name: t.Optional[str], + filename: t.Optional[str], + stream: t.Optional[t.TextIO] = None, + defer_init: bool = False, + optimized: bool = True, + ) -> None: + if stream is None: + stream = StringIO() + self.environment = environment + self.name = name + self.filename = filename + self.stream = stream + self.created_block_context = False + self.defer_init = defer_init + self.optimizer: t.Optional[Optimizer] = None + + if optimized: + self.optimizer = Optimizer(environment) + + # aliases for imports + self.import_aliases: t.Dict[str, str] = {} + + # a registry for all blocks. Because blocks are moved out + # into the global python scope they are registered here + self.blocks: t.Dict[str, nodes.Block] = {} + + # the number of extends statements so far + self.extends_so_far = 0 + + # some templates have a rootlevel extends. In this case we + # can safely assume that we're a child template and do some + # more optimizations. 
def return_buffer_contents(
    self, frame: Frame, force_unescaped: bool = False
) -> None:
    """Emit the ``return`` statement that hands back the frame's buffer.

    The generated code returns ``concat(<buffer>)``, wrapped in
    ``Markup(...)`` when autoescaping applies:

    * with a volatile eval context the decision is deferred to the
      generated code, which branches on ``context.eval_ctx.autoescape``;
    * otherwise ``frame.eval_ctx.autoescape`` decides at compile time.

    :param frame: frame whose ``buffer`` name is returned.
    :param force_unescaped: if true, always emit the plain return and
        never consult the eval context.
    """
    plain_return = f"return concat({frame.buffer})"
    markup_return = f"return Markup(concat({frame.buffer}))"

    if force_unescaped:
        self.writeline(plain_return)
        return

    eval_ctx = frame.eval_ctx
    if eval_ctx.volatile:
        # autoescape is only known at render time: emit both branches
        self.writeline("if context.eval_ctx.autoescape:")
        self.indent()
        self.writeline(markup_return)
        self.outdent()
        self.writeline("else:")
        self.indent()
        self.writeline(plain_return)
        self.outdent()
    elif eval_ctx.autoescape:
        self.writeline(markup_return)
    else:
        self.writeline(plain_return)
def signature(
    self,
    node: t.Union[nodes.Call, nodes.Filter, nodes.Test],
    frame: Frame,
    extra_kwargs: t.Optional[t.Mapping[str, t.Any]] = None,
) -> None:
    """Write the argument list of a call for ``node`` to the stream.

    Every argument is written with a leading ``", "``, so the caller is
    expected to have already written the first positional argument.
    Output order is: positional args, keyword args, ``*dyn_args``,
    ``**dyn_kwargs``.

    If any keyword name (from ``node.kwargs`` or ``extra_kwargs``) is a
    Python keyword, ``name=value`` would not be valid syntax.  In that
    case all keyword arguments are passed through a dict literal that is
    unpacked with ``**`` instead.

    :param node: call-like node providing ``args``, ``kwargs``,
        ``dyn_args`` and ``dyn_kwargs``.
    :param frame: frame used to visit the argument expressions.
    :param extra_kwargs: additional keyword arguments; the values are
        written into the output verbatim as source text.
    """
    # if any of the given keyword arguments is a python keyword
    # we have to make sure that no invalid call is created.
    kwarg_workaround = any(
        is_python_keyword(t.cast(str, k))
        for k in chain((x.key for x in node.kwargs), extra_kwargs or ())
    )

    # positional arguments
    for arg in node.args:
        self.write(", ")
        self.visit(arg, frame)

    # regular ``key=value`` form, only usable without keyword clashes
    if not kwarg_workaround:
        for kwarg in node.kwargs:
            self.write(", ")
            self.visit(kwarg, frame)
        if extra_kwargs is not None:
            for key, value in extra_kwargs.items():
                self.write(f", {key}={value}")
    if node.dyn_args:
        self.write(", *")
        self.visit(node.dyn_args, frame)

    if kwarg_workaround:
        # with dynamic kwargs present the literal is merged with them
        # through ``dict({...}, **dyn_kwargs)``
        if node.dyn_kwargs is not None:
            self.write(", **dict({")
        else:
            self.write(", **{")
        for kwarg in node.kwargs:
            self.write(f"{kwarg.key!r}: ")
            self.visit(kwarg.value, frame)
            self.write(", ")
        if extra_kwargs is not None:
            for key, value in extra_kwargs.items():
                self.write(f"{key!r}: {value}, ")
        if node.dyn_kwargs is not None:
            self.write("}, **")
            self.visit(node.dyn_kwargs, frame)
            self.write(")")
        else:
            self.write("}")

    elif node.dyn_kwargs is not None:
        self.write(", **")
        self.visit(node.dyn_kwargs, frame)
def macro_body(
    self, node: t.Union[nodes.Macro, nodes.CallBlock], frame: Frame
) -> t.Tuple[Frame, MacroRef]:
    """Dump the function def of a macro or call block.

    Writes a function named ``macro`` whose parameters are the declared
    arguments plus any of the special names ``caller``, ``kwargs`` and
    ``varargs`` that the body uses without declaring them.  The function
    body assigns defaults to missing parameters, renders ``node.body``
    into a buffer and returns the buffer contents.

    :param node: the macro or call block to compile.
    :param frame: the enclosing frame; an inner frame is derived from it.
    :return: the inner frame used for the body and the ``MacroRef``
        recording which special names the body accesses.
    """
    frame = frame.inner()
    frame.symbols.analyze_node(node)
    macro_ref = MacroRef(node)

    explicit_caller = None
    skip_special_params = set()
    args = []

    # collect the declared arguments and note which special names were
    # declared explicitly by the template author
    for idx, arg in enumerate(node.args):
        if arg.name == "caller":
            explicit_caller = idx
        if arg.name in ("kwargs", "varargs"):
            skip_special_params.add(arg.name)
        args.append(frame.symbols.ref(arg.name))

    undeclared = find_undeclared(node.body, ("caller", "kwargs", "varargs"))

    if "caller" in undeclared:
        # In older Jinja versions there was a bug that allowed caller
        # to retain the special behavior even if it was mentioned in
        # the argument list.  However thankfully this was only really
        # working if it was the last argument.  So we are explicitly
        # checking this now and error out if it is anywhere else in
        # the argument list.
        if explicit_caller is not None:
            # the index is negative, so defaults are matched against the
            # trailing arguments; IndexError means there is no default
            try:
                node.defaults[explicit_caller - len(node.args)]
            except IndexError:
                self.fail(
                    "When defining macros or call blocks the "
                    'special "caller" argument must be omitted '
                    "or be given a default.",
                    node.lineno,
                )
        else:
            args.append(frame.symbols.declare_parameter("caller"))
        macro_ref.accesses_caller = True
    if "kwargs" in undeclared and "kwargs" not in skip_special_params:
        args.append(frame.symbols.declare_parameter("kwargs"))
        macro_ref.accesses_kwargs = True
    if "varargs" in undeclared and "varargs" not in skip_special_params:
        args.append(frame.symbols.declare_parameter("varargs"))
        macro_ref.accesses_varargs = True

    # macros are delayed, they never require output checks
    frame.require_output_check = False
    frame.symbols.analyze_node(node)
    self.writeline(f"{self.func('macro')}({', '.join(args)}):", node)
    self.indent()

    self.buffer(frame)
    self.enter_frame(frame)

    # emit ``if <param> is missing:`` blocks that either evaluate the
    # declared default or bind an undefined object for the parameter
    self.push_parameter_definitions(frame)
    for idx, arg in enumerate(node.args):
        ref = frame.symbols.ref(arg.name)
        self.writeline(f"if {ref} is missing:")
        self.indent()
        try:
            default = node.defaults[idx - len(node.args)]
        except IndexError:
            self.writeline(
                f'{ref} = undefined("parameter {arg.name!r} was not provided",'
                f" name={arg.name!r})"
            )
        else:
            self.writeline(f"{ref} = ")
            self.visit(default, frame)
        self.mark_parameter_stored(ref)
        self.outdent()
    self.pop_parameter_definitions()

    self.blockvisit(node.body, frame)
    self.return_buffer_contents(frame, force_unescaped=True)
    self.leave_frame(frame, with_python_scope=True)
    self.outdent()

    return frame, macro_ref
def pop_parameter_definitions(self) -> None:
    """Drop the innermost set of pending parameter definitions.

    Removes the entry most recently appended to
    ``self._param_def_block``; the removed set is discarded.
    """
    pending_stack = self._param_def_block
    pending_stack.pop()
def get_resolve_func(self) -> str:
    """Return the source expression used to resolve a name.

    The innermost entry of ``self._context_reference_stack`` decides:
    for the plain ``context`` reference the bare name ``resolve`` is
    returned, for any other reference ``<reference>.resolve``.
    """
    ctx_ref = self._context_reference_stack[-1]
    return "resolve" if ctx_ref == "context" else f"{ctx_ref}.resolve"
def visit_Template(
    self, node: nodes.Template, frame: t.Optional[Frame] = None
) -> None:
    """Write the module source for a whole template.

    In order, this writes: the runtime import line, one import per
    ``ImportedName`` node, the ``name`` assignment, the ``root`` render
    function, one ``block_<name>`` function per block, and finally the
    ``blocks`` mapping and the ``debug_info`` string.

    :param node: the template root node.
    :param frame: must be ``None``; the root frame is created here.
    """
    assert frame is None, "no root frame allowed"
    eval_ctx = EvalContext(self.environment, self.name)

    from .runtime import async_exported
    from .runtime import exported

    if self.environment.is_async:
        exported_names = sorted(exported + async_exported)
    else:
        exported_names = sorted(exported)

    self.writeline("from jinja2.runtime import " + ", ".join(exported_names))

    # if we want a deferred initialization we cannot move the
    # environment into a local name
    envenv = "" if self.defer_init else ", environment=environment"

    # do we have an extends tag at all?  If not, we can save some
    # overhead by just not processing any inheritance code.
    have_extends = node.find(nodes.Extends) is not None

    # find all blocks
    for block in node.find_all(nodes.Block):
        if block.name in self.blocks:
            self.fail(f"block {block.name!r} defined twice", block.lineno)
        self.blocks[block.name] = block

    # find all imports and import them
    for import_ in node.find_all(nodes.ImportedName):
        if import_.importname not in self.import_aliases:
            imp = import_.importname
            self.import_aliases[imp] = alias = self.temporary_identifier()
            # dotted names are imported as an attribute of their module
            if "." in imp:
                module, obj = imp.rsplit(".", 1)
                self.writeline(f"from {module} import {obj} as {alias}")
            else:
                self.writeline(f"import {imp} as {alias}")

    # add the load name
    self.writeline(f"name = {self.name!r}")

    # generate the root render function.
    self.writeline(
        f"{self.func('root')}(context, missing=missing{envenv}):", extra=1
    )
    self.indent()
    self.write_commons()

    # process the root
    frame = Frame(eval_ctx)
    if "self" in find_undeclared(node.body, ("self",)):
        ref = frame.symbols.declare_parameter("self")
        self.writeline(f"{ref} = TemplateReference(context)")
    frame.symbols.analyze_node(node)
    frame.toplevel = frame.rootlevel = True
    frame.require_output_check = have_extends and not self.has_known_extends
    if have_extends:
        self.writeline("parent_template = None")
    self.enter_frame(frame)
    self.pull_dependencies(node.body)
    self.blockvisit(node.body, frame)
    self.leave_frame(frame, with_python_scope=True)
    self.outdent()

    # make sure that the parent root is called.
    if have_extends:
        # without a known extends the delegation to the parent is
        # guarded by a check in the generated code
        if not self.has_known_extends:
            self.indent()
            self.writeline("if parent_template is not None:")
        self.indent()
        if not self.environment.is_async:
            self.writeline("yield from parent_template.root_render_func(context)")
        else:
            self.writeline("agen = parent_template.root_render_func(context)")
            self.writeline("try:")
            self.indent()
            self.writeline("async for event in agen:")
            self.indent()
            self.writeline("yield event")
            self.outdent()
            self.outdent()
            self.writeline("finally: await agen.aclose()")
        # undo the one or two indents added above (the bool counts as 0/1)
        self.outdent(1 + (not self.has_known_extends))

    # at this point we now have the blocks collected and can visit them too.
    for name, block in self.blocks.items():
        self.writeline(
            f"{self.func('block_' + name)}(context, missing=missing{envenv}):",
            block,
            1,
        )
        self.indent()
        self.write_commons()
        # It's important that we do not make this frame a child of the
        # toplevel template.  This would cause a variety of
        # interesting issues with identifier tracking.
        block_frame = Frame(eval_ctx)
        block_frame.block_frame = True
        undeclared = find_undeclared(block.body, ("self", "super"))
        if "self" in undeclared:
            ref = block_frame.symbols.declare_parameter("self")
            self.writeline(f"{ref} = TemplateReference(context)")
        if "super" in undeclared:
            ref = block_frame.symbols.declare_parameter("super")
            self.writeline(f"{ref} = context.super({name!r}, block_{name})")
        block_frame.symbols.analyze_node(block)
        block_frame.block = name
        self.writeline("_block_vars = {}")
        self.enter_frame(block_frame)
        self.pull_dependencies(block.body)
        self.blockvisit(block.body, block_frame)
        self.leave_frame(block_frame, with_python_scope=True)
        self.outdent()

    # module-level mapping of block names to their render functions
    blocks_kv_str = ", ".join(f"{x!r}: block_{x}" for x in self.blocks)
    self.writeline(f"blocks = {{{blocks_kv_str}}}", extra=1)
    # the collected debug_info pairs, encoded as "a=b&c=d"
    debug_kv_str = "&".join(f"{k}={v}" for k, v in self.debug_info)
    self.writeline(f"debug_info = {debug_kv_str!r}")
def visit_Extends(self, node: nodes.Extends, frame: Frame) -> None:
    """Write the code that loads the parent template.

    The generated code assigns ``parent_template`` and appends each of
    the parent's blocks to ``context.blocks``.  For a second or later
    extends statement it first writes a ``TemplateRuntimeError`` raise
    ("extended multiple times").

    Fails with a template assertion error when used outside the top
    level.

    :raises CompilerExit: if an earlier extends is already known, since
        the unconditional raise written above ends template execution.
    """
    if not frame.toplevel:
        self.fail("cannot use extend from a non top-level scope", node.lineno)

    # if the number of extends statements in general is zero so
    # far, we don't have to add a check if something extended
    # the template before this one.
    if self.extends_so_far > 0:
        # if we have a known extends we just add a template runtime
        # error into the generated code.  We could catch that at compile
        # time too, but i welcome it not to confuse users by throwing the
        # same error at different times just "because we can".
        if not self.has_known_extends:
            self.writeline("if parent_template is not None:")
            self.indent()
        self.writeline('raise TemplateRuntimeError("extended multiple times")')

        # if we have a known extends already we don't need that code here
        # as we know that the template execution will end here.
        if self.has_known_extends:
            raise CompilerExit()
        else:
            # closes the ``if parent_template is not None:`` block
            self.outdent()

    self.writeline("parent_template = environment.get_template(", node)
    self.visit(node.template, frame)
    self.write(f", {self.name!r})")
    self.writeline("for name, parent_block in parent_template.blocks.items():")
    self.indent()
    self.writeline("context.blocks.setdefault(name, []).append(parent_block)")
    self.outdent()

    # if this extends statement was in the root level we can take
    # advantage of that information and simplify the generated code
    # in the top level from this point onwards
    if frame.rootlevel:
        self.has_known_extends = True

    # and now we have one more
    self.extends_so_far += 1
def visit_Import(self, node: nodes.Import, frame: Frame) -> None:
    """Write the assignment for an import that binds a whole template.

    The local reference for ``node.target`` is assigned the imported
    module.  At the top level the same value is also stored in
    ``context.vars``, and a target not starting with an underscore is
    discarded from ``context.exported_vars`` afterwards.
    """
    target = node.target
    local_ref = frame.symbols.ref(target)

    self.writeline(f"{local_ref} = ", node)
    if frame.toplevel:
        # chained assignment: local name and context variable together
        self.write(f"context.vars[{target!r}] = ")

    self._import_common(node, frame)

    if not frame.toplevel or target.startswith("_"):
        return
    self.writeline(f"context.exported_vars.discard({target!r})")
def visit_For(self, node: nodes.For, frame: Frame) -> None:
    """Write the code for a for-loop.

    Three inner frames are derived from ``frame``: one for the loop
    body, one for the optional filter test and one for the optional
    else branch.  Depending on the node this writes, in order: a filter
    generator function, the header of a recursive ``loop`` function,
    the ``for`` statement with its body, the else branch, and for
    recursive loops the return of the buffer plus the initial call of
    the ``loop`` function.
    """
    loop_frame = frame.inner()
    loop_frame.loop_frame = True
    test_frame = frame.inner()
    else_frame = frame.inner()

    # try to figure out if we have an extended loop.  An extended loop
    # is necessary if the loop is in recursive mode, if the special loop
    # variable is accessed in the body, or if the body is a scoped block.
    extended_loop = (
        node.recursive
        or "loop"
        in find_undeclared(node.iter_child_nodes(only=("body",)), ("loop",))
        or any(block.scoped for block in node.find_all(nodes.Block))
    )

    loop_ref = None
    if extended_loop:
        loop_ref = loop_frame.symbols.declare_parameter("loop")

    loop_frame.symbols.analyze_node(node, for_branch="body")
    if node.else_:
        else_frame.symbols.analyze_node(node, for_branch="else")

    if node.test:
        # write a generator function that takes the iterable as ``fiter``
        # and yields only the targets for which the test holds
        loop_filter_func = self.temporary_identifier()
        test_frame.symbols.analyze_node(node, for_branch="test")
        self.writeline(f"{self.func(loop_filter_func)}(fiter):", node.test)
        self.indent()
        self.enter_frame(test_frame)
        self.writeline(self.choose_async("async for ", "for "))
        self.visit(node.target, loop_frame)
        self.write(" in ")
        self.write(self.choose_async("auto_aiter(fiter)", "fiter"))
        self.write(":")
        self.indent()
        self.writeline("if ", node.test)
        self.visit(node.test, test_frame)
        self.write(":")
        self.indent()
        self.writeline("yield ")
        self.visit(node.target, loop_frame)
        # leave the ``if``, the ``for`` and the function body at once
        self.outdent(3)
        self.leave_frame(test_frame, with_python_scope=True)

    # if we don't have a recursive loop we have to find the shadowed
    # variables at that point.  Because loops can be nested but the loop
    # variable is a special one we have to enforce aliasing for it.
    if node.recursive:
        self.writeline(
            f"{self.func('loop')}(reciter, loop_render_func, depth=0):", node
        )
        self.indent()
        self.buffer(loop_frame)

        # Use the same buffer for the else frame
        else_frame.buffer = loop_frame.buffer

    # make sure the loop variable is a special one and raise a template
    # assertion error if a loop tries to write to loop
    if extended_loop:
        self.writeline(f"{loop_ref} = missing")

    for name in node.find_all(nodes.Name):
        if name.ctx == "store" and name.name == "loop":
            self.fail(
                "Can't assign to special loop variable in for-loop target",
                name.lineno,
            )

    if node.else_:
        # the indicator starts as 1 and is set to 0 by every iteration,
        # so it is still true afterwards only if the body never ran
        iteration_indicator = self.temporary_identifier()
        self.writeline(f"{iteration_indicator} = 1")

    self.writeline(self.choose_async("async for ", "for "), node)
    self.visit(node.target, loop_frame)
    if extended_loop:
        self.write(f", {loop_ref} in {self.choose_async('Async')}LoopContext(")
    else:
        self.write(" in ")

    # the iterable, optionally wrapped in the filter function
    if node.test:
        self.write(f"{loop_filter_func}(")
    if node.recursive:
        self.write("reciter")
    else:
        if self.environment.is_async and not extended_loop:
            self.write("auto_aiter(")
        self.visit(node.iter, frame)
        if self.environment.is_async and not extended_loop:
            self.write(")")
    if node.test:
        self.write(")")

    # close the LoopContext(...) call if one was opened above
    if node.recursive:
        self.write(", undefined, loop_render_func, depth):")
    else:
        self.write(", undefined):" if extended_loop else ":")

    self.indent()
    self.enter_frame(loop_frame)

    self.writeline("_loop_vars = {}")
    self.blockvisit(node.body, loop_frame)
    if node.else_:
        self.writeline(f"{iteration_indicator} = 0")
    self.outdent()
    self.leave_frame(
        loop_frame, with_python_scope=node.recursive and not node.else_
    )

    if node.else_:
        self.writeline(f"if {iteration_indicator}:")
        self.indent()
        self.enter_frame(else_frame)
        self.blockvisit(node.else_, else_frame)
        self.leave_frame(else_frame)
        self.outdent()

    # if the node was recursive we have to return the buffer contents
    # and start the iteration code
    if node.recursive:
        self.return_buffer_contents(loop_frame)
        self.outdent()
        self.start_write(frame, node)
        self.write(f"{self.choose_async('await ')}loop(")
        if self.environment.is_async:
            self.write("auto_aiter(")
        self.visit(node.iter, frame)
        if self.environment.is_async:
            self.write(")")
        self.write(", loop)")
        self.end_write(frame)

    # at the end of the iteration, clear any assignments made in the
    # loop from the top level
    if self._assign_stack:
        self._assign_stack[-1].difference_update(loop_frame.symbols.stores)
self.enter_frame(filter_frame) + self.buffer(filter_frame) + self.blockvisit(node.body, filter_frame) + self.start_write(frame, node) + self.visit_Filter(node.filter, filter_frame) + self.end_write(frame) + self.leave_frame(filter_frame) + + def visit_With(self, node: nodes.With, frame: Frame) -> None: + with_frame = frame.inner() + with_frame.symbols.analyze_node(node) + self.enter_frame(with_frame) + for target, expr in zip(node.targets, node.values): + self.newline() + self.visit(target, with_frame) + self.write(" = ") + self.visit(expr, frame) + self.blockvisit(node.body, with_frame) + self.leave_frame(with_frame) + + def visit_ExprStmt(self, node: nodes.ExprStmt, frame: Frame) -> None: + self.newline(node) + self.visit(node.node, frame) + + class _FinalizeInfo(t.NamedTuple): + const: t.Optional[t.Callable[..., str]] + src: t.Optional[str] + + @staticmethod + def _default_finalize(value: t.Any) -> t.Any: + """The default finalize function if the environment isn't + configured with one. Or, if the environment has one, this is + called on that function's output for constants. + """ + return str(value) + + _finalize: t.Optional[_FinalizeInfo] = None + + def _make_finalize(self) -> _FinalizeInfo: + """Build the finalize function to be used on constants and at + runtime. Cached so it's only created once for all output nodes. + + Returns a ``namedtuple`` with the following attributes: + + ``const`` + A function to finalize constant data at compile time. + + ``src`` + Source code to output around nodes to be evaluated at + runtime. 
+ """ + if self._finalize is not None: + return self._finalize + + finalize: t.Optional[t.Callable[..., t.Any]] + finalize = default = self._default_finalize + src = None + + if self.environment.finalize: + src = "environment.finalize(" + env_finalize = self.environment.finalize + pass_arg = { + _PassArg.context: "context", + _PassArg.eval_context: "context.eval_ctx", + _PassArg.environment: "environment", + }.get( + _PassArg.from_obj(env_finalize) # type: ignore + ) + finalize = None + + if pass_arg is None: + + def finalize(value: t.Any) -> t.Any: # noqa: F811 + return default(env_finalize(value)) + + else: + src = f"{src}{pass_arg}, " + + if pass_arg == "environment": + + def finalize(value: t.Any) -> t.Any: # noqa: F811 + return default(env_finalize(self.environment, value)) + + self._finalize = self._FinalizeInfo(finalize, src) + return self._finalize + + def _output_const_repr(self, group: t.Iterable[t.Any]) -> str: + """Given a group of constant values converted from ``Output`` + child nodes, produce a string to write to the template module + source. + """ + return repr(concat(group)) + + def _output_child_to_const( + self, node: nodes.Expr, frame: Frame, finalize: _FinalizeInfo + ) -> str: + """Try to optimize a child of an ``Output`` node by trying to + convert it to constant, finalized data at compile time. + + If :exc:`Impossible` is raised, the node is not constant and + will be evaluated at runtime. Any other exception will also be + evaluated at runtime for easier debugging. + """ + const = node.as_const(frame.eval_ctx) + + if frame.eval_ctx.autoescape: + const = escape(const) + + # Template data doesn't go through finalize. + if isinstance(node, nodes.TemplateData): + return str(const) + + return finalize.const(const) # type: ignore + + def _output_child_pre( + self, node: nodes.Expr, frame: Frame, finalize: _FinalizeInfo + ) -> None: + """Output extra source code before visiting a child of an + ``Output`` node. 
+ """ + if frame.eval_ctx.volatile: + self.write("(escape if context.eval_ctx.autoescape else str)(") + elif frame.eval_ctx.autoescape: + self.write("escape(") + else: + self.write("str(") + + if finalize.src is not None: + self.write(finalize.src) + + def _output_child_post( + self, node: nodes.Expr, frame: Frame, finalize: _FinalizeInfo + ) -> None: + """Output extra source code after visiting a child of an + ``Output`` node. + """ + self.write(")") + + if finalize.src is not None: + self.write(")") + + def visit_Output(self, node: nodes.Output, frame: Frame) -> None: + # If an extends is active, don't render outside a block. + if frame.require_output_check: + # A top-level extends is known to exist at compile time. + if self.has_known_extends: + return + + self.writeline("if parent_template is None:") + self.indent() + + finalize = self._make_finalize() + body: t.List[t.Union[t.List[t.Any], nodes.Expr]] = [] + + # Evaluate constants at compile time if possible. Each item in + # body will be either a list of static data or a node to be + # evaluated at runtime. + for child in node.nodes: + try: + if not ( + # If the finalize function requires runtime context, + # constants can't be evaluated at compile time. + finalize.const + # Unless it's basic template data that won't be + # finalized anyway. + or isinstance(child, nodes.TemplateData) + ): + raise nodes.Impossible() + + const = self._output_child_to_const(child, frame, finalize) + except (nodes.Impossible, Exception): + # The node was not constant and needs to be evaluated at + # runtime. Or another error was raised, which is easier + # to debug at runtime. 
+ body.append(child) + continue + + if body and isinstance(body[-1], list): + body[-1].append(const) + else: + body.append([const]) + + if frame.buffer is not None: + if len(body) == 1: + self.writeline(f"{frame.buffer}.append(") + else: + self.writeline(f"{frame.buffer}.extend((") + + self.indent() + + for item in body: + if isinstance(item, list): + # A group of constant data to join and output. + val = self._output_const_repr(item) + + if frame.buffer is None: + self.writeline("yield " + val) + else: + self.writeline(val + ",") + else: + if frame.buffer is None: + self.writeline("yield ", item) + else: + self.newline(item) + + # A node to be evaluated at runtime. + self._output_child_pre(item, frame, finalize) + self.visit(item, frame) + self._output_child_post(item, frame, finalize) + + if frame.buffer is not None: + self.write(",") + + if frame.buffer is not None: + self.outdent() + self.writeline(")" if len(body) == 1 else "))") + + if frame.require_output_check: + self.outdent() + + def visit_Assign(self, node: nodes.Assign, frame: Frame) -> None: + self.push_assign_tracking() + + # ``a.b`` is allowed for assignment, and is parsed as an NSRef. However, + # it is only valid if it references a Namespace object. Emit a check for + # that for each ref here, before assignment code is emitted. This can't + # be done in visit_NSRef as the ref could be in the middle of a tuple. + seen_refs: t.Set[str] = set() + + for nsref in node.find_all(nodes.NSRef): + if nsref.name in seen_refs: + # Only emit the check for each reference once, in case the same + # ref is used multiple times in a tuple, `ns.a, ns.b = c, d`. 
+ continue + + seen_refs.add(nsref.name) + ref = frame.symbols.ref(nsref.name) + self.writeline(f"if not isinstance({ref}, Namespace):") + self.indent() + self.writeline( + "raise TemplateRuntimeError" + '("cannot assign attribute on non-namespace object")' + ) + self.outdent() + + self.newline(node) + self.visit(node.target, frame) + self.write(" = ") + self.visit(node.node, frame) + self.pop_assign_tracking(frame) + + def visit_AssignBlock(self, node: nodes.AssignBlock, frame: Frame) -> None: + self.push_assign_tracking() + block_frame = frame.inner() + # This is a special case. Since a set block always captures we + # will disable output checks. This way one can use set blocks + # toplevel even in extended templates. + block_frame.require_output_check = False + block_frame.symbols.analyze_node(node) + self.enter_frame(block_frame) + self.buffer(block_frame) + self.blockvisit(node.body, block_frame) + self.newline(node) + self.visit(node.target, frame) + self.write(" = (Markup if context.eval_ctx.autoescape else identity)(") + if node.filter is not None: + self.visit_Filter(node.filter, block_frame) + else: + self.write(f"concat({block_frame.buffer})") + self.write(")") + self.pop_assign_tracking(frame) + self.leave_frame(block_frame) + + # -- Expression Visitors + + def visit_Name(self, node: nodes.Name, frame: Frame) -> None: + if node.ctx == "store" and ( + frame.toplevel or frame.loop_frame or frame.block_frame + ): + if self._assign_stack: + self._assign_stack[-1].add(node.name) + ref = frame.symbols.ref(node.name) + + # If we are looking up a variable we might have to deal with the + # case where it's undefined. We can skip that case if the load + # instruction indicates a parameter which are always defined. 
+ if node.ctx == "load": + load = frame.symbols.find_load(ref) + if not ( + load is not None + and load[0] == VAR_LOAD_PARAMETER + and not self.parameter_is_undeclared(ref) + ): + self.write( + f"(undefined(name={node.name!r}) if {ref} is missing else {ref})" + ) + return + + self.write(ref) + + def visit_NSRef(self, node: nodes.NSRef, frame: Frame) -> None: + # NSRef is a dotted assignment target a.b=c, but uses a[b]=c internally. + # visit_Assign emits code to validate that each ref is to a Namespace + # object only. That can't be emitted here as the ref could be in the + # middle of a tuple assignment. + ref = frame.symbols.ref(node.name) + self.writeline(f"{ref}[{node.attr!r}]") + + def visit_Const(self, node: nodes.Const, frame: Frame) -> None: + val = node.as_const(frame.eval_ctx) + if isinstance(val, float): + self.write(str(val)) + else: + self.write(repr(val)) + + def visit_TemplateData(self, node: nodes.TemplateData, frame: Frame) -> None: + try: + self.write(repr(node.as_const(frame.eval_ctx))) + except nodes.Impossible: + self.write( + f"(Markup if context.eval_ctx.autoescape else identity)({node.data!r})" + ) + + def visit_Tuple(self, node: nodes.Tuple, frame: Frame) -> None: + self.write("(") + idx = -1 + for idx, item in enumerate(node.items): + if idx: + self.write(", ") + self.visit(item, frame) + self.write(",)" if idx == 0 else ")") + + def visit_List(self, node: nodes.List, frame: Frame) -> None: + self.write("[") + for idx, item in enumerate(node.items): + if idx: + self.write(", ") + self.visit(item, frame) + self.write("]") + + def visit_Dict(self, node: nodes.Dict, frame: Frame) -> None: + self.write("{") + for idx, item in enumerate(node.items): + if idx: + self.write(", ") + self.visit(item.key, frame) + self.write(": ") + self.visit(item.value, frame) + self.write("}") + + visit_Add = _make_binop("+") + visit_Sub = _make_binop("-") + visit_Mul = _make_binop("*") + visit_Div = _make_binop("/") + visit_FloorDiv = _make_binop("//") + 
visit_Pow = _make_binop("**") + visit_Mod = _make_binop("%") + visit_And = _make_binop("and") + visit_Or = _make_binop("or") + visit_Pos = _make_unop("+") + visit_Neg = _make_unop("-") + visit_Not = _make_unop("not ") + + @optimizeconst + def visit_Concat(self, node: nodes.Concat, frame: Frame) -> None: + if frame.eval_ctx.volatile: + func_name = "(markup_join if context.eval_ctx.volatile else str_join)" + elif frame.eval_ctx.autoescape: + func_name = "markup_join" + else: + func_name = "str_join" + self.write(f"{func_name}((") + for arg in node.nodes: + self.visit(arg, frame) + self.write(", ") + self.write("))") + + @optimizeconst + def visit_Compare(self, node: nodes.Compare, frame: Frame) -> None: + self.write("(") + self.visit(node.expr, frame) + for op in node.ops: + self.visit(op, frame) + self.write(")") + + def visit_Operand(self, node: nodes.Operand, frame: Frame) -> None: + self.write(f" {operators[node.op]} ") + self.visit(node.expr, frame) + + @optimizeconst + def visit_Getattr(self, node: nodes.Getattr, frame: Frame) -> None: + if self.environment.is_async: + self.write("(await auto_await(") + + self.write("environment.getattr(") + self.visit(node.node, frame) + self.write(f", {node.attr!r})") + + if self.environment.is_async: + self.write("))") + + @optimizeconst + def visit_Getitem(self, node: nodes.Getitem, frame: Frame) -> None: + # slices bypass the environment getitem method. 
+ if isinstance(node.arg, nodes.Slice): + self.visit(node.node, frame) + self.write("[") + self.visit(node.arg, frame) + self.write("]") + else: + if self.environment.is_async: + self.write("(await auto_await(") + + self.write("environment.getitem(") + self.visit(node.node, frame) + self.write(", ") + self.visit(node.arg, frame) + self.write(")") + + if self.environment.is_async: + self.write("))") + + def visit_Slice(self, node: nodes.Slice, frame: Frame) -> None: + if node.start is not None: + self.visit(node.start, frame) + self.write(":") + if node.stop is not None: + self.visit(node.stop, frame) + if node.step is not None: + self.write(":") + self.visit(node.step, frame) + + @contextmanager + def _filter_test_common( + self, node: t.Union[nodes.Filter, nodes.Test], frame: Frame, is_filter: bool + ) -> t.Iterator[None]: + if self.environment.is_async: + self.write("(await auto_await(") + + if is_filter: + self.write(f"{self.filters[node.name]}(") + func = self.environment.filters.get(node.name) + else: + self.write(f"{self.tests[node.name]}(") + func = self.environment.tests.get(node.name) + + # When inside an If or CondExpr frame, allow the filter to be + # undefined at compile time and only raise an error if it's + # actually called at runtime. See pull_dependencies. + if func is None and not frame.soft_frame: + type_name = "filter" if is_filter else "test" + self.fail(f"No {type_name} named {node.name!r}.", node.lineno) + + pass_arg = { + _PassArg.context: "context", + _PassArg.eval_context: "context.eval_ctx", + _PassArg.environment: "environment", + }.get( + _PassArg.from_obj(func) # type: ignore + ) + + if pass_arg is not None: + self.write(f"{pass_arg}, ") + + # Back to the visitor function to handle visiting the target of + # the filter or test. 
+ yield + + self.signature(node, frame) + self.write(")") + + if self.environment.is_async: + self.write("))") + + @optimizeconst + def visit_Filter(self, node: nodes.Filter, frame: Frame) -> None: + with self._filter_test_common(node, frame, True): + # if the filter node is None we are inside a filter block + # and want to write to the current buffer + if node.node is not None: + self.visit(node.node, frame) + elif frame.eval_ctx.volatile: + self.write( + f"(Markup(concat({frame.buffer}))" + f" if context.eval_ctx.autoescape else concat({frame.buffer}))" + ) + elif frame.eval_ctx.autoescape: + self.write(f"Markup(concat({frame.buffer}))") + else: + self.write(f"concat({frame.buffer})") + + @optimizeconst + def visit_Test(self, node: nodes.Test, frame: Frame) -> None: + with self._filter_test_common(node, frame, False): + self.visit(node.node, frame) + + @optimizeconst + def visit_CondExpr(self, node: nodes.CondExpr, frame: Frame) -> None: + frame = frame.soft() + + def write_expr2() -> None: + if node.expr2 is not None: + self.visit(node.expr2, frame) + return + + self.write( + f'cond_expr_undefined("the inline if-expression on' + f" {self.position(node)} evaluated to false and no else" + f' section was defined.")' + ) + + self.write("(") + self.visit(node.expr1, frame) + self.write(" if ") + self.visit(node.test, frame) + self.write(" else ") + write_expr2() + self.write(")") + + @optimizeconst + def visit_Call( + self, node: nodes.Call, frame: Frame, forward_caller: bool = False + ) -> None: + if self.environment.is_async: + self.write("(await auto_await(") + if self.environment.sandboxed: + self.write("environment.call(context, ") + else: + self.write("context.call(") + self.visit(node.node, frame) + extra_kwargs = {"caller": "caller"} if forward_caller else None + loop_kwargs = {"_loop_vars": "_loop_vars"} if frame.loop_frame else {} + block_kwargs = {"_block_vars": "_block_vars"} if frame.block_frame else {} + if extra_kwargs: + 
extra_kwargs.update(loop_kwargs, **block_kwargs) + elif loop_kwargs or block_kwargs: + extra_kwargs = dict(loop_kwargs, **block_kwargs) + self.signature(node, frame, extra_kwargs) + self.write(")") + if self.environment.is_async: + self.write("))") + + def visit_Keyword(self, node: nodes.Keyword, frame: Frame) -> None: + self.write(node.key + "=") + self.visit(node.value, frame) + + # -- Unused nodes for extensions + + def visit_MarkSafe(self, node: nodes.MarkSafe, frame: Frame) -> None: + self.write("Markup(") + self.visit(node.expr, frame) + self.write(")") + + def visit_MarkSafeIfAutoescape( + self, node: nodes.MarkSafeIfAutoescape, frame: Frame + ) -> None: + self.write("(Markup if context.eval_ctx.autoescape else identity)(") + self.visit(node.expr, frame) + self.write(")") + + def visit_EnvironmentAttribute( + self, node: nodes.EnvironmentAttribute, frame: Frame + ) -> None: + self.write("environment." + node.name) + + def visit_ExtensionAttribute( + self, node: nodes.ExtensionAttribute, frame: Frame + ) -> None: + self.write(f"environment.extensions[{node.identifier!r}].{node.name}") + + def visit_ImportedName(self, node: nodes.ImportedName, frame: Frame) -> None: + self.write(self.import_aliases[node.importname]) + + def visit_InternalName(self, node: nodes.InternalName, frame: Frame) -> None: + self.write(node.name) + + def visit_ContextReference( + self, node: nodes.ContextReference, frame: Frame + ) -> None: + self.write("context") + + def visit_DerivedContextReference( + self, node: nodes.DerivedContextReference, frame: Frame + ) -> None: + self.write(self.derive_context(frame)) + + def visit_Continue(self, node: nodes.Continue, frame: Frame) -> None: + self.writeline("continue", node) + + def visit_Break(self, node: nodes.Break, frame: Frame) -> None: + self.writeline("break", node) + + def visit_Scope(self, node: nodes.Scope, frame: Frame) -> None: + scope_frame = frame.inner() + scope_frame.symbols.analyze_node(node) + self.enter_frame(scope_frame) + 
self.blockvisit(node.body, scope_frame) + self.leave_frame(scope_frame) + + def visit_OverlayScope(self, node: nodes.OverlayScope, frame: Frame) -> None: + ctx = self.temporary_identifier() + self.writeline(f"{ctx} = {self.derive_context(frame)}") + self.writeline(f"{ctx}.vars = ") + self.visit(node.context, frame) + self.push_context_reference(ctx) + + scope_frame = frame.inner(isolated=True) + scope_frame.symbols.analyze_node(node) + self.enter_frame(scope_frame) + self.blockvisit(node.body, scope_frame) + self.leave_frame(scope_frame) + self.pop_context_reference() + + def visit_EvalContextModifier( + self, node: nodes.EvalContextModifier, frame: Frame + ) -> None: + for keyword in node.options: + self.writeline(f"context.eval_ctx.{keyword.key} = ") + self.visit(keyword.value, frame) + try: + val = keyword.value.as_const(frame.eval_ctx) + except nodes.Impossible: + frame.eval_ctx.volatile = True + else: + setattr(frame.eval_ctx, keyword.key, val) + + def visit_ScopedEvalContextModifier( + self, node: nodes.ScopedEvalContextModifier, frame: Frame + ) -> None: + old_ctx_name = self.temporary_identifier() + saved_ctx = frame.eval_ctx.save() + self.writeline(f"{old_ctx_name} = context.eval_ctx.save()") + self.visit_EvalContextModifier(node, frame) + for child in node.body: + self.visit(child, frame) + frame.eval_ctx.revert(saved_ctx) + self.writeline(f"context.eval_ctx.revert({old_ctx_name})") diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/jinja2/constants.py b/URSA/.venv_ursa/lib/python3.12/site-packages/jinja2/constants.py new file mode 100644 index 0000000000000000000000000000000000000000..41a1c23b0a7fe134b1f662545876eb65b31b071e --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/jinja2/constants.py @@ -0,0 +1,20 @@ +#: list of lorem ipsum words used by the lipsum() helper function +LOREM_IPSUM_WORDS = """\ +a ac accumsan ad adipiscing aenean aliquam aliquet amet ante aptent arcu at +auctor augue bibendum blandit class commodo condimentum 
congue consectetuer +consequat conubia convallis cras cubilia cum curabitur curae cursus dapibus +diam dictum dictumst dignissim dis dolor donec dui duis egestas eget eleifend +elementum elit enim erat eros est et etiam eu euismod facilisi facilisis fames +faucibus felis fermentum feugiat fringilla fusce gravida habitant habitasse hac +hendrerit hymenaeos iaculis id imperdiet in inceptos integer interdum ipsum +justo lacinia lacus laoreet lectus leo libero ligula litora lobortis lorem +luctus maecenas magna magnis malesuada massa mattis mauris metus mi molestie +mollis montes morbi mus nam nascetur natoque nec neque netus nibh nisi nisl non +nonummy nostra nulla nullam nunc odio orci ornare parturient pede pellentesque +penatibus per pharetra phasellus placerat platea porta porttitor posuere +potenti praesent pretium primis proin pulvinar purus quam quis quisque rhoncus +ridiculus risus rutrum sagittis sapien scelerisque sed sem semper senectus sit +sociis sociosqu sodales sollicitudin suscipit suspendisse taciti tellus tempor +tempus tincidunt torquent tortor tristique turpis ullamcorper ultrices +ultricies urna ut varius vehicula vel velit venenatis vestibulum vitae vivamus +viverra volutpat vulputate""" diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/jinja2/debug.py b/URSA/.venv_ursa/lib/python3.12/site-packages/jinja2/debug.py new file mode 100644 index 0000000000000000000000000000000000000000..eeeeee78b620f5d0745133b4629647973cd7af87 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/jinja2/debug.py @@ -0,0 +1,191 @@ +import sys +import typing as t +from types import CodeType +from types import TracebackType + +from .exceptions import TemplateSyntaxError +from .utils import internal_code +from .utils import missing + +if t.TYPE_CHECKING: + from .runtime import Context + + +def rewrite_traceback_stack(source: t.Optional[str] = None) -> BaseException: + """Rewrite the current exception to replace any tracebacks from + within compiled 
template code with tracebacks that look like they + came from the template source. + + This must be called within an ``except`` block. + + :param source: For ``TemplateSyntaxError``, the original source if + known. + :return: The original exception with the rewritten traceback. + """ + _, exc_value, tb = sys.exc_info() + exc_value = t.cast(BaseException, exc_value) + tb = t.cast(TracebackType, tb) + + if isinstance(exc_value, TemplateSyntaxError) and not exc_value.translated: + exc_value.translated = True + exc_value.source = source + # Remove the old traceback, otherwise the frames from the + # compiler still show up. + exc_value.with_traceback(None) + # Outside of runtime, so the frame isn't executing template + # code, but it still needs to point at the template. + tb = fake_traceback( + exc_value, None, exc_value.filename or "", exc_value.lineno + ) + else: + # Skip the frame for the render function. + tb = tb.tb_next + + stack = [] + + # Build the stack of traceback object, replacing any in template + # code with the source file and line information. + while tb is not None: + # Skip frames decorated with @internalcode. These are internal + # calls that aren't useful in template debugging output. + if tb.tb_frame.f_code in internal_code: + tb = tb.tb_next + continue + + template = tb.tb_frame.f_globals.get("__jinja_template__") + + if template is not None: + lineno = template.get_corresponding_lineno(tb.tb_lineno) + fake_tb = fake_traceback(exc_value, tb, template.filename, lineno) + stack.append(fake_tb) + else: + stack.append(tb) + + tb = tb.tb_next + + tb_next = None + + # Assign tb_next in reverse to avoid circular references. 
+ for tb in reversed(stack): + tb.tb_next = tb_next + tb_next = tb + + return exc_value.with_traceback(tb_next) + + +def fake_traceback( # type: ignore + exc_value: BaseException, tb: t.Optional[TracebackType], filename: str, lineno: int +) -> TracebackType: + """Produce a new traceback object that looks like it came from the + template source instead of the compiled code. The filename, line + number, and location name will point to the template, and the local + variables will be the current template context. + + :param exc_value: The original exception to be re-raised to create + the new traceback. + :param tb: The original traceback to get the local variables and + code info from. + :param filename: The template filename. + :param lineno: The line number in the template source. + """ + if tb is not None: + # Replace the real locals with the context that would be + # available at that point in the template. + locals = get_template_locals(tb.tb_frame.f_locals) + locals.pop("__jinja_exception__", None) + else: + locals = {} + + globals = { + "__name__": filename, + "__file__": filename, + "__jinja_exception__": exc_value, + } + # Raise an exception at the correct line number. + code: CodeType = compile( + "\n" * (lineno - 1) + "raise __jinja_exception__", filename, "exec" + ) + + # Build a new code object that points to the template file and + # replaces the location with a block name. 
+ location = "template" + + if tb is not None: + function = tb.tb_frame.f_code.co_name + + if function == "root": + location = "top-level template code" + elif function.startswith("block_"): + location = f"block {function[6:]!r}" + + if sys.version_info >= (3, 8): + code = code.replace(co_name=location) + else: + code = CodeType( + code.co_argcount, + code.co_kwonlyargcount, + code.co_nlocals, + code.co_stacksize, + code.co_flags, + code.co_code, + code.co_consts, + code.co_names, + code.co_varnames, + code.co_filename, + location, + code.co_firstlineno, + code.co_lnotab, + code.co_freevars, + code.co_cellvars, + ) + + # Execute the new code, which is guaranteed to raise, and return + # the new traceback without this frame. + try: + exec(code, globals, locals) + except BaseException: + return sys.exc_info()[2].tb_next # type: ignore + + +def get_template_locals(real_locals: t.Mapping[str, t.Any]) -> t.Dict[str, t.Any]: + """Based on the runtime locals, get the context that would be + available at that point in the template. + """ + # Start with the current template context. + ctx: t.Optional[Context] = real_locals.get("context") + + if ctx is not None: + data: t.Dict[str, t.Any] = ctx.get_all().copy() + else: + data = {} + + # Might be in a derived context that only sets local variables + # rather than pushing a context. Local variables follow the scheme + # l_depth_name. Find the highest-depth local that has a value for + # each name. + local_overrides: t.Dict[str, t.Tuple[int, t.Any]] = {} + + for name, value in real_locals.items(): + if not name.startswith("l_") or value is missing: + # Not a template variable, or no longer relevant. + continue + + try: + _, depth_str, name = name.split("_", 2) + depth = int(depth_str) + except ValueError: + continue + + cur_depth = local_overrides.get(name, (-1,))[0] + + if cur_depth < depth: + local_overrides[name] = (depth, value) + + # Modify the context with any derived context. 
+ for name, (_, value) in local_overrides.items(): + if value is missing: + data.pop(name, None) + else: + data[name] = value + + return data diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/jinja2/defaults.py b/URSA/.venv_ursa/lib/python3.12/site-packages/jinja2/defaults.py new file mode 100644 index 0000000000000000000000000000000000000000..638cad3d2d8907330bde56e2b76c9b185c523b45 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/jinja2/defaults.py @@ -0,0 +1,48 @@ +import typing as t + +from .filters import FILTERS as DEFAULT_FILTERS # noqa: F401 +from .tests import TESTS as DEFAULT_TESTS # noqa: F401 +from .utils import Cycler +from .utils import generate_lorem_ipsum +from .utils import Joiner +from .utils import Namespace + +if t.TYPE_CHECKING: + import typing_extensions as te + +# defaults for the parser / lexer +BLOCK_START_STRING = "{%" +BLOCK_END_STRING = "%}" +VARIABLE_START_STRING = "{{" +VARIABLE_END_STRING = "}}" +COMMENT_START_STRING = "{#" +COMMENT_END_STRING = "#}" +LINE_STATEMENT_PREFIX: t.Optional[str] = None +LINE_COMMENT_PREFIX: t.Optional[str] = None +TRIM_BLOCKS = False +LSTRIP_BLOCKS = False +NEWLINE_SEQUENCE: "te.Literal['\\n', '\\r\\n', '\\r']" = "\n" +KEEP_TRAILING_NEWLINE = False + +# default filters, tests and namespace + +DEFAULT_NAMESPACE = { + "range": range, + "dict": dict, + "lipsum": generate_lorem_ipsum, + "cycler": Cycler, + "joiner": Joiner, + "namespace": Namespace, +} + +# default policies +DEFAULT_POLICIES: t.Dict[str, t.Any] = { + "compiler.ascii_str": True, + "urlize.rel": "noopener", + "urlize.target": None, + "urlize.extra_schemes": None, + "truncate.leeway": 5, + "json.dumps_function": None, + "json.dumps_kwargs": {"sort_keys": True}, + "ext.i18n.trimmed": False, +} diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/jinja2/environment.py b/URSA/.venv_ursa/lib/python3.12/site-packages/jinja2/environment.py new file mode 100644 index 
0000000000000000000000000000000000000000..0fc6e5be87ab8273f6056ddfede07e1be28f1495 --- /dev/null +++ b/URSA/.venv_ursa/lib/python3.12/site-packages/jinja2/environment.py @@ -0,0 +1,1672 @@ +"""Classes for managing templates and their runtime and compile time +options. +""" + +import os +import typing +import typing as t +import weakref +from collections import ChainMap +from functools import lru_cache +from functools import partial +from functools import reduce +from types import CodeType + +from markupsafe import Markup + +from . import nodes +from .compiler import CodeGenerator +from .compiler import generate +from .defaults import BLOCK_END_STRING +from .defaults import BLOCK_START_STRING +from .defaults import COMMENT_END_STRING +from .defaults import COMMENT_START_STRING +from .defaults import DEFAULT_FILTERS # type: ignore[attr-defined] +from .defaults import DEFAULT_NAMESPACE +from .defaults import DEFAULT_POLICIES +from .defaults import DEFAULT_TESTS # type: ignore[attr-defined] +from .defaults import KEEP_TRAILING_NEWLINE +from .defaults import LINE_COMMENT_PREFIX +from .defaults import LINE_STATEMENT_PREFIX +from .defaults import LSTRIP_BLOCKS +from .defaults import NEWLINE_SEQUENCE +from .defaults import TRIM_BLOCKS +from .defaults import VARIABLE_END_STRING +from .defaults import VARIABLE_START_STRING +from .exceptions import TemplateNotFound +from .exceptions import TemplateRuntimeError +from .exceptions import TemplatesNotFound +from .exceptions import TemplateSyntaxError +from .exceptions import UndefinedError +from .lexer import get_lexer +from .lexer import Lexer +from .lexer import TokenStream +from .nodes import EvalContext +from .parser import Parser +from .runtime import Context +from .runtime import new_context +from .runtime import Undefined +from .utils import _PassArg +from .utils import concat +from .utils import consume +from .utils import import_string +from .utils import internalcode +from .utils import LRUCache +from .utils import 
missing + +if t.TYPE_CHECKING: + import typing_extensions as te + + from .bccache import BytecodeCache + from .ext import Extension + from .loaders import BaseLoader + +_env_bound = t.TypeVar("_env_bound", bound="Environment") + + +# for direct template usage we have up to ten living environments +@lru_cache(maxsize=10) +def get_spontaneous_environment(cls: t.Type[_env_bound], *args: t.Any) -> _env_bound: + """Return a new spontaneous environment. A spontaneous environment + is used for templates created directly rather than through an + existing environment. + + :param cls: Environment class to create. + :param args: Positional arguments passed to environment. + """ + env = cls(*args) + env.shared = True + return env + + +def create_cache( + size: int, +) -> t.Optional[t.MutableMapping[t.Tuple["weakref.ref[t.Any]", str], "Template"]]: + """Return the cache class for the given size.""" + if size == 0: + return None + + if size < 0: + return {} + + return LRUCache(size) # type: ignore + + +def copy_cache( + cache: t.Optional[t.MutableMapping[t.Any, t.Any]], +) -> t.Optional[t.MutableMapping[t.Tuple["weakref.ref[t.Any]", str], "Template"]]: + """Create an empty copy of the given cache.""" + if cache is None: + return None + + if type(cache) is dict: # noqa E721 + return {} + + return LRUCache(cache.capacity) # type: ignore + + +def load_extensions( + environment: "Environment", + extensions: t.Sequence[t.Union[str, t.Type["Extension"]]], +) -> t.Dict[str, "Extension"]: + """Load the extensions from the list and bind it to the environment. + Returns a dict of instantiated extensions. 
+ """ + result = {} + + for extension in extensions: + if isinstance(extension, str): + extension = t.cast(t.Type["Extension"], import_string(extension)) + + result[extension.identifier] = extension(environment) + + return result + + +def _environment_config_check(environment: _env_bound) -> _env_bound: + """Perform a sanity check on the environment.""" + assert issubclass( + environment.undefined, Undefined + ), "'undefined' must be a subclass of 'jinja2.Undefined'." + assert ( + environment.block_start_string + != environment.variable_start_string + != environment.comment_start_string + ), "block, variable and comment start strings must be different." + assert environment.newline_sequence in { + "\r", + "\r\n", + "\n", + }, "'newline_sequence' must be one of '\\n', '\\r\\n', or '\\r'." + return environment + + +class Environment: + r"""The core component of Jinja is the `Environment`. It contains + important shared variables like configuration, filters, tests, + globals and others. Instances of this class may be modified if + they are not shared and if no template was loaded so far. + Modifications on environments after the first template was loaded + will lead to surprising effects and undefined behavior. + + Here are the possible initialization parameters: + + `block_start_string` + The string marking the beginning of a block. Defaults to ``'{%'``. + + `block_end_string` + The string marking the end of a block. Defaults to ``'%}'``. + + `variable_start_string` + The string marking the beginning of a print statement. + Defaults to ``'{{'``. + + `variable_end_string` + The string marking the end of a print statement. Defaults to + ``'}}'``. + + `comment_start_string` + The string marking the beginning of a comment. Defaults to ``'{#'``. + + `comment_end_string` + The string marking the end of a comment. Defaults to ``'#}'``. + + `line_statement_prefix` + If given and a string, this will be used as prefix for line based + statements. 
See also :ref:`line-statements`. + + `line_comment_prefix` + If given and a string, this will be used as prefix for line based + comments. See also :ref:`line-statements`. + + .. versionadded:: 2.2 + + `trim_blocks` + If this is set to ``True`` the first newline after a block is + removed (block, not variable tag!). Defaults to `False`. + + `lstrip_blocks` + If this is set to ``True`` leading spaces and tabs are stripped + from the start of a line to a block. Defaults to `False`. + + `newline_sequence` + The sequence that starts a newline. Must be one of ``'\r'``, + ``'\n'`` or ``'\r\n'``. The default is ``'\n'`` which is a + useful default for Linux and OS X systems as well as web + applications. + + `keep_trailing_newline` + Preserve the trailing newline when rendering templates. + The default is ``False``, which causes a single newline, + if present, to be stripped from the end of the template. + + .. versionadded:: 2.7 + + `extensions` + List of Jinja extensions to use. This can either be import paths + as strings or extension classes. For more information have a + look at :ref:`the extensions documentation `. + + `optimized` + should the optimizer be enabled? Default is ``True``. + + `undefined` + :class:`Undefined` or a subclass of it that is used to represent + undefined values in the template. + + `finalize` + A callable that can be used to process the result of a variable + expression before it is output. For example one can convert + ``None`` implicitly into an empty string here. + + `autoescape` + If set to ``True`` the XML/HTML autoescaping feature is enabled by + default. For more details about autoescaping see + :class:`~markupsafe.Markup`. As of Jinja 2.4 this can also + be a callable that is passed the template name and has to + return ``True`` or ``False`` depending on autoescape should be + enabled by default. + + .. versionchanged:: 2.4 + `autoescape` can now be a function + + `loader` + The template loader for this environment. 
+ + `cache_size` + The size of the cache. Per default this is ``400`` which means + that if more than 400 templates are loaded the loader will clean + out the least recently used template. If the cache size is set to + ``0`` templates are recompiled all the time, if the cache size is + ``-1`` the cache will not be cleaned. + + .. versionchanged:: 2.8 + The cache size was increased to 400 from a low 50. + + `auto_reload` + Some loaders load templates from locations where the template + sources may change (ie: file system or database). If + ``auto_reload`` is set to ``True`` (default) every time a template is + requested the loader checks if the source changed and if yes, it + will reload the template. For higher performance it's possible to + disable that. + + `bytecode_cache` + If set to a bytecode cache object, this object will provide a + cache for the internal Jinja bytecode so that templates don't + have to be parsed if they were not changed. + + See :ref:`bytecode-cache` for more information. + + `enable_async` + If set to true this enables async template execution which + allows using async functions and generators. + """ + + #: if this environment is sandboxed. Modifying this variable won't make + #: the environment sandboxed though. For a real sandboxed environment + #: have a look at jinja2.sandbox. This flag alone controls the code + #: generation by the compiler. + sandboxed = False + + #: True if the environment is just an overlay + overlayed = False + + #: the environment this environment is linked to if it is an overlay + linked_to: t.Optional["Environment"] = None + + #: shared environments have this set to `True`. A shared environment + #: must not be modified + shared = False + + #: the class that is used for code generation. See + #: :class:`~jinja2.compiler.CodeGenerator` for more information. + code_generator_class: t.Type["CodeGenerator"] = CodeGenerator + + concat = "".join + + #: the context class that is used for templates. 
See + #: :class:`~jinja2.runtime.Context` for more information. + context_class: t.Type[Context] = Context + + template_class: t.Type["Template"] + + def __init__( + self, + block_start_string: str = BLOCK_START_STRING, + block_end_string: str = BLOCK_END_STRING, + variable_start_string: str = VARIABLE_START_STRING, + variable_end_string: str = VARIABLE_END_STRING, + comment_start_string: str = COMMENT_START_STRING, + comment_end_string: str = COMMENT_END_STRING, + line_statement_prefix: t.Optional[str] = LINE_STATEMENT_PREFIX, + line_comment_prefix: t.Optional[str] = LINE_COMMENT_PREFIX, + trim_blocks: bool = TRIM_BLOCKS, + lstrip_blocks: bool = LSTRIP_BLOCKS, + newline_sequence: "te.Literal['\\n', '\\r\\n', '\\r']" = NEWLINE_SEQUENCE, + keep_trailing_newline: bool = KEEP_TRAILING_NEWLINE, + extensions: t.Sequence[t.Union[str, t.Type["Extension"]]] = (), + optimized: bool = True, + undefined: t.Type[Undefined] = Undefined, + finalize: t.Optional[t.Callable[..., t.Any]] = None, + autoescape: t.Union[bool, t.Callable[[t.Optional[str]], bool]] = False, + loader: t.Optional["BaseLoader"] = None, + cache_size: int = 400, + auto_reload: bool = True, + bytecode_cache: t.Optional["BytecodeCache"] = None, + enable_async: bool = False, + ): + # !!Important notice!! + # The constructor accepts quite a few arguments that should be + # passed by keyword rather than position. However it's important to + # not change the order of arguments because it's used at least + # internally in those cases: + # - spontaneous environments (i18n extension and Template) + # - unittests + # If parameter changes are required only add parameters at the end + # and don't change the arguments (or the defaults!) of the arguments + # existing already. 
+ + # lexer / parser information + self.block_start_string = block_start_string + self.block_end_string = block_end_string + self.variable_start_string = variable_start_string + self.variable_end_string = variable_end_string + self.comment_start_string = comment_start_string + self.comment_end_string = comment_end_string + self.line_statement_prefix = line_statement_prefix + self.line_comment_prefix = line_comment_prefix + self.trim_blocks = trim_blocks + self.lstrip_blocks = lstrip_blocks + self.newline_sequence = newline_sequence + self.keep_trailing_newline = keep_trailing_newline + + # runtime information + self.undefined: t.Type[Undefined] = undefined + self.optimized = optimized + self.finalize = finalize + self.autoescape = autoescape + + # defaults + self.filters = DEFAULT_FILTERS.copy() + self.tests = DEFAULT_TESTS.copy() + self.globals = DEFAULT_NAMESPACE.copy() + + # set the loader provided + self.loader = loader + self.cache = create_cache(cache_size) + self.bytecode_cache = bytecode_cache + self.auto_reload = auto_reload + + # configurable policies + self.policies = DEFAULT_POLICIES.copy() + + # load extensions + self.extensions = load_extensions(self, extensions) + + self.is_async = enable_async + _environment_config_check(self) + + def add_extension(self, extension: t.Union[str, t.Type["Extension"]]) -> None: + """Adds an extension after the environment was created. + + .. versionadded:: 2.5 + """ + self.extensions.update(load_extensions(self, [extension])) + + def extend(self, **attributes: t.Any) -> None: + """Add the items to the instance of the environment if they do not exist + yet. This is used by :ref:`extensions ` to register + callbacks and configuration values without breaking inheritance. 
+ """ + for key, value in attributes.items(): + if not hasattr(self, key): + setattr(self, key, value) + + def overlay( + self, + block_start_string: str = missing, + block_end_string: str = missing, + variable_start_string: str = missing, + variable_end_string: str = missing, + comment_start_string: str = missing, + comment_end_string: str = missing, + line_statement_prefix: t.Optional[str] = missing, + line_comment_prefix: t.Optional[str] = missing, + trim_blocks: bool = missing, + lstrip_blocks: bool = missing, + newline_sequence: "te.Literal['\\n', '\\r\\n', '\\r']" = missing, + keep_trailing_newline: bool = missing, + extensions: t.Sequence[t.Union[str, t.Type["Extension"]]] = missing, + optimized: bool = missing, + undefined: t.Type[Undefined] = missing, + finalize: t.Optional[t.Callable[..., t.Any]] = missing, + autoescape: t.Union[bool, t.Callable[[t.Optional[str]], bool]] = missing, + loader: t.Optional["BaseLoader"] = missing, + cache_size: int = missing, + auto_reload: bool = missing, + bytecode_cache: t.Optional["BytecodeCache"] = missing, + enable_async: bool = missing, + ) -> "te.Self": + """Create a new overlay environment that shares all the data with the + current environment except for cache and the overridden attributes. + Extensions cannot be removed for an overlayed environment. An overlayed + environment automatically gets all the extensions of the environment it + is linked to plus optional extra extensions. + + Creating overlays should happen after the initial environment was set + up completely. Not all attributes are truly linked, some are just + copied over so modifications on the original environment may not shine + through. + + .. versionchanged:: 3.1.5 + ``enable_async`` is applied correctly. + + .. versionchanged:: 3.1.2 + Added the ``newline_sequence``, ``keep_trailing_newline``, + and ``enable_async`` parameters to match ``__init__``. 
+ """ + args = dict(locals()) + del args["self"], args["cache_size"], args["extensions"], args["enable_async"] + + rv = object.__new__(self.__class__) + rv.__dict__.update(self.__dict__) + rv.overlayed = True + rv.linked_to = self + + for key, value in args.items(): + if value is not missing: + setattr(rv, key, value) + + if cache_size is not missing: + rv.cache = create_cache(cache_size) + else: + rv.cache = copy_cache(self.cache) + + rv.extensions = {} + for key, value in self.extensions.items(): + rv.extensions[key] = value.bind(rv) + if extensions is not missing: + rv.extensions.update(load_extensions(rv, extensions)) + + if enable_async is not missing: + rv.is_async = enable_async + + return _environment_config_check(rv) + + @property + def lexer(self) -> Lexer: + """The lexer for this environment.""" + return get_lexer(self) + + def iter_extensions(self) -> t.Iterator["Extension"]: + """Iterates over the extensions by priority.""" + return iter(sorted(self.extensions.values(), key=lambda x: x.priority)) + + def getitem( + self, obj: t.Any, argument: t.Union[str, t.Any] + ) -> t.Union[t.Any, Undefined]: + """Get an item or attribute of an object but prefer the item.""" + try: + return obj[argument] + except (AttributeError, TypeError, LookupError): + if isinstance(argument, str): + try: + attr = str(argument) + except Exception: + pass + else: + try: + return getattr(obj, attr) + except AttributeError: + pass + return self.undefined(obj=obj, name=argument) + + def getattr(self, obj: t.Any, attribute: str) -> t.Any: + """Get an item or attribute of an object but prefer the attribute. + Unlike :meth:`getitem` the attribute *must* be a string. 
+ """ + try: + return getattr(obj, attribute) + except AttributeError: + pass + try: + return obj[attribute] + except (TypeError, LookupError, AttributeError): + return self.undefined(obj=obj, name=attribute) + + def _filter_test_common( + self, + name: t.Union[str, Undefined], + value: t.Any, + args: t.Optional[t.Sequence[t.Any]], + kwargs: t.Optional[t.Mapping[str, t.Any]], + context: t.Optional[Context], + eval_ctx: t.Optional[EvalContext], + is_filter: bool, + ) -> t.Any: + if is_filter: + env_map = self.filters + type_name = "filter" + else: + env_map = self.tests + type_name = "test" + + func = env_map.get(name) # type: ignore + + if func is None: + msg = f"No {type_name} named {name!r}." + + if isinstance(name, Undefined): + try: + name._fail_with_undefined_error() + except Exception as e: + msg = f"{msg} ({e}; did you forget to quote the callable name?)" + + raise TemplateRuntimeError(msg) + + args = [value, *(args if args is not None else ())] + kwargs = kwargs if kwargs is not None else {} + pass_arg = _PassArg.from_obj(func) + + if pass_arg is _PassArg.context: + if context is None: + raise TemplateRuntimeError( + f"Attempted to invoke a context {type_name} without context." + ) + + args.insert(0, context) + elif pass_arg is _PassArg.eval_context: + if eval_ctx is None: + if context is not None: + eval_ctx = context.eval_ctx + else: + eval_ctx = EvalContext(self) + + args.insert(0, eval_ctx) + elif pass_arg is _PassArg.environment: + args.insert(0, self) + + return func(*args, **kwargs) + + def call_filter( + self, + name: str, + value: t.Any, + args: t.Optional[t.Sequence[t.Any]] = None, + kwargs: t.Optional[t.Mapping[str, t.Any]] = None, + context: t.Optional[Context] = None, + eval_ctx: t.Optional[EvalContext] = None, + ) -> t.Any: + """Invoke a filter on a value the same way the compiler does. + + This might return a coroutine if the filter is running from an + environment in async mode and the filter supports async + execution. 
It's your responsibility to await this if needed. + + .. versionadded:: 2.7 + """ + return self._filter_test_common( + name, value, args, kwargs, context, eval_ctx, True + ) + + def call_test( + self, + name: str, + value: t.Any, + args: t.Optional[t.Sequence[t.Any]] = None, + kwargs: t.Optional[t.Mapping[str, t.Any]] = None, + context: t.Optional[Context] = None, + eval_ctx: t.Optional[EvalContext] = None, + ) -> t.Any: + """Invoke a test on a value the same way the compiler does. + + This might return a coroutine if the test is running from an + environment in async mode and the test supports async execution. + It's your responsibility to await this if needed. + + .. versionchanged:: 3.0 + Tests support ``@pass_context``, etc. decorators. Added + the ``context`` and ``eval_ctx`` parameters. + + .. versionadded:: 2.7 + """ + return self._filter_test_common( + name, value, args, kwargs, context, eval_ctx, False + ) + + @internalcode + def parse( + self, + source: str, + name: t.Optional[str] = None, + filename: t.Optional[str] = None, + ) -> nodes.Template: + """Parse the sourcecode and return the abstract syntax tree. This + tree of nodes is used by the compiler to convert the template into + executable source- or bytecode. This is useful for debugging or to + extract information from templates. + + If you are :ref:`developing Jinja extensions ` + this gives you a good overview of the node tree generated. 
+ """ + try: + return self._parse(source, name, filename) + except TemplateSyntaxError: + self.handle_exception(source=source) + + def _parse( + self, source: str, name: t.Optional[str], filename: t.Optional[str] + ) -> nodes.Template: + """Internal parsing function used by `parse` and `compile`.""" + return Parser(self, source, name, filename).parse() + + def lex( + self, + source: str, + name: t.Optional[str] = None, + filename: t.Optional[str] = None, + ) -> t.Iterator[t.Tuple[int, str, str]]: + """Lex the given sourcecode and return a generator that yields + tokens as tuples in the form ``(lineno, token_type, value)``. + This can be useful for :ref:`extension development ` + and debugging templates. + + This does not perform preprocessing. If you want the preprocessing + of the extensions to be applied you have to filter source through + the :meth:`preprocess` method. + """ + source = str(source) + try: + return self.lexer.tokeniter(source, name, filename) + except TemplateSyntaxError: + self.handle_exception(source=source) + + def preprocess( + self, + source: str, + name: t.Optional[str] = None, + filename: t.Optional[str] = None, + ) -> str: + """Preprocesses the source with all extensions. This is automatically + called for all parsing and compiling methods but *not* for :meth:`lex` + because there you usually only want the actual source tokenized. + """ + return reduce( + lambda s, e: e.preprocess(s, name, filename), + self.iter_extensions(), + str(source), + ) + + def _tokenize( + self, + source: str, + name: t.Optional[str], + filename: t.Optional[str] = None, + state: t.Optional[str] = None, + ) -> TokenStream: + """Called by the parser to do the preprocessing and filtering + for all the extensions. Returns a :class:`~jinja2.lexer.TokenStream`. 
+ """ + source = self.preprocess(source, name, filename) + stream = self.lexer.tokenize(source, name, filename, state) + + for ext in self.iter_extensions(): + stream = ext.filter_stream(stream) # type: ignore + + if not isinstance(stream, TokenStream): + stream = TokenStream(stream, name, filename) + + return stream + + def _generate( + self, + source: nodes.Template, + name: t.Optional[str], + filename: t.Optional[str], + defer_init: bool = False, + ) -> str: + """Internal hook that can be overridden to hook a different generate + method in. + + .. versionadded:: 2.5 + """ + return generate( # type: ignore + source, + self, + name, + filename, + defer_init=defer_init, + optimized=self.optimized, + ) + + def _compile(self, source: str, filename: str) -> CodeType: + """Internal hook that can be overridden to hook a different compile + method in. + + .. versionadded:: 2.5 + """ + return compile(source, filename, "exec") + + @typing.overload + def compile( + self, + source: t.Union[str, nodes.Template], + name: t.Optional[str] = None, + filename: t.Optional[str] = None, + raw: "te.Literal[False]" = False, + defer_init: bool = False, + ) -> CodeType: ... + + @typing.overload + def compile( + self, + source: t.Union[str, nodes.Template], + name: t.Optional[str] = None, + filename: t.Optional[str] = None, + raw: "te.Literal[True]" = ..., + defer_init: bool = False, + ) -> str: ... + + @internalcode + def compile( + self, + source: t.Union[str, nodes.Template], + name: t.Optional[str] = None, + filename: t.Optional[str] = None, + raw: bool = False, + defer_init: bool = False, + ) -> t.Union[str, CodeType]: + """Compile a node or template source code. The `name` parameter is + the load name of the template after it was joined using + :meth:`join_path` if necessary, not the filename on the file system. + the `filename` parameter is the estimated filename of the template on + the file system. If the template came from a database or memory this + can be omitted. 
+ + The return value of this method is a python code object. If the `raw` + parameter is `True` the return value will be a string with python + code equivalent to the bytecode returned otherwise. This method is + mainly used internally. + + `defer_init` is use internally to aid the module code generator. This + causes the generated code to be able to import without the global + environment variable to be set. + + .. versionadded:: 2.4 + `defer_init` parameter added. + """ + source_hint = None + try: + if isinstance(source, str): + source_hint = source + source = self._parse(source, name, filename) + source = self._generate(source, name, filename, defer_init=defer_init) + if raw: + return source + if filename is None: + filename = "