import inspect
import itertools
import os
import shlex
import sys
from collections.abc import Callable, Iterable, Sequence
from contextlib import suppress
from functools import partial
from typing import TYPE_CHECKING, Any, NamedTuple, get_origin
from cyclopts._convert import _bool
from cyclopts.annotations import resolve_optional
from cyclopts.argument import Argument, ArgumentCollection
from cyclopts.exceptions import (
ArgumentOrderError,
CoercionError,
CombinedShortOptionError,
ConsumeMultipleError,
CycloptsError,
MissingArgumentError,
RequiresEqualsError,
UnknownOptionError,
ValidationError,
)
from cyclopts.field_info import POSITIONAL_ONLY, POSITIONAL_OR_KEYWORD
from cyclopts.token import Token
from cyclopts.utils import UNSET, is_option_like
# NOTE(review): both branches are empty, so this version gate is currently a
# no-op; presumably a version-dependent import (e.g. ``typing`` vs.
# ``typing_extensions``) lived here — confirm against upstream history.
if sys.version_info < (3, 11):  # pragma: no cover
    pass
else:  # pragma: no cover
    pass
if TYPE_CHECKING:
from cyclopts.group import Group
# Factory for tokens originating from the command line: every Token built via
# ``CliToken`` has ``source="cli"`` pre-filled.
CliToken = partial(Token, source="cli")
class _KeywordMatch(NamedTuple):
    """Pairs a matched CLI token with the Argument it resolved to."""

    matched_token: str
    """The literal CLI token that matched (e.g., '-o', '--option')."""
    argument: Argument
    """Argument object the token resolved to."""
    keys: tuple[str, ...]
    """Remaining keys for nested (dict-like) arguments."""
    implicit_value: Any
    """Value implied when the match is a flag; UNSET when the option takes a value."""
def normalize_tokens(tokens: None | str | Iterable[str]) -> list[str]:
if tokens is None:
tokens = sys.argv[1:] # Remove the executable
elif isinstance(tokens, str):
tokens = shlex.split(tokens)
else:
tokens = list(tokens)
return tokens
def _common_root_keys(argument_collection) -> tuple[str, ...]:
if not argument_collection:
return ()
common = argument_collection[0].keys
for argument in argument_collection[1:]:
if not argument.keys:
return ()
for i, (common_key, argument_key) in enumerate(zip(common, argument.keys, strict=False)):
if common_key != argument_key:
if i == 0:
return ()
common = argument.keys[:i]
break
common = common[: len(argument.keys)]
return common
def _parse_kw_and_flags(
    argument_collection: ArgumentCollection,
    tokens: Sequence[str],
    *,
    end_of_options_delimiter: str = "--",
    stop_at_first_unknown: bool = False,
) -> tuple[list[str], int | None]:
    """Extract keyword arguments and flags from the token stream.

    Matched tokens are appended onto the matching ``Argument`` in-place;
    everything else is returned for positional parsing.

    Parameters
    ----------
    argument_collection: ArgumentCollection
        Arguments to match keyword tokens against.
    tokens: Sequence[str]
        Raw CLI tokens.
    end_of_options_delimiter: str
        Tokens at/after this delimiter are never treated as keywords; they are
        passed through untouched (delimiter included) for ``_parse_pos``.
    stop_at_first_unknown: bool
        If true, stop at the first unknown option and return the remainder unused.

    Returns
    -------
    unused_tokens: list[str]
        Tokens not consumed by any keyword or flag.
    contiguous_positional_count: int | None
        Number of leading contiguous non-option tokens before the first gap
        caused by keyword extraction. ``None`` if all non-option tokens are
        contiguous (i.e. no keywords were interleaved among positional tokens).

        For example, given ``a b c --bar 8 --baz 10 d``, the unused tokens are
        ``['a', 'b', 'c', 'd']`` with original indices ``[0, 1, 2, 6]``.
        The gap between indices 2 and 6 yields ``contiguous_positional_count=3``.

        This is used by ``_parse_pos`` to prevent positional-only list parameters
        from consuming tokens that appeared after keyword arguments.
    """
    unused_tokens, positional_only_tokens = [], []
    unused_token_original_indices: list[int] = []
    # Number of upcoming tokens already consumed as values for a keyword.
    skip_next_iterations = 0
    if end_of_options_delimiter:
        try:
            delimiter_index = tokens.index(end_of_options_delimiter)
        except ValueError:
            pass  # end_of_options_delimiter not in token stream
        else:
            # Keep the delimiter itself; ``_parse_pos`` re-detects and drops it.
            positional_only_tokens = tokens[delimiter_index:]
            tokens = tokens[:delimiter_index]
    for i, token in enumerate(tokens):
        # If the previous argument was a keyword, then this is its value
        if skip_next_iterations > 0:
            skip_next_iterations -= 1
            continue
        if not is_option_like(token, allow_numbers=True):
            if stop_at_first_unknown:
                # Stop parsing and return all remaining tokens as unused
                unused_tokens.extend(tokens[i:])
                unused_token_original_indices.extend(range(i, len(tokens)))
                break
            unused_tokens.append(token)
            unused_token_original_indices.append(i)
            continue
        cli_values: list[str] = []
        # Adjustment applied to the number of follow-on tokens to consume;
        # a value supplied via "=" or attached GNU-style counts as one already consumed.
        consume_count = 0
        # startswith("-") is redundant, but it's cheap safety.
        allow_combined_flags = token.startswith("-") and not token.startswith("--")
        # Try splitting on "=" for long options or short options that match exactly
        if "=" in token:
            cli_option, cli_value = token.split("=", 1)
            # Try to match the part before "="
            try:
                argument_collection.match(cli_option)
                # Matched! Use the split
                cli_values.append(cli_value)
                consume_count -= 1
                allow_combined_flags = False
            except ValueError:
                # No match - might be GNU-style like "-pfile=value"
                # Don't split, treat whole token as the option
                cli_option = token
        else:
            cli_option = token
        matches: list[_KeywordMatch] = []
        attached_value: str | None = None  # Track value attached to a GNU-style combined option
        try:
            matches.append(_KeywordMatch(cli_option, *argument_collection.match(cli_option)))
        except ValueError:
            # Length has to be greater than 2 (hyphen + character) to be exploded.
            # Also exclude numeric values (e.g., -10, -3.14) from combined flag parsing.
            if allow_combined_flags and len(token) > 2 and is_option_like(token, allow_numbers=False):
                # GNU-style combined short options: process left-to-right
                # Once we hit an option that takes a value, the rest is the value
                chars = cli_option.lstrip("-")
                position = 0
                while position < len(chars):
                    char = chars[position]
                    test_flag = f"-{char}"
                    try:
                        arg, keys, implicit = argument_collection.match(test_flag)
                        if implicit is not UNSET or arg.parameter.count:
                            # This is a flag (boolean or counting) - consume just this character
                            matches.append(_KeywordMatch(test_flag, arg, keys, implicit))
                            position += 1
                        else:
                            # This option takes a value - rest of the string is the value
                            remainder = chars[position + 1 :]
                            matches.append(_KeywordMatch(test_flag, arg, keys, implicit))
                            if remainder:
                                # Value is attached: -uroot or -fvuroot
                                # Store it separately, will be added to cli_values when processing this match
                                attached_value = remainder
                                consume_count -= 1
                            # Stop processing further characters
                            break
                    except ValueError:
                        # Unknown flag
                        if stop_at_first_unknown:
                            unused_tokens.extend(tokens[i:])
                            return unused_tokens, None
                        unused_tokens.append(test_flag)
                        unused_token_original_indices.append(i)
                        position += 1
                if not matches:
                    # No valid matches found at all
                    continue
            else:
                if stop_at_first_unknown:
                    # Unknown option, stop parsing and return all remaining tokens
                    unused_tokens.extend(tokens[i:])
                    return unused_tokens, None
                unused_tokens.append(token)
                unused_token_original_indices.append(i)
                continue
        for match_index, match in enumerate(matches):
            # For GNU-style combined options, add the attached value only when processing
            # the last match (the value-taking option), not for preceding flags
            if attached_value is not None and match_index == len(matches) - 1:
                cli_values.append(attached_value)
            if match.argument.parameter.count:
                # Counting parameter (e.g. -vvv): each occurrence appends an implicit 1.
                match.argument.append(CliToken(keyword=match.matched_token, implicit_value=1))
            elif match.implicit_value is not UNSET:
                # A flag was parsed
                if cli_values:
                    # "--flag=<value>" form: the value must coerce to a bool.
                    try:
                        coerced_value = _bool(cli_values[-1])
                    except CoercionError as e:
                        # Enrich the error with context if the converter didn't set it.
                        if e.token is None:
                            e.token = CliToken(keyword=match.matched_token)
                        if e.argument is None:
                            e.argument = match.argument
                        raise
                    if coerced_value:  # --positive-flag=true or --negative-flag=true or --empty-flag=true
                        match.argument.append(
                            CliToken(keyword=match.matched_token, implicit_value=match.implicit_value)
                        )
                    else:  # --positive-flag=false or --negative-flag=false or --empty-flag=false
                        if isinstance(match.implicit_value, bool):
                            match.argument.append(
                                CliToken(keyword=match.matched_token, implicit_value=not match.implicit_value)
                            )
                        else:
                            # A negative for a non-bool field doesn't really make sense;
                            # e.g. --empty-list=False
                            # So we'll just silently skip it, as it may make bash scripting easier.
                            pass
                else:
                    match.argument.append(CliToken(keyword=match.matched_token, implicit_value=match.implicit_value))
            else:
                # This is a value-taking option (not a flag or counting parameter)
                # Error only if we're trying to combine multiple value-taking options without values
                # (e.g., -fu where both -f and -u take values would be invalid)
                # But -fu where -f is a flag and -u takes a value is valid (GNU-style)
                if len(matches) > 1:
                    # Count how many value-taking options we have
                    value_taking_count = sum(
                        1 for m in matches if m.implicit_value is UNSET and not m.argument.parameter.count
                    )
                    if value_taking_count > 1:
                        raise CombinedShortOptionError(
                            msg=f"Cannot combine multiple value-taking options in token {cli_option}"
                        )
                tokens_per_element, consume_all = match.argument.token_count(match.keys)
                if match.argument.parameter.requires_equals and match.matched_token.startswith("--") and not cli_values:
                    raise RequiresEqualsError(
                        argument=match.argument,
                        keyword=match.matched_token,
                    )
                # Consume the appropriate number of tokens
                # cm_bounds is either None or (min, max) — guaranteed by _consume_multiple_converter
                cm_bounds = match.argument.parameter.consume_multiple
                assert cm_bounds is None or isinstance(cm_bounds, tuple)
                cm_min, cm_max = cm_bounds if cm_bounds is not None else (0, None)
                # IndexError here simply means we ran off the end of the token stream;
                # validation of what was actually collected happens below.
                with suppress(IndexError):
                    if consume_all and cm_bounds is not None:
                        # Greedy: consume until the next option-like token (or stream end).
                        for j in itertools.count():
                            token = tokens[i + 1 + j]
                            if not match.argument.parameter.allow_leading_hyphen and is_option_like(token):
                                break
                            cli_values.append(token)
                            skip_next_iterations += 1
                    else:
                        consume_count += tokens_per_element
                        for j in range(consume_count):
                            if len(cli_values) == 1 and (
                                match.argument._should_attempt_json_dict(cli_values)
                                or match.argument._should_attempt_json_list(cli_values, match.keys)
                            ):
                                tokens_per_element = 1
                                # Assume that the contents are json and that we shouldn't
                                # consume any additional tokens.
                                break
                            token = tokens[i + 1 + j]
                            if not match.argument.parameter.allow_leading_hyphen and is_option_like(token):
                                raise MissingArgumentError(
                                    argument=match.argument,
                                    tokens_so_far=cli_values,
                                    keyword=match.matched_token,
                                )
                            cli_values.append(token)
                            skip_next_iterations += 1
                if not cli_values:
                    # No values were consumed after the keyword
                    if consume_all and cm_bounds is not None:
                        if cm_min > 0:
                            # Minimum count not met — treat as missing argument
                            raise ConsumeMultipleError(
                                argument=match.argument,
                                tokens_so_far=cli_values,
                                keyword=match.matched_token,
                                min_required=cm_min,
                                max_allowed=cm_max,
                                actual_count=0,
                            )
                        # Allow empty iterables (e.g., --urls with no values behaves like --empty-urls)
                        hint = resolve_optional(match.argument.hint)
                        empty_container = (get_origin(hint) or hint)()
                        match.argument.append(
                            CliToken(keyword=match.matched_token, implicit_value=empty_container, keys=match.keys)
                        )
                    else:
                        # Non-iterables or consume_multiple=False require at least one value
                        raise MissingArgumentError(
                            argument=match.argument, tokens_so_far=cli_values, keyword=match.matched_token
                        )
                elif len(cli_values) % tokens_per_element:
                    # For multi-token elements (e.g., tuples), ensure we have complete sets
                    raise MissingArgumentError(
                        argument=match.argument, tokens_so_far=cli_values, keyword=match.matched_token
                    )
                else:
                    # Check min/max count for consume_multiple
                    if cm_bounds is not None:
                        n_elements = len(cli_values) // max(1, tokens_per_element)
                        if n_elements < cm_min:
                            raise ConsumeMultipleError(
                                argument=match.argument,
                                tokens_so_far=cli_values,
                                keyword=match.matched_token,
                                min_required=cm_min,
                                max_allowed=cm_max,
                                actual_count=n_elements,
                            )
                        if cm_max is not None and n_elements > cm_max:
                            raise ConsumeMultipleError(
                                argument=match.argument,
                                tokens_so_far=cli_values,
                                keyword=match.matched_token,
                                min_required=cm_min,
                                max_allowed=cm_max,
                                actual_count=n_elements,
                            )
                    # Normal case: append the consumed values
                    for index, cli_value in enumerate(cli_values):
                        match.argument.append(
                            CliToken(keyword=match.matched_token, value=cli_value, index=index, keys=match.keys)
                        )
    # Compute the number of contiguous positional (non-option-like) unused tokens
    # before the first gap caused by keyword extraction. This prevents positional-only
    # list parameters from consuming tokens that appeared after keyword arguments.
    # Only set when a gap is detected; None means no gap (all tokens are contiguous).
    contiguous_positional_count: int | None = None
    for j in range(1, len(unused_token_original_indices)):
        if unused_token_original_indices[j] != unused_token_original_indices[j - 1] + 1:
            contiguous_positional_count = j
            break
    unused_tokens.extend(positional_only_tokens)
    return unused_tokens, contiguous_positional_count
def _future_positional_only_token_count(argument_collection: ArgumentCollection, starting_index: int) -> int:
    """Count tokens to reserve for upcoming POSITIONAL_ONLY parameters.

    Walks forward from ``starting_index`` while parameters remain
    POSITIONAL_ONLY, summing their per-element token counts.

    Raises
    ------
    ValueError
        If a second all-consuming positional argument is encountered.
    """
    reserved = 0
    for offset in itertools.count():
        try:
            argument, _, _ = argument_collection.match(starting_index + offset)
        except ValueError:
            break  # Ran out of positional parameters.
        if argument.field_info.kind is not POSITIONAL_ONLY:
            break
        per_element, consumes_all = argument.token_count()
        if consumes_all:
            raise ValueError("Cannot have 2 all-consuming positional arguments.")
        reserved += per_element
    return reserved
def _preprocess_positional_tokens(tokens: Sequence[str], end_of_options_delimiter: str) -> list[tuple[str, bool]]:
try:
delimiter_index = tokens.index(end_of_options_delimiter)
return [(t, False) for t in tokens[:delimiter_index]] + [(t, True) for t in tokens[delimiter_index + 1 :]]
except ValueError: # delimiter not found
return [(t, False) for t in tokens]
def _parse_pos(
    argument_collection: ArgumentCollection,
    tokens: list[str],
    *,
    end_of_options_delimiter: str = "--",
    contiguous_positional_count: int | None = None,
) -> list[str]:
    """Assign positional tokens to positional parameters.

    Parameters
    ----------
    argument_collection: ArgumentCollection
        Arguments whose keyword/flag tokens have already been consumed.
    tokens: list[str]
        Unused tokens from ``_parse_kw_and_flags``.
    end_of_options_delimiter: str
        Delimiter after which all tokens are forced positional.
    contiguous_positional_count: int | None
        If not ``None``, the number of leading contiguous positional tokens
        that were adjacent in the original CLI input (before keyword extraction
        created a gap). Used to cap how many tokens a ``POSITIONAL_ONLY``
        list/iterable parameter may consume, preventing it from greedily
        swallowing tokens that originally appeared after keyword arguments.
        See ``_parse_kw_and_flags`` for how this value is computed.

    Returns
    -------
    list[str]
        Tokens that could not be assigned to any positional parameter.
    """
    prior_positional_or_keyword_supplied_as_keyword_arguments = []
    if not tokens:
        return []
    tokens_and_force_positional = _preprocess_positional_tokens(tokens, end_of_options_delimiter)
    # Iterate positional parameters in declaration order (match by index).
    for i in itertools.count():
        try:
            argument, _, _ = argument_collection.match(i)
        except ValueError:
            break  # No more positional parameters.
        if argument.field_info.kind is POSITIONAL_OR_KEYWORD:
            if argument.tokens and argument.tokens[0].keyword is not None:
                # Already satisfied via keyword; positional tokens may not
                # fill any *later* positional parameter.
                prior_positional_or_keyword_supplied_as_keyword_arguments.append(argument)
                # Continue in case we hit a VAR_POSITIONAL argument.
                continue
        if prior_positional_or_keyword_supplied_as_keyword_arguments:
            token = tokens[0]
            if not argument.parameter.allow_leading_hyphen and is_option_like(token):
                # It's more meaningful to interpret the token as an intended option,
                # rather than an intended positional value for ``argument``.
                raise UnknownOptionError(token=CliToken(value=token), argument_collection=argument_collection)
            else:
                raise ArgumentOrderError(
                    argument=argument,
                    prior_positional_or_keyword_supplied_as_keyword_arguments=prior_positional_or_keyword_supplied_as_keyword_arguments,
                    token=tokens_and_force_positional[0][0],
                )
        tokens_per_element, consume_all = argument.token_count()
        tokens_per_element = max(1, tokens_per_element)
        if consume_all and argument.field_info.kind is POSITIONAL_ONLY:
            # POSITIONAL_ONLY parameters can come after a POSITIONAL_ONLY list/iterable.
            # This makes it easier to create programs that do something like:
            #    $ python my-program.py input_folder/*.csv output.csv
            # Need to see how many tokens we need to leave for subsequent POSITIONAL_ONLY parameters.
            n_tokens_to_leave = _future_positional_only_token_count(argument_collection, i + 1)
            # Cap at the contiguous positional count to prevent consuming tokens
            # that appeared after keyword arguments (issue #763).
            if contiguous_positional_count is not None:
                n_tokens_to_leave = max(
                    n_tokens_to_leave, len(tokens_and_force_positional) - contiguous_positional_count
                )
        else:
            n_tokens_to_leave = 0
        new_tokens = []
        # Consume tokens_per_element at a time; loop again only for consume-all parameters.
        while (len(tokens_and_force_positional) - n_tokens_to_leave) > 0:
            if (len(tokens_and_force_positional) - n_tokens_to_leave) < tokens_per_element:
                raise MissingArgumentError(
                    argument=argument,
                    tokens_so_far=[x[0] for x in tokens_and_force_positional],
                )
            for index, (token, force_positional) in enumerate(tokens_and_force_positional[:tokens_per_element]):
                if not force_positional and not argument.parameter.allow_leading_hyphen and is_option_like(token):
                    raise UnknownOptionError(token=CliToken(value=token), argument_collection=argument_collection)
                new_tokens.append(CliToken(value=token, index=index))
            tokens_and_force_positional = tokens_and_force_positional[tokens_per_element:]
            if not consume_all:
                break
        argument.tokens[:0] = new_tokens  # Prepend the new tokens to the argument.
        if not tokens_and_force_positional:
            break
    return [x[0] for x in tokens_and_force_positional]
def _parse_env(argument_collection: ArgumentCollection):
    """Populate token-less arguments from their environment variables.

    CLI-supplied tokens take precedence; for each remaining argument the first
    environment variable (in declaration order) that is set wins.
    """
    for argument in argument_collection:
        if argument.tokens:
            # Don't check environment variables for parameters that already have values from CLI.
            continue
        assert argument.parameter.env_var is not None
        for name in argument.parameter.env_var:
            value = os.environ.get(name)
            if value is not None:
                argument.tokens.append(Token(keyword=name, value=value, source="env"))
                break
def _bind(
    argument_collection: ArgumentCollection,
    func: Callable,
):
    """Bind converted argument values to ``func``'s signature.

    Only root arguments with a concrete (non-UNSET) value are bound; the
    binding is partial, so missing optional parameters are simply omitted.
    """
    signature = inspect.signature(func)
    bound = signature.bind_partial()
    for argument in argument_collection._root_arguments:
        value = argument.value
        if value is not UNSET:
            bound.arguments[argument.field_info.name] = value
    return bound
def _parse_configs(argument_collection: ArgumentCollection, configs):
    """Run every config callable over the argument collection.

    Each ``config`` is a partial with apps and commands already bound, so it
    only needs the collection itself.
    """
    for apply_config in configs:
        apply_config(argument_collection)
def _sort_group(argument_collection) -> list[tuple["Group", ArgumentCollection]]:
    """Sort groups into "deepest common-root-keys first" order.

    This is imperfect, but probably works sufficiently well for practical use-cases.
    """
    keyed = {}
    # Sort alphabetically by group name to enforce some determinism.
    ordered_groups = sorted(argument_collection.groups, key=lambda group: group.name)
    for index, group in enumerate(ordered_groups):
        members = argument_collection.filter_by(group=group)
        root_keys = _common_root_keys(members)
        # The enumeration index breaks ties between groups with identical root keys.
        keyed[(root_keys, index)] = (group, members.filter_by(keys_prefix=root_keys))
    return [pair for _, pair in sorted(keyed.items(), reverse=True)]
def create_bound_arguments(
    func: Callable,
    argument_collection: ArgumentCollection,
    tokens: list[str],
    configs: Iterable[Callable],
    *,
    end_of_options_delimiter: str = "--",
) -> tuple[inspect.BoundArguments, list[str]]:
    """Parse and coerce CLI tokens to match a function's signature.

    Resolution order: CLI keywords/flags, CLI positionals, environment
    variables, then config providers; afterwards values are converted,
    group validators run, and required arguments are checked.

    Parameters
    ----------
    func: Callable
        Function.
    argument_collection: ArgumentCollection
        Arguments describing ``func``'s signature; populated in-place.
    tokens: list[str]
        CLI tokens to parse and coerce to match ``f``'s signature.
    configs: Iterable[Callable]
        Config providers; each is a partial already bound to apps/commands.
    end_of_options_delimiter: str
        Everything after this special token is forced to be supplied as a positional argument.

    Returns
    -------
    bound: inspect.BoundArguments
        The converted and bound positional and keyword arguments for ``f``.
    unused_tokens: list[str]
        Remaining tokens that couldn't be matched to ``f``'s signature.

    Raises
    ------
    CycloptsError
        Any parsing/validation failure; re-raised with the root input tokens
        and unused tokens attached for error rendering.
    """
    unused_tokens = tokens
    try:
        unused_tokens, contiguous_positional_count = _parse_kw_and_flags(
            argument_collection, unused_tokens, end_of_options_delimiter=end_of_options_delimiter
        )
        unused_tokens = _parse_pos(
            argument_collection,
            unused_tokens,
            end_of_options_delimiter=end_of_options_delimiter,
            contiguous_positional_count=contiguous_positional_count,
        )
        _parse_env(argument_collection)
        _parse_configs(argument_collection, configs)
        argument_collection._convert()
        groups_with_arguments = _sort_group(argument_collection)
        try:
            for group, group_arguments in groups_with_arguments:
                for validator in group.validator:  # pyright: ignore
                    validator(group_arguments)  # pyright: ignore[reportOptionalCall]
        except (AssertionError, ValueError, TypeError) as e:
            # ``group`` is bound by the loop iteration that raised.
            raise ValidationError(exception_message=e.args[0] if e.args else "", group=group) from e  # pyright: ignore
        for argument in argument_collection:
            # if a dict-like argument is missing, raise a MissingArgumentError on the first
            # required child (as opposed generically to the root dict-like object).
            if argument.parse and argument.field_info.required and not argument.keys and not argument.has_tokens:
                raise MissingArgumentError(argument=argument)
        bound = _bind(argument_collection, func)
    except CycloptsError as e:
        # Attach parsing context so error rendering can show the full command line.
        e.root_input_tokens = tokens
        e.unused_tokens = unused_tokens
        raise
    return bound, unused_tokens
|