import inspect
import itertools
import os
import shlex
import sys
from collections.abc import Callable, Iterable, Sequence
from contextlib import suppress
from functools import partial
from typing import TYPE_CHECKING, Any, NamedTuple, get_origin
from cyclopts._convert import _bool
from cyclopts.annotations import resolve_optional
from cyclopts.argument import Argument, ArgumentCollection
from cyclopts.exceptions import (
ArgumentOrderError,
CoercionError,
CombinedShortOptionError,
ConsumeMultipleError,
CycloptsError,
MissingArgumentError,
RequiresEqualsError,
UnknownOptionError,
ValidationError,
)
from cyclopts.field_info import POSITIONAL_ONLY, POSITIONAL_OR_KEYWORD
from cyclopts.token import Token
from cyclopts.utils import UNSET, is_option_like
# NOTE(review): both branches are empty, so this version gate is currently a
# no-op; presumably a version-dependent import (e.g. ``typing`` vs.
# ``typing_extensions``) lived here — confirm against upstream history.
if sys.version_info < (3, 11):  # pragma: no cover
    pass
else:  # pragma: no cover
    pass
if TYPE_CHECKING:
from cyclopts.group import Group
# Factory for tokens originating from the command line: every Token built via
# ``CliToken`` has ``source="cli"`` pre-filled.
CliToken = partial(Token, source="cli")
class _KeywordMatch(NamedTuple):
    """Pairs a matched CLI token with the Argument it resolved to."""

    matched_token: str
    """The literal CLI token that matched (e.g., '-o', '--option')."""
    argument: Argument
    """Argument object the token resolved to."""
    keys: tuple[str, ...]
    """Remaining keys for nested (dict-like) arguments."""
    implicit_value: Any
    """Value implied when the match is a flag; UNSET when the option takes a value."""
def normalize_tokens(tokens: None | str | Iterable[str]) -> list[str]:
if tokens is None:
tokens = sys.argv[1:] # Remove the executable
elif isinstance(tokens, str):
tokens = shlex.split(tokens)
else:
tokens = list(tokens)
return tokens
def _common_root_keys(argument_collection) -> tuple[str, ...]:
if not argument_collection:
return ()
common = argument_collection[0].keys
for argument in argument_collection[1:]:
if not argument.keys:
return ()
for i, (common_key, argument_key) in enumerate(zip(common, argument.keys, strict=False)):
if common_key != argument_key:
if i == 0:
return ()
common = argument.keys[:i]
break
common = common[: len(argument.keys)]
return common
def _parse_kw_and_flags(
    argument_collection: ArgumentCollection,
    tokens: Sequence[str],
    *,
    end_of_options_delimiter: str = "--",
    stop_at_first_unknown: bool = False,
) -> tuple[list[str], int | None]:
    """Extract keyword arguments and flags from the token stream.

    Matched tokens are appended onto the matching ``Argument`` in-place;
    everything else is returned for positional parsing.

    Parameters
    ----------
    argument_collection: ArgumentCollection
        Arguments to match keyword tokens against.
    tokens: Sequence[str]
        Raw CLI tokens.
    end_of_options_delimiter: str
        Tokens at/after this delimiter are never treated as keywords; they are
        passed through untouched (delimiter included) for ``_parse_pos``.
    stop_at_first_unknown: bool
        If true, stop at the first unknown option and return the remainder unused.

    Returns
    -------
    unused_tokens: list[str]
        Tokens not consumed by any keyword or flag.
    contiguous_positional_count: int | None
        Number of leading contiguous non-option tokens before the first gap
        caused by keyword extraction. ``None`` if all non-option tokens are
        contiguous (i.e. no keywords were interleaved among positional tokens).

        For example, given ``a b c --bar 8 --baz 10 d``, the unused tokens are
        ``['a', 'b', 'c', 'd']`` with original indices ``[0, 1, 2, 6]``.
        The gap between indices 2 and 6 yields ``contiguous_positional_count=3``.

        This is used by ``_parse_pos`` to prevent positional-only list parameters
        from consuming tokens that appeared after keyword arguments.
    """
    unused_tokens, positional_only_tokens = [], []
    unused_token_original_indices: list[int] = []
    # Number of upcoming tokens already consumed as values for a keyword.
    skip_next_iterations = 0
    if end_of_options_delimiter:
        try:
            delimiter_index = tokens.index(end_of_options_delimiter)
        except ValueError:
            pass  # end_of_options_delimiter not in token stream
        else:
            # Keep the delimiter itself; ``_parse_pos`` re-detects and drops it.
            positional_only_tokens = tokens[delimiter_index:]
            tokens = tokens[:delimiter_index]
    for i, token in enumerate(tokens):
        # If the previous argument was a keyword, then this is its value
        if skip_next_iterations > 0:
            skip_next_iterations -= 1
            continue
        if not is_option_like(token, allow_numbers=True):
            if stop_at_first_unknown:
                # Stop parsing and return all remaining tokens as unused
                unused_tokens.extend(tokens[i:])
                unused_token_original_indices.extend(range(i, len(tokens)))
                break
            unused_tokens.append(token)
            unused_token_original_indices.append(i)
            continue
        cli_values: list[str] = []
        # Adjustment applied to the number of follow-on tokens to consume;
        # a value supplied via "=" or attached GNU-style counts as one already consumed.
        consume_count = 0
        # startswith("-") is redundant, but it's cheap safety.
        allow_combined_flags = token.startswith("-") and not token.startswith("--")
        # Try splitting on "=" for long options or short options that match exactly
        if "=" in token:
            cli_option, cli_value = token.split("=", 1)
            # Try to match the part before "="
            try:
                argument_collection.match(cli_option)
                # Matched! Use the split
                cli_values.append(cli_value)
                consume_count -= 1
                allow_combined_flags = False
            except ValueError:
                # No match - might be GNU-style like "-pfile=value"
                # Don't split, treat whole token as the option
                cli_option = token
        else:
            cli_option = token
        matches: list[_KeywordMatch] = []
        attached_value: str | None = None  # Track value attached to a GNU-style combined option
        try:
            matches.append(_KeywordMatch(cli_option, *argument_collection.match(cli_option)))
        except ValueError:
            # Length has to be greater than 2 (hyphen + character) to be exploded.
            # Also exclude numeric values (e.g., -10, -3.14) from combined flag parsing.
            if allow_combined_flags and len(token) > 2 and is_option_like(token, allow_numbers=False):
                # GNU-style combined short options: process left-to-right
                # Once we hit an option that takes a value, the rest is the value
                chars = cli_option.lstrip("-")
                position = 0
                while position < len(chars):
                    char = chars[position]
                    test_flag = f"-{char}"
                    try:
                        arg, keys, implicit = argument_collection.match(test_flag)
                        if implicit is not UNSET or arg.parameter.count:
                            # This is a flag (boolean or counting) - consume just this character
                            matches.append(_KeywordMatch(test_flag, arg, keys, implicit))
                            position += 1
                        else:
                            # This option takes a value - rest of the string is the value
                            remainder = chars[position + 1 :]
                            matches.append(_KeywordMatch(test_flag, arg, keys, implicit))
                            if remainder:
                                # Value is attached: -uroot or -fvuroot
                                # Store it separately, will be added to cli_values when processing this match
                                attached_value = remainder
                                consume_count -= 1
                            # Stop processing further characters
                            break
                    except ValueError:
                        # Unknown flag
                        if stop_at_first_unknown:
                            unused_tokens.extend(tokens[i:])
                            return unused_tokens, None
                        unused_tokens.append(test_flag)
                        unused_token_original_indices.append(i)
                        position += 1
                if not matches:
                    # No valid matches found at all
                    continue
            else:
                if stop_at_first_unknown:
                    # Unknown option, stop parsing and return all remaining tokens
                    unused_tokens.extend(tokens[i:])
                    return unused_tokens, None
                unused_tokens.append(token)
                unused_token_original_indices.append(i)
                continue
        for match_index, match in enumerate(matches):
            # For GNU-style combined options, add the attached value only when processing
            # the last match (the value-taking option), not for preceding flags
            if attached_value is not None and match_index == len(matches) - 1:
                cli_values.append(attached_value)
            if match.argument.parameter.count:
                # Counting parameter (e.g. -vvv): each occurrence appends an implicit 1.
                match.argument.append(CliToken(keyword=match.matched_token, implicit_value=1))
            elif match.implicit_value is not UNSET:
                # A flag was parsed
                if cli_values:
                    # "--flag=<value>" form: the value must coerce to a bool.
                    try:
                        coerced_value = _bool(cli_values[-1])
                    except CoercionError as e:
                        # Enrich the error with context if the converter didn't set it.
                        if e.token is None:
                            e.token = CliToken(keyword=match.matched_token)
                        if e.argument is None:
                            e.argument = match.argument
                        raise
                    if coerced_value:  # --positive-flag=true or --negative-flag=true or --empty-flag=true
                        match.argument.append(
                            CliToken(keyword=match.matched_token, implicit_value=match.implicit_value)
                        )
                    else:  # --positive-flag=false or --negative-flag=false or --empty-flag=false
                        if isinstance(match.implicit_value, bool):
                            match.argument.append(
                                CliToken(keyword=match.matched_token, implicit_value=not match.implicit_value)
                            )
                        else:
                            # A negative for a non-bool field doesn't really make sense;
                            # e.g. --empty-list=False
                            # So we'll just silently skip it, as it may make bash scripting easier.
                            pass
                else:
                    match.argument.append(CliToken(keyword=match.matched_token, implicit_value=match.implicit_value))
            else:
                # This is a value-taking option (not a flag or counting parameter)
                # Error only if we're trying to combine multiple value-taking options without values
                # (e.g., -fu where both -f and -u take values would be invalid)
                # But -fu where -f is a flag and -u takes a value is valid (GNU-style)
                if len(matches) > 1:
                    # Count how many value-taking options we have
                    value_taking_count = sum(
                        1 for m in matches if m.implicit_value is UNSET and not m.argument.parameter.count
                    )
                    if value_taking_count > 1:
                        raise CombinedShortOptionError(
                            msg=f"Cannot combine multiple value-taking options in token {cli_option}"
                        )
                tokens_per_element, consume_all = match.argument.token_count(match.keys)
                if match.argument.parameter.requires_equals and match.matched_token.startswith("--") and not cli_values:
                    raise RequiresEqualsError(
                        argument=match.argument,
                        keyword=match.matched_token,
                    )
                # Consume the appropriate number of tokens
                # cm_bounds is either None or (min, max) — guaranteed by _consume_multiple_converter
                cm_bounds = match.argument.parameter.consume_multiple
                assert cm_bounds is None or isinstance(cm_bounds, tuple)
                cm_min, cm_max = cm_bounds if cm_bounds is not None else (0, None)
                # IndexError here simply means we ran off the end of the token stream;
                # validation of what was actually collected happens below.
                with suppress(IndexError):
                    if consume_all and cm_bounds is not None:
                        # Greedy: consume until the next option-like token (or stream end).
                        for j in itertools.count():
                            token = tokens[i + 1 + j]
                            if not match.argument.parameter.allow_leading_hyphen and is_option_like(token):
                                break
                            cli_values.append(token)
                            skip_next_iterations += 1
                    else:
                        consume_count += tokens_per_element
                        for j in range(consume_count):
                            if len(cli_values) == 1 and (
                                match.argument._should_attempt_json_dict(cli_values)
                                or match.argument._should_attempt_json_list(cli_values, match.keys)
                            ):
                                tokens_per_element = 1
                                # Assume that the contents are json and that we shouldn't
                                # consume any additional tokens.
                                break
                            token = tokens[i + 1 + j]
                            if not match.argument.parameter.allow_leading_hyphen and is_option_like(token):
                                raise MissingArgumentError(
                                    argument=match.argument,
                                    tokens_so_far=cli_values,
                                    keyword=match.matched_token,
                                )
                            cli_values.append(token)
                            skip_next_iterations += 1
                if not cli_values:
                    # No values were consumed after the keyword
                    if consume_all and cm_bounds is not None:
                        if cm_min > 0:
                            # Minimum count not met — treat as missing argument
                            raise ConsumeMultipleError(
                                argument=match.argument,
                                tokens_so_far=cli_values,
                                keyword=match.matched_token,
                                min_required=cm_min,
                                max_allowed=cm_max,
                                actual_count=0,
                            )
                        # Allow empty iterables (e.g., --urls with no values behaves like --empty-urls)
                        hint = resolve_optional(match.argument.hint)
                        empty_container = (get_origin(hint) or hint)()
                        match.argument.append(
                            CliToken(keyword=match.matched_token, implicit_value=empty_container, keys=match.keys)
                        )
                    else:
                        # Non-iterables or consume_multiple=False require at least one value
                        raise MissingArgumentError(
                            argument=match.argument, tokens_so_far=cli_values, keyword=match.matched_token
                        )
                elif len(cli_values) % tokens_per_element:
                    # For multi-token elements (e.g., tuples), ensure we have complete sets
                    raise MissingArgumentError(
                        argument=match.argument, tokens_so_far=cli_values, keyword=match.matched_token
                    )
                else:
                    # Check min/max count for consume_multiple
                    if cm_bounds is not None:
                        n_elements = len(cli_values) // max(1, tokens_per_element)
                        if n_elements < cm_min:
                            raise ConsumeMultipleError(
                                argument=match.argument,
                                tokens_so_far=cli_values,
                                keyword=match.matched_token,
                                min_required=cm_min,
                                max_allowed=cm_max,
                                actual_count=n_elements,
                            )
                        if cm_max is not None and n_elements > cm_max:
                            raise ConsumeMultipleError(
                                argument=match.argument,
                                tokens_so_far=cli_values,
                                keyword=match.matched_token,
                                min_required=cm_min,
                                max_allowed=cm_max,
                                actual_count=n_elements,
                            )
                    # Normal case: append the consumed values
                    for index, cli_value in enumerate(cli_values):
                        match.argument.append(
                            CliToken(keyword=match.matched_token, value=cli_value, index=index, keys=match.keys)
                        )
    # Compute the number of contiguous positional (non-option-like) unused tokens
    # before the first gap caused by keyword extraction. This prevents positional-only
    # list parameters from consuming tokens that appeared after keyword arguments.
    # Only set when a gap is detected; None means no gap (all tokens are contiguous).
    contiguous_positional_count: int | None = None
    for j in range(1, len(unused_token_original_indices)):
        if unused_token_original_indices[j] != unused_token_original_indices[j - 1] + 1:
            contiguous_positional_count = j
            break
    unused_tokens.extend(positional_only_tokens)
    return unused_tokens, contiguous_positional_count
def _future_positional_only_token_count(argument_collection: ArgumentCollection, starting_index: int) -> int:
    """Count tokens to reserve for upcoming POSITIONAL_ONLY parameters.

    Walks forward from ``starting_index`` while parameters remain
    POSITIONAL_ONLY, summing their per-element token counts.

    Raises
    ------
    ValueError
        If a second all-consuming positional argument is encountered.
    """
    reserved = 0
    for offset in itertools.count():
        try:
            argument, _, _ = argument_collection.match(starting_index + offset)
        except ValueError:
            break  # Ran out of positional parameters.
        if argument.field_info.kind is not POSITIONAL_ONLY:
            break
        per_element, consumes_all = argument.token_count()
        if consumes_all:
            raise ValueError("Cannot have 2 all-consuming positional arguments.")
        reserved += per_element
    return reserved
def _preprocess_positional_tokens(tokens: Sequence[str], end_of_options_delimiter: str) -> list[tuple[str, bool]]:
try:
delimiter_index = tokens.index(end_of_options_delimiter)
return [(t, False) for t in tokens[:delimiter_index]] + [(t, True) for t in tokens[delimiter_index + 1 :]]
except ValueError: # delimiter not found
return [(t, False) for t in tokens]
def _parse_pos(
    argument_collection: ArgumentCollection,
    tokens: list[str],
    *,
    end_of_options_delimiter: str = "--",
    contiguous_positional_count: int | None = None,
) -> list[str]:
    """Assign positional tokens to positional parameters.

    Parameters
    ----------
    argument_collection: ArgumentCollection
        Arguments whose keyword/flag tokens have already been consumed.
    tokens: list[str]
        Unused tokens from ``_parse_kw_and_flags``.
    end_of_options_delimiter: str
        Delimiter after which all tokens are forced positional.
    contiguous_positional_count: int | None
        If not ``None``, the number of leading contiguous positional tokens
        that were adjacent in the original CLI input (before keyword extraction
        created a gap). Used to cap how many tokens a ``POSITIONAL_ONLY``
        list/iterable parameter may consume, preventing it from greedily
        swallowing tokens that originally appeared after keyword arguments.
        See ``_parse_kw_and_flags`` for how this value is computed.

    Returns
    -------
    list[str]
        Tokens that could not be assigned to any positional parameter.
    """
    prior_positional_or_keyword_supplied_as_keyword_arguments = []
    if not tokens:
        return []
    tokens_and_force_positional = _preprocess_positional_tokens(tokens, end_of_options_delimiter)
    # Iterate positional parameters in declaration order (match by index).
    for i in itertools.count():
        try:
            argument, _, _ = argument_collection.match(i)
        except ValueError:
            break  # No more positional parameters.
        if argument.field_info.kind is POSITIONAL_OR_KEYWORD:
            if argument.tokens and argument.tokens[0].keyword is not None:
                # Already satisfied via keyword; positional tokens may not
                # fill any *later* positional parameter.
                prior_positional_or_keyword_supplied_as_keyword_arguments.append(argument)
                # Continue in case we hit a VAR_POSITIONAL argument.
                continue
        if prior_positional_or_keyword_supplied_as_keyword_arguments:
            token = tokens[0]
            if not argument.parameter.allow_leading_hyphen and is_option_like(token):
                # It's more meaningful to interpret the token as an intended option,
                # rather than an intended positional value for ``argument``.
                raise UnknownOptionError(token=CliToken(value=token), argument_collection=argument_collection)
            else:
                raise ArgumentOrderError(
                    argument=argument,
                    prior_positional_or_keyword_supplied_as_keyword_arguments=prior_positional_or_keyword_supplied_as_keyword_arguments,
                    token=tokens_and_force_positional[0][0],
                )
        tokens_per_element, consume_all = argument.token_count()
        tokens_per_element = max(1, tokens_per_element)
        if consume_all and argument.field_info.kind is POSITIONAL_ONLY:
            # POSITIONAL_ONLY parameters can come after a POSITIONAL_ONLY list/iterable.
            # This makes it easier to create programs that do something like:
            #    $ python my-program.py input_folder/*.csv output.csv
            # Need to see how many tokens we need to leave for subsequent POSITIONAL_ONLY parameters.
            n_tokens_to_leave = _future_positional_only_token_count(argument_collection, i + 1)
            # Cap at the contiguous positional count to prevent consuming tokens
            # that appeared after keyword arguments (issue #763).
            if contiguous_positional_count is not None:
                n_tokens_to_leave = max(
                    n_tokens_to_leave, len(tokens_and_force_positional) - contiguous_positional_count
                )
        else:
            n_tokens_to_leave = 0
        new_tokens = []
        # Consume tokens_per_element at a time; loop again only for consume-all parameters.
        while (len(tokens_and_force_positional) - n_tokens_to_leave) > 0:
            if (len(tokens_and_force_positional) - n_tokens_to_leave) < tokens_per_element:
                raise MissingArgumentError(
                    argument=argument,
                    tokens_so_far=[x[0] for x in tokens_and_force_positional],
                )
            for index, (token, force_positional) in enumerate(tokens_and_force_positional[:tokens_per_element]):
                if not force_positional and not argument.parameter.allow_leading_hyphen and is_option_like(token):
                    raise UnknownOptionError(token=CliToken(value=token), argument_collection=argument_collection)
                new_tokens.append(CliToken(value=token, index=index))
            tokens_and_force_positional = tokens_and_force_positional[tokens_per_element:]
            if not consume_all:
                break
        argument.tokens[:0] = new_tokens  # Prepend the new tokens to the argument.
        if not tokens_and_force_positional:
            break
    return [x[0] for x in tokens_and_force_positional]
def _parse_env(argument_collection: ArgumentCollection):
    """Populate token-less arguments from their environment variables.

    CLI-supplied tokens take precedence; for each remaining argument the first
    environment variable (in declaration order) that is set wins.
    """
    for argument in argument_collection:
        if argument.tokens:
            # Don't check environment variables for parameters that already have values from CLI.
            continue
        assert argument.parameter.env_var is not None
        for name in argument.parameter.env_var:
            value = os.environ.get(name)
            if value is not None:
                argument.tokens.append(Token(keyword=name, value=value, source="env"))
                break
def _bind(
    argument_collection: ArgumentCollection,
    func: Callable,
):
    """Bind converted argument values to ``func``'s signature.

    Only root arguments with a concrete (non-UNSET) value are bound; the
    binding is partial, so missing optional parameters are simply omitted.
    """
    signature = inspect.signature(func)
    bound = signature.bind_partial()
    for argument in argument_collection._root_arguments:
        value = argument.value
        if value is not UNSET:
            bound.arguments[argument.field_info.name] = value
    return bound
def _parse_configs(argument_collection: ArgumentCollection, configs):
    """Run every config callable over the argument collection.

    Each ``config`` is a partial with apps and commands already bound, so it
    only needs the collection itself.
    """
    for apply_config in configs:
        apply_config(argument_collection)
def _sort_group(argument_collection) -> list[tuple["Group", ArgumentCollection]]:
    """Sort groups into "deepest common-root-keys first" order.

    This is imperfect, but probably works sufficiently well for practical use-cases.
    """
    keyed = {}
    # Sort alphabetically by group name to enforce some determinism.
    ordered_groups = sorted(argument_collection.groups, key=lambda group: group.name)
    for index, group in enumerate(ordered_groups):
        members = argument_collection.filter_by(group=group)
        root_keys = _common_root_keys(members)
        # The enumeration index breaks ties between groups with identical root keys.
        keyed[(root_keys, index)] = (group, members.filter_by(keys_prefix=root_keys))
    return [pair for _, pair in sorted(keyed.items(), reverse=True)]
def create_bound_arguments(
    func: Callable,
    argument_collection: ArgumentCollection,
    tokens: list[str],
    configs: Iterable[Callable],
    *,
    end_of_options_delimiter: str = "--",
) -> tuple[inspect.BoundArguments, list[str]]:
    """Parse and coerce CLI tokens to match a function's signature.

    Resolution order: CLI keywords/flags, CLI positionals, environment
    variables, then config providers; afterwards values are converted,
    group validators run, and required arguments are checked.

    Parameters
    ----------
    func: Callable
        Function.
    argument_collection: ArgumentCollection
        Arguments describing ``func``'s signature; populated in-place.
    tokens: list[str]
        CLI tokens to parse and coerce to match ``f``'s signature.
    configs: Iterable[Callable]
        Config providers; each is a partial already bound to apps/commands.
    end_of_options_delimiter: str
        Everything after this special token is forced to be supplied as a positional argument.

    Returns
    -------
    bound: inspect.BoundArguments
        The converted and bound positional and keyword arguments for ``f``.
    unused_tokens: list[str]
        Remaining tokens that couldn't be matched to ``f``'s signature.

    Raises
    ------
    CycloptsError
        Any parsing/validation failure; re-raised with the root input tokens
        and unused tokens attached for error rendering.
    """
    unused_tokens = tokens
    try:
        unused_tokens, contiguous_positional_count = _parse_kw_and_flags(
            argument_collection, unused_tokens, end_of_options_delimiter=end_of_options_delimiter
        )
        unused_tokens = _parse_pos(
            argument_collection,
            unused_tokens,
            end_of_options_delimiter=end_of_options_delimiter,
            contiguous_positional_count=contiguous_positional_count,
        )
        _parse_env(argument_collection)
        _parse_configs(argument_collection, configs)
        argument_collection._convert()
        groups_with_arguments = _sort_group(argument_collection)
        try:
            for group, group_arguments in groups_with_arguments:
                for validator in group.validator:  # pyright: ignore
                    validator(group_arguments)  # pyright: ignore[reportOptionalCall]
        except (AssertionError, ValueError, TypeError) as e:
            # ``group`` is bound by the loop iteration that raised.
            raise ValidationError(exception_message=e.args[0] if e.args else "", group=group) from e  # pyright: ignore
        for argument in argument_collection:
            # if a dict-like argument is missing, raise a MissingArgumentError on the first
            # required child (as opposed generically to the root dict-like object).
            if argument.parse and argument.field_info.required and not argument.keys and not argument.has_tokens:
                raise MissingArgumentError(argument=argument)
        bound = _bind(argument_collection, func)
    except CycloptsError as e:
        # Attach parsing context so error rendering can show the full command line.
        e.root_input_tokens = tokens
        e.unused_tokens = unused_tokens
        raise
    return bound, unused_tokens
|