| from __future__ import annotations |
|
|
| from collections.abc import Sequence |
| from string import ascii_letters, digits, hexdigits |
| from urllib.parse import quote as encode_uri_component |
|
|
# Characters that are never percent-encoded, whatever `exclude` contains.
ASCII_LETTERS_AND_DIGITS = ascii_letters + digits

# Default `exclude` set for `encode()`: left unescaped in addition to the
# ASCII letters and digits.
ENCODE_DEFAULT_CHARS = ";/?:@&=+$,-_.!~*'()#"
# Narrower `exclude` set; not referenced in this part of the file
# (presumably meant for encoding a single URI component -- confirm with callers).
ENCODE_COMPONENT_CHARS = "-_.!~*'()"

# Memoized lookup tables built by `get_encode_cache()`, keyed by the
# `exclude` string they were built for.
encode_cache: dict[str, list[str]] = {}


def get_encode_cache(exclude: str) -> Sequence[str]:
    """Return the ASCII lookup table used to percent-encode a string.

    The table has 128 entries, indexed by code point.  Entry ``n`` is the
    character itself when ``chr(n)`` is an ASCII letter, a digit, or occurs
    in ``exclude``; otherwise it is the upper-case escape ``"%XX"``.

    Tables are memoized in ``encode_cache`` per ``exclude`` string, so
    repeated calls with the same argument return the same list object.

    Args:
        exclude: Characters to leave unescaped in addition to ASCII letters
            and digits.  Characters outside the ASCII range have no slot in
            the table and are skipped.

    Returns:
        The 128-entry lookup table for ``exclude``.
    """
    cached = encode_cache.get(exclude)
    if cached is not None:
        return cached

    table = [
        ch if ch in ASCII_LETTERS_AND_DIGITS else f"%{ord(ch):02X}"
        for ch in map(chr, range(128))
    ]

    for ch in exclude:
        code = ord(ch)
        # The table only covers ASCII; indexing past it would raise
        # IndexError, so non-ASCII characters are ignored here.
        if code < 128:
            table[code] = ch

    # Register the table only once it is complete, so a failure while
    # building it can never leave a half-populated entry behind.
    encode_cache[exclude] = table
    return table
|
|
|
|
| |
| |
| |
| |
| |
| |
def encode(
    string: str, exclude: str = ENCODE_DEFAULT_CHARS, *, keep_escaped: bool = True
) -> str:
    """Percent-encode the unsafe characters of ``string``.

    ASCII characters are translated through the lookup table returned by
    ``get_encode_cache(exclude)``.  Every other character is escaped with
    ``urllib.parse.quote`` (imported as ``encode_uri_component``).

    Args:
        string: The text to encode.
        exclude: ASCII characters to leave unescaped, in addition to ASCII
            letters and digits.
        keep_escaped: When true, an existing ``%XX`` sequence (``%``
            followed by two hex digits) is copied through unchanged instead
            of having its ``%`` escaped again.

    Returns:
        The encoded string.  A UTF-16 surrogate pair stored as two separate
        code points is encoded as the single character it represents; a
        lone surrogate becomes ``%EF%BF%BD`` (UTF-8 for U+FFFD).
    """
    cache = get_encode_cache(exclude)

    # Collect the pieces and join once: repeated `+=` on a str may be
    # quadratic.
    parts: list[str] = []
    length = len(string)
    i = 0
    while i < length:
        ch = string[i]
        code = ord(ch)

        # 0x25 is "%": pass an already-valid "%XX" escape through untouched.
        if (
            keep_escaped
            and code == 0x25
            and i + 2 < length
            and all(c in hexdigits for c in string[i + 1 : i + 3])
        ):
            parts.append(string[i : i + 3])
            i += 3
            continue

        if code < 128:
            parts.append(cache[code])
            i += 1
            continue

        if 0xD800 <= code <= 0xDFFF:
            # A high surrogate directly followed by a low surrogate forms a
            # pair.  `quote` encodes as strict UTF-8 and rejects surrogate
            # code points, so merge the pair into the real supplementary
            # character before escaping it.
            if code <= 0xDBFF and i + 1 < length:
                next_code = ord(string[i + 1])
                if 0xDC00 <= next_code <= 0xDFFF:
                    combined = chr(
                        0x10000 + ((code - 0xD800) << 10) + (next_code - 0xDC00)
                    )
                    parts.append(encode_uri_component(combined))
                    i += 2
                    continue
            # Unpaired surrogate: emit the escaped replacement character.
            parts.append("%EF%BF%BD")
            i += 1
            continue

        parts.append(encode_uri_component(ch))
        i += 1

    return "".join(parts)
|
|