from __future__ import annotations
from functools import lru_cache
from operator import itemgetter
from typing import Callable, NamedTuple, Sequence, Tuple
from rich._unicode_data import load as load_cell_table
# A grapheme span: (start index, end index, cell length), the element type
# of the span list returned by `split_graphemes`.
CellSpan = Tuple[int, int, int]
# Extracts the cell length (item 2) from a span tuple.
_span_get_cell_len = itemgetter(2)
# Inclusive (first, last) codepoint ranges whose characters each occupy exactly
# one terminal cell. Deliberately non-exhaustive: it only needs to cover the
# most common Western characters so that typical text can take a fast path.
_SINGLE_CELL_UNICODE_RANGES: list[tuple[int, int]] = [
    (0x0020, 0x007E),  # Latin (excluding non-printable)
    (0x00A0, 0x00AC),
    (0x00AE, 0x02FF),  # resumes after U+00AD, which is left out of the ranges
    (0x0370, 0x0482),  # Greek / Cyrillic
    (0x2500, 0x25FC),  # Box drawing, box elements, geometric shapes
    (0x2800, 0x28FF),  # Braille
]

# Every character covered by the ranges above, expanded into a frozen set
_SINGLE_CELLS = frozenset(
    chr(codepoint)
    for first, last in _SINGLE_CELL_UNICODE_RANGES
    for codepoint in range(first, last + 1)
)

# Predicate over a string: True when every character is a known single-cell
# character (vacuously True for the empty string), otherwise False
_is_single_cell_widths: Callable[[str], bool] = _SINGLE_CELLS.issuperset
class CellTable(NamedTuple):
    """Contains unicode data required to measure the cell widths of glyphs.

    NOTE(review): the field descriptions below assume this is the shape of the
    object returned by `load_cell_table` -- confirm against `rich._unicode_data`.
    """

    # Presumably the Unicode version this table was built for -- TODO confirm
    unicode_version: str
    # Triples that the lookup code unpacks as (start, end, width); the binary
    # search over them assumes they are sorted and non-overlapping
    widths: Sequence[tuple[int, int, int]]
    # Characters that gain one cell when followed by variation selector 16
    narrow_to_wide: frozenset[str]
@lru_cache(maxsize=4096)
def get_character_cell_size(character: str, unicode_version: str = "auto") -> int:
    """Return how many terminal cells a single character occupies.

    Results are memoised per (character, unicode_version) pair.

    Args:
        character (str): A single character.
        unicode_version: Unicode version, `"auto"` to auto detect, `"latest"` for the latest unicode version.

    Returns:
        int: Number of cells (0, 1 or 2) occupied by that character.
    """
    codepoint = ord(character)

    # Codepoints 1-31 and 0x7F-0x9F take no space. NUL (0) is deliberately not
    # short-circuited here and goes through the table lookup below.
    if 0 < codepoint < 32 or 0x7F <= codepoint < 0xA0:
        return 0

    widths = load_cell_table(unicode_version).widths

    # Anything beyond the final range in the table defaults to a single cell
    if codepoint > widths[-1][1]:
        return 1

    # Binary search for the (start, end, width) range containing the codepoint
    low, high = 0, len(widths) - 1
    while low <= high:
        middle = (low + high) // 2
        range_start, range_end, cell_width = widths[middle]
        if codepoint < range_start:
            high = middle - 1
        elif codepoint > range_end:
            low = middle + 1
        else:
            return cell_width

    # Codepoint falls in a gap between ranges
    return 1
@lru_cache(maxsize=4096)
def cached_cell_len(text: str, unicode_version: str = "auto") -> int:
    """Measure the number of cells required to display text, memoising the result.

    Every call is cached, which may use up a lot of memory. Prefer `cell_len`,
    which only goes through this cache for short strings.

    Args:
        text (str): Text to display.
        unicode_version: Unicode version, `"auto"` to auto detect, `"latest"` for the latest unicode version.

    Returns:
        int: The number of cells required to display text.
    """
    width = _cell_len(text, unicode_version)
    return width
def cell_len(text: str, unicode_version: str = "auto") -> int:
    """Measure a string in terminal cells (its length as it appears on screen).

    Args:
        text: String to measure.
        unicode_version: Unicode version, `"auto"` to auto detect, `"latest"` for the latest unicode version.

    Returns:
        Length of string in terminal cells.
    """
    # Only strings shorter than 512 characters go through the LRU cache;
    # longer ones are measured directly each time.
    measure = cached_cell_len if len(text) < 512 else _cell_len
    return measure(text, unicode_version)
def _cell_len(text: str, unicode_version: str) -> int:
    """Measure a string in terminal cells without any caching.

    Args:
        text: String to measure.
        unicode_version: Unicode version, `"auto"` to auto detect, `"latest"` for the latest unicode version.

    Returns:
        Length of string in terminal cells.
    """
    # Fast path: every character is known to be exactly one cell wide
    if _is_single_cell_widths(text):
        return len(text)

    zero_width_joiner = "\u200d"
    variation_selector_16 = "\ufe0f"

    if zero_width_joiner not in text and variation_selector_16 not in text:
        # No joiners or selectors, so the width is the sum of the per-character widths
        return sum(
            get_character_cell_size(character, unicode_version) for character in text
        )

    cell_table = load_cell_table(unicode_version)
    width = 0
    # Most recent character with a non-zero width; the one a following
    # variation selector may widen
    previous: str | None = None
    position = 0
    length = len(text)

    while position < length:
        current = text[position]
        position += 1

        if current == zero_width_joiner:
            # The joiner and the character it joins add nothing: skip both
            position += 1
            continue

        if current == variation_selector_16:
            if previous:
                # Adds one cell when the previous character is in narrow_to_wide
                width += previous in cell_table.narrow_to_wide
                previous = None
            continue

        character_width = get_character_cell_size(current, unicode_version)
        if character_width:
            previous = current
            width += character_width

    return width
def split_graphemes(
    text: str, unicode_version: str = "auto"
) -> "tuple[list[CellSpan], int]":
    """Divide text into spans that define a single grapheme, and additionally return the cell length of the whole string.

    The returned spans will cover every index in the string, with no gaps. It is possible for some graphemes to have a cell length of zero.
    This can occur for nonsense strings like two zero width joiners, or for control codes that don't contribute to the grapheme size.

    Args:
        text: String to split.
        unicode_version: Unicode version, `"auto"` to auto detect, `"latest"` for the latest unicode version.

    Returns:
        A tuple of a list of *spans* and the cell length of the entire string. Each span is a tuple
        of three values consisting of (<START>, <END>, <CELL LENGTH>), where START and END are string indices,
        and CELL LENGTH is the cell length of the single grapheme.
    """
    cell_table = load_cell_table(unicode_version)
    codepoint_count = len(text)
    index = 0
    # Last character that contributed a non-zero width (candidate for widening by variation selector 16)
    last_measured_character: str | None = None

    total_width = 0
    spans: list[tuple[int, int, int]] = []
    SPECIAL = {"\u200d", "\ufe0f"}
    while index < codepoint_count:
        if (character := text[index]) in SPECIAL:
            if not spans:
                # ZWJ or variation selector at the beginning of the string doesn't really make sense.
                # But handle it, we must.
                # (Tuple items evaluate left to right: the start is the old index, the end the incremented one.)
                spans.append((index, index := index + 1, 0))
                continue
            if character == "\u200d":
                # zero width joiner
                # Consume the joiner and the character it joins, folding both into the previous span.
                # The condition handles the case where a ZWJ is at the end of the string, and has nothing to join
                index += 2 if index < (codepoint_count - 1) else 1
                start, _end, cell_length = spans[-1]
                spans[-1] = (start, index, cell_length)
            else:
                # variation selector 16
                index += 1
                if last_measured_character:
                    start, _end, cell_length = spans[-1]
                    if last_measured_character in cell_table.narrow_to_wide:
                        # The selector widens the previous character by one cell (at most once)
                        last_measured_character = None
                        cell_length += 1
                        total_width += 1
                    # Extend the previous span over the selector either way, so there are no gaps
                    spans[-1] = (start, index, cell_length)
                else:
                    # No previous character to change the size of.
                    # Shouldn't occur in practice.
                    # But handle it, we must.
                    start, _end, cell_length = spans[-1]
                    spans[-1] = (start, index, cell_length)
            continue

        if character_width := get_character_cell_size(character, unicode_version):
            # A visible character starts a new span
            last_measured_character = character
            spans.append((index, index := index + 1, character_width))
            total_width += character_width
        else:
            # Character has zero width
            if spans:
                # zero width characters are associated with the previous character
                start, _end, cell_length = spans[-1]
                spans[-1] = (start, index := index + 1, cell_length)
            else:
                # A zero width character with no prior spans
                spans.append((index, index := index + 1, 0))

    return (spans, total_width)
def _split_text(
    text: str, cell_position: int, unicode_version: str = "auto"
) -> tuple[str, str]:
    """Split text by cell position.

    If the cell position falls within a double width character, it is converted to two spaces.

    Args:
        text: Text to split.
        cell_position: Offset in cells.
        unicode_version: Unicode version, `"auto"` to auto detect, `"latest"` for the latest unicode version.

    Returns:
        Tuple of two split strings. A position at or before the start yields
        `("", text)`; a position at or beyond the end yields `(text, "")`.
    """
    if cell_position <= 0:
        return "", text

    spans, cell_length = split_graphemes(text, unicode_version)

    # Splitting at or past the end keeps the whole text on the left. This guard
    # also protects the estimate below from dividing by zero when the text has
    # no visible width, and stops the search loop from indexing past the last
    # span when the requested position is beyond the text.
    if cell_position >= cell_length:
        return text, ""

    # Guess initial offset, assuming cells are spread evenly over the spans
    offset = int((cell_position / cell_length) * len(spans))
    left_size = sum(map(_span_get_cell_len, spans[:offset]))

    # Walk span by span from the guess until the boundary is found
    while True:
        if left_size == cell_position:
            if offset >= len(spans):
                return text, ""
            split_index = spans[offset][0]
            return text[:split_index], text[split_index:]
        if left_size < cell_position:
            start, end, cell_size = spans[offset]
            if left_size + cell_size > cell_position:
                # Position lands inside this grapheme: replace it with a space on each side
                return text[:start] + " ", " " + text[end:]
            offset += 1
            left_size += cell_size
        else:  # left_size > cell_position
            start, end, cell_size = spans[offset - 1]
            if left_size - cell_size < cell_position:
                # Position lands inside the previous grapheme: replace it with a space on each side
                return text[:start] + " ", " " + text[end:]
            offset -= 1
            left_size -= cell_size
def split_text(
    text: str, cell_position: int, unicode_version: str = "auto"
) -> tuple[str, str]:
    """Split text by cell position.

    If the cell position falls within a double width character, it is converted to two spaces.

    Args:
        text: Text to split.
        cell_position: Offset in cells.
        unicode_version: Unicode version, `"auto"` to auto detect, `"latest"` for the latest unicode version.

    Returns:
        Tuple of two split strings. A zero or negative position yields `("", text)`.
    """
    # Handle non-positive positions up front so the single-cell fast path never
    # slices with a negative index (which would count from the end of the
    # string), and so both paths agree on the result.
    if cell_position <= 0:
        return "", text
    if _is_single_cell_widths(text):
        # One character per cell, so the cell position is a plain string index
        return text[:cell_position], text[cell_position:]
    return _split_text(text, cell_position, unicode_version)
def set_cell_size(text: str, total: int, unicode_version: str = "auto") -> str:
    """Adjust a string by cropping or padding with spaces such that it fits within the given number of cells.

    Args:
        text: String to adjust.
        total: Desired size in cells. Zero or a negative value produces an empty string.
        unicode_version: Unicode version, `"auto"` to auto detect, `"latest"` for the latest unicode version.

    Returns:
        A string with cell size equal to total.
    """
    # Checked first so a negative total can never reach the slice in the
    # single-cell path, where it would crop from the end of the string.
    if total <= 0:
        return ""

    if _is_single_cell_widths(text):
        # One character per cell: pad or crop by character count
        size = len(text)
        if size < total:
            return text + " " * (total - size)
        return text[:total]

    # Measure with the same unicode version that is used for splitting below
    cell_size = cell_len(text, unicode_version)
    if cell_size == total:
        return text
    if cell_size < total:
        return text + " " * (total - cell_size)

    # Too wide: keep only the left part of a split at the target width
    cropped, _ = _split_text(text, total, unicode_version)
    return cropped
def chop_cells(text: str, width: int, unicode_version: str = "auto") -> list[str]:
    """Break text into consecutive pieces that each fit within the available (cell) width.

    Args:
        text: The text to fold such that it fits in the given width.
        width: The width available (number of cells).
        unicode_version: Unicode version, `"auto"` to auto detect, `"latest"` for the latest unicode version.

    Returns:
        A list of strings such that each string in the list has cell width
        less than or equal to the available width.
    """
    if _is_single_cell_widths(text):
        # One character per cell: fixed-size slices are enough
        return [text[begin : begin + width] for begin in range(0, len(text), width)]

    graphemes, _total_width = split_graphemes(text, unicode_version)

    pieces: list[str] = []
    piece_start = 0  # Codepoint offset where the current piece begins
    used_cells = 0  # Cells consumed by the current piece so far

    for span_start, _span_end, span_cells in graphemes:
        if used_cells + span_cells > width:
            # This grapheme doesn't fit: close the piece and start a new one with it
            pieces.append(text[piece_start:span_start])
            piece_start = span_start
            used_cells = span_cells
        else:
            used_cells += span_cells

    if used_cells:
        pieces.append(text[piece_start:])

    return pieces
|