stevenkhan committed on
Commit
b9ee308
·
verified ·
1 Parent(s): 9efa276

Upload spectral-core/src/ansi.rs

Browse files
Files changed (1) hide show
  1. spectral-core/src/ansi.rs +456 -0
spectral-core/src/ansi.rs ADDED
@@ -0,0 +1,456 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //! ANSI / VT100 Parser — Paul Williams state machine with SIMD fast-path.
2
+ //!
3
+ //! Uses `memchr` for ESC byte scanning to skip large text runs,
4
+ //! then dispatches through a compact state machine.
5
+
6
+ use arrayvec::ArrayVec;
7
+ use memchr::memchr;
8
+
9
+ const MAX_PARAMS: usize = 32;
10
+ const MAX_OSC_LEN: usize = 8192;
11
+ const MAX_DCS_LEN: usize = 4096;
12
+ const MAX_INTERMEDIATES: usize = 4;
13
+
14
+ /// ANSI parser state machine.
15
+ #[derive(Clone, Debug, Default)]
16
+ pub struct Parser {
17
+ state: State,
18
+ params: ArrayVec<u16, MAX_PARAMS>,
19
+ param: u16,
20
+ intermediates: ArrayVec<u8, MAX_INTERMEDIATES>,
21
+ osc_raw: ArrayVec<u8, MAX_OSC_LEN>,
22
+ osc_params: ArrayVec<(usize, usize), 8>,
23
+ dcs_raw: ArrayVec<u8, MAX_DCS_LEN>,
24
+ ignoring: bool,
25
+ }
26
+
27
/// States of the escape-sequence state machine.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
enum State {
    /// Plain text and control characters; the initial state.
    #[default]
    Ground,
    /// Inside a multi-byte UTF-8 character: `(continuation bytes still expected,
    /// code-point bits accumulated so far)`.
    Utf8(u8, u32),
    /// An ESC byte has just been seen.
    Escape,
    /// ESC followed by one or more intermediate bytes.
    EscapeIntermediate,
    /// Immediately after `ESC [`.
    CsiEntry,
    /// Collecting CSI parameter bytes.
    CsiParam,
    /// Collecting CSI intermediate bytes.
    CsiIntermediate,
    /// Discarding a CSI sequence until its final byte.
    CsiIgnore,
    /// Collecting the payload of an OSC string (`ESC ]`).
    OscString,
    /// Immediately after `ESC P`.
    DcsEntry,
    /// Collecting DCS parameter bytes.
    DcsParam,
    /// Collecting DCS intermediate bytes.
    DcsIntermediate,
    /// Forwarding DCS payload bytes to the performer.
    DcsPassthrough,
    /// Discarding a DCS string until it is terminated.
    DcsIgnore,
    /// Discarding an SOS, PM or APC string (`ESC X`, `ESC ^`, `ESC _`).
    SosPmApcString,
}
46
+
47
/// Receiver for the actions the parser recognises in the byte stream.
pub trait Perform {
    /// A printable character was decoded.
    fn print(&mut self, ch: char);

    /// A control byte was encountered outside of an escape sequence.
    fn execute(&mut self, byte: u8);

    /// A CSI sequence ended with final byte `action`.
    ///
    /// `params` are the numeric parameters, `intermediates` the collected
    /// intermediate bytes, and `ignore` is the parser's "ignoring" flag for
    /// this sequence.
    fn csi_dispatch(&mut self, params: &[u16], intermediates: &[u8], ignore: bool, action: char);

    /// An escape sequence ended with final byte `byte`.
    fn esc_dispatch(&mut self, intermediates: &[u8], ignore: bool, byte: u8);

    /// An OSC string ended; `params` are its `;`-separated fields.
    fn osc_dispatch(&mut self, params: &[&[u8]]);

    /// A DCS header ended with final byte `action`; payload bytes follow via
    /// [`Perform::dcs_put`].
    fn dcs_hook(&mut self, params: &[u16], intermediates: &[u8], ignore: bool, action: char);

    /// One payload byte of the current DCS string.
    fn dcs_put(&mut self, byte: u8);

    /// The current DCS string ended.
    fn dcs_unhook(&mut self);
}
70
+
71
+ impl Parser {
72
+ pub fn new() -> Self {
73
+ Self::default()
74
+ }
75
+
76
+ #[inline]
77
+ pub fn advance<P: Perform>(&mut self, performer: &mut P, bytes: &[u8]) {
78
+ let mut i = 0;
79
+ while i < bytes.len() {
80
+ if matches!(self.state, State::Ground) {
81
+ match memchr(0x1B, &bytes[i..]) {
82
+ Some(offset) => {
83
+ if offset > 0 {
84
+ self.print_bulk(performer, &bytes[i..i + offset]);
85
+ }
86
+ i += offset;
87
+ self.state = State::Escape;
88
+ i += 1;
89
+ continue;
90
+ }
91
+ None => {
92
+ self.print_bulk(performer, &bytes[i..]);
93
+ break;
94
+ }
95
+ }
96
+ }
97
+ i += self.advance_one(performer, bytes[i]);
98
+ }
99
+ }
100
+
101
+ #[inline(always)]
102
+ fn print_bulk<P: Perform>(&mut self, performer: &mut P, text: &[u8]) {
103
+ let mut i = 0;
104
+ while i < text.len() {
105
+ let b = text[i];
106
+ if b.is_ascii() {
107
+ if b < 0x20 {
108
+ performer.execute(b);
109
+ } else {
110
+ performer.print(b as char);
111
+ }
112
+ i += 1;
113
+ } else {
114
+ let len = utf8_len(b);
115
+ if i + len <= text.len() {
116
+ if let Some(ch) = decode_utf8(&text[i..i + len]) {
117
+ performer.print(ch);
118
+ } else {
119
+ performer.print('\u{FFFD}');
120
+ }
121
+ i += len;
122
+ } else {
123
+ self.state = State::Utf8((len - 1) as u8, utf8_acc(b));
124
+ return;
125
+ }
126
+ }
127
+ }
128
+ }
129
+
130
+ #[inline(always)]
131
+ fn advance_one<P: Perform>(&mut self, performer: &mut P, byte: u8) -> usize {
132
+ match self.state {
133
+ State::Ground => self.state_ground(performer, byte),
134
+ State::Utf8(rem, acc) => self.state_utf8(performer, byte, rem, acc),
135
+ State::Escape => self.state_escape(performer, byte),
136
+ State::EscapeIntermediate => self.state_escape_intermediate(performer, byte),
137
+ State::CsiEntry => self.state_csi_entry(performer, byte),
138
+ State::CsiParam => self.state_csi_param(performer, byte),
139
+ State::CsiIntermediate => self.state_csi_intermediate(performer, byte),
140
+ State::CsiIgnore => self.state_csi_ignore(performer, byte),
141
+ State::OscString => self.state_osc_string(performer, byte),
142
+ State::DcsEntry => self.state_dcs_entry(performer, byte),
143
+ State::DcsParam => self.state_dcs_param(performer, byte),
144
+ State::DcsIntermediate => self.state_dcs_intermediate(performer, byte),
145
+ State::DcsPassthrough => self.state_dcs_passthrough(performer, byte),
146
+ State::DcsIgnore => self.state_dcs_ignore(performer, byte),
147
+ State::SosPmApcString => self.state_sos_pm_apc_string(performer, byte),
148
+ }
149
+ }
150
+
151
+ #[inline(always)]
152
+ fn state_ground<P: Perform>(&mut self, performer: &mut P, byte: u8) -> usize {
153
+ if byte.is_ascii_control() {
154
+ if byte != 0x1B {
155
+ performer.execute(byte);
156
+ }
157
+ return 1;
158
+ }
159
+ if byte.is_ascii() {
160
+ performer.print(byte as char);
161
+ return 1;
162
+ }
163
+ let len = utf8_len(byte);
164
+ let acc = utf8_acc(byte);
165
+ if len == 1 {
166
+ performer.print('\u{FFFD}');
167
+ return 1;
168
+ }
169
+ self.state = State::Utf8((len - 1) as u8, acc);
170
+ 1
171
+ }
172
+
173
+ #[inline(always)]
174
+ fn state_utf8<P: Perform>(&mut self, performer: &mut P, byte: u8, rem: u8, acc: u32) -> usize {
175
+ let acc = (acc << 6) | (byte as u32 & 0x3F);
176
+ let rem = rem - 1;
177
+ if rem == 0 {
178
+ if let Some(ch) = char::from_u32(acc) {
179
+ performer.print(ch);
180
+ } else {
181
+ performer.print('\u{FFFD}');
182
+ }
183
+ self.state = State::Ground;
184
+ } else {
185
+ self.state = State::Utf8(rem, acc);
186
+ }
187
+ 1
188
+ }
189
+
190
+ #[inline(always)]
191
+ fn state_escape<P: Perform>(&mut self, performer: &mut P, byte: u8) -> usize {
192
+ match byte {
193
+ 0x5B => { self.state = State::CsiEntry; self.clear_params(); }
194
+ 0x5D => { self.state = State::OscString; self.osc_raw.clear(); self.osc_params.clear(); }
195
+ 0x50 => { self.state = State::DcsEntry; self.clear_params(); }
196
+ 0x58 | 0x5E | 0x5F => { self.state = State::SosPmApcString; }
197
+ 0x20..=0x2F => { self.intermediates.push(byte); self.state = State::EscapeIntermediate; }
198
+ 0x30..=0x7E => {
199
+ performer.esc_dispatch(&self.intermediates, self.ignoring, byte);
200
+ self.reset();
201
+ }
202
+ _ => { self.reset(); }
203
+ }
204
+ 1
205
+ }
206
+
207
+ #[inline(always)]
208
+ fn state_escape_intermediate<P: Perform>(&mut self, performer: &mut P, byte: u8) -> usize {
209
+ match byte {
210
+ 0x20..=0x2F => self.intermediates.push(byte),
211
+ 0x30..=0x7E => {
212
+ performer.esc_dispatch(&self.intermediates, self.ignoring, byte);
213
+ self.reset();
214
+ }
215
+ _ => self.reset(),
216
+ }
217
+ 1
218
+ }
219
+
220
+ #[inline(always)]
221
+ fn state_csi_entry<P: Perform>(&mut self, _performer: &mut P, byte: u8) -> usize {
222
+ match byte {
223
+ 0x30..=0x39 | 0x3B => { self.state = State::CsiParam; self.process_csi_byte(byte); }
224
+ 0x3A => { self.ignoring = true; self.state = State::CsiParam; }
225
+ 0x3C..=0x3F => { self.intermediates.push(byte); self.state = State::CsiParam; }
226
+ 0x20..=0x2F => { self.intermediates.push(byte); self.state = State::CsiIntermediate; }
227
+ 0x40..=0x7E => { self.dispatch_csi(_performer, byte); }
228
+ _ => { self.state = State::CsiIgnore; }
229
+ }
230
+ 1
231
+ }
232
+
233
+ #[inline(always)]
234
+ fn state_csi_param<P: Perform>(&mut self, _performer: &mut P, byte: u8) -> usize {
235
+ match byte {
236
+ 0x30..=0x39 | 0x3B => self.process_csi_byte(byte),
237
+ 0x3A => self.ignoring = true,
238
+ 0x20..=0x2F => { self.intermediates.push(byte); self.state = State::CsiIntermediate; }
239
+ 0x40..=0x7E => { self.dispatch_csi(_performer, byte); }
240
+ _ => { self.state = State::CsiIgnore; }
241
+ }
242
+ 1
243
+ }
244
+
245
+ #[inline(always)]
246
+ fn state_csi_intermediate<P: Perform>(&mut self, _performer: &mut P, byte: u8) -> usize {
247
+ match byte {
248
+ 0x20..=0x2F => self.intermediates.push(byte),
249
+ 0x40..=0x7E => { self.dispatch_csi(_performer, byte); }
250
+ _ => { self.state = State::CsiIgnore; }
251
+ }
252
+ 1
253
+ }
254
+
255
+ #[inline(always)]
256
+ fn state_csi_ignore<P: Perform>(&mut self, _performer: &mut P, byte: u8) -> usize {
257
+ if (0x40..=0x7E).contains(&byte) {
258
+ self.reset();
259
+ }
260
+ 1
261
+ }
262
+
263
+ #[inline(always)]
264
+ fn process_csi_byte(&mut self, byte: u8) {
265
+ if byte == 0x3B {
266
+ self.push_param();
267
+ } else {
268
+ let digit = (byte - 0x30) as u16;
269
+ self.param = self.param.saturating_mul(10).saturating_add(digit);
270
+ }
271
+ }
272
+
273
+ #[inline(always)]
274
+ fn dispatch_csi<P: Perform>(&mut self, performer: &mut P, byte: u8) {
275
+ self.push_param();
276
+ let params: ArrayVec<u16, MAX_PARAMS> = self.params.clone();
277
+ let intermediates: ArrayVec<u8, MAX_INTERMEDIATES> = self.intermediates.clone();
278
+ performer.csi_dispatch(&params, &intermediates, self.ignoring, byte as char);
279
+ self.reset();
280
+ }
281
+
282
+ #[inline(always)]
283
+ fn state_osc_string<P: Perform>(&mut self, performer: &mut P, byte: u8) -> usize {
284
+ match byte {
285
+ 0x07 => { self.dispatch_osc(performer); self.reset(); }
286
+ 0x1B => { self.dispatch_osc(performer); self.reset(); }
287
+ 0x9C => { self.dispatch_osc(performer); self.reset(); }
288
+ b => {
289
+ if self.osc_raw.len() < MAX_OSC_LEN {
290
+ self.osc_raw.push(b);
291
+ }
292
+ }
293
+ }
294
+ 1
295
+ }
296
+
297
+ #[inline(always)]
298
+ fn dispatch_osc<P: Perform>(&mut self, performer: &mut P) {
299
+ if self.osc_raw.is_empty() {
300
+ return;
301
+ }
302
+ let mut params: Vec<&[u8]> = Vec::new();
303
+ let mut start = 0;
304
+ for (i, &b) in self.osc_raw.iter().enumerate() {
305
+ if b == 0x3B {
306
+ params.push(&self.osc_raw[start..i]);
307
+ start = i + 1;
308
+ }
309
+ }
310
+ if start < self.osc_raw.len() {
311
+ params.push(&self.osc_raw[start..]);
312
+ }
313
+ performer.osc_dispatch(&params);
314
+ }
315
+
316
+ #[inline(always)]
317
+ fn state_dcs_entry<P: Perform>(&mut self, performer: &mut P, byte: u8) -> usize {
318
+ match byte {
319
+ 0x30..=0x39 | 0x3B | 0x3C..=0x3F => { self.state = State::DcsParam; self.process_csi_byte(byte); }
320
+ 0x20..=0x2F => { self.intermediates.push(byte); self.state = State::DcsIntermediate; }
321
+ 0x40..=0x7E => {
322
+ self.state = State::DcsPassthrough;
323
+ self.dcs_hook(performer, byte);
324
+ }
325
+ _ => { self.state = State::DcsIgnore; }
326
+ }
327
+ 1
328
+ }
329
+
330
+ #[inline(always)]
331
+ fn state_dcs_param<P: Perform>(&mut self, performer: &mut P, byte: u8) -> usize {
332
+ match byte {
333
+ 0x30..=0x39 | 0x3B => self.process_csi_byte(byte),
334
+ 0x20..=0x2F => { self.intermediates.push(byte); self.state = State::DcsIntermediate; }
335
+ 0x40..=0x7E => {
336
+ self.state = State::DcsPassthrough;
337
+ self.dcs_hook(performer, byte);
338
+ }
339
+ _ => { self.state = State::DcsIgnore; }
340
+ }
341
+ 1
342
+ }
343
+
344
+ #[inline(always)]
345
+ fn state_dcs_intermediate<P: Perform>(&mut self, performer: &mut P, byte: u8) -> usize {
346
+ match byte {
347
+ 0x20..=0x2F => self.intermediates.push(byte),
348
+ 0x40..=0x7E => {
349
+ self.state = State::DcsPassthrough;
350
+ self.dcs_hook(performer, byte);
351
+ }
352
+ _ => { self.state = State::DcsIgnore; }
353
+ }
354
+ 1
355
+ }
356
+
357
+ #[inline(always)]
358
+ fn state_dcs_passthrough<P: Perform>(&mut self, performer: &mut P, byte: u8) -> usize {
359
+ match byte {
360
+ 0x1B => {}
361
+ 0x9C => {
362
+ performer.dcs_unhook();
363
+ self.reset();
364
+ }
365
+ b => {
366
+ if self.dcs_raw.len() < MAX_DCS_LEN {
367
+ self.dcs_raw.push(b);
368
+ performer.dcs_put(b);
369
+ }
370
+ }
371
+ }
372
+ 1
373
+ }
374
+
375
+ #[inline(always)]
376
+ fn state_dcs_ignore<P: Perform>(&mut self, _performer: &mut P, byte: u8) -> usize {
377
+ if byte == 0x9C || byte == 0x1B {
378
+ self.reset();
379
+ }
380
+ 1
381
+ }
382
+
383
+ #[inline(always)]
384
+ fn dcs_hook<P: Perform>(&mut self, performer: &mut P, byte: u8) {
385
+ self.push_param();
386
+ let params: ArrayVec<u16, MAX_PARAMS> = self.params.clone();
387
+ let intermediates: ArrayVec<u8, MAX_INTERMEDIATES> = self.intermediates.clone();
388
+ performer.dcs_hook(&params, &intermediates, self.ignoring, byte as char);
389
+ }
390
+
391
+ #[inline(always)]
392
+ fn state_sos_pm_apc_string<P: Perform>(&mut self, _performer: &mut P, byte: u8) -> usize {
393
+ if byte == 0x9C || byte == 0x1B {
394
+ self.reset();
395
+ }
396
+ 1
397
+ }
398
+
399
+ #[inline(always)]
400
+ fn clear_params(&mut self) {
401
+ self.params.clear();
402
+ self.param = 0;
403
+ }
404
+
405
+ #[inline(always)]
406
+ fn push_param(&mut self) {
407
+ if self.params.len() < MAX_PARAMS {
408
+ self.params.push(self.param);
409
+ }
410
+ self.param = 0;
411
+ }
412
+
413
+ #[inline(always)]
414
+ fn reset(&mut self) {
415
+ self.state = State::Ground;
416
+ self.clear_params();
417
+ self.intermediates.clear();
418
+ self.ignoring = false;
419
+ }
420
+ }
421
+
422
/// Length in bytes of the UTF-8 sequence introduced by `byte`.
///
/// ASCII bytes yield 1. Bytes that cannot start a sequence (continuation
/// bytes `0x80..=0xBF` and `0xF8..=0xFF`) also yield 1.
#[inline(always)]
fn utf8_len(byte: u8) -> usize {
    match byte {
        0xC0..=0xDF => 2, // 110x_xxxx
        0xE0..=0xEF => 3, // 1110_xxxx
        0xF0..=0xF7 => 4, // 1111_0xxx
        _ => 1,
    }
}
432
+
433
/// Payload bits carried by the first byte of a UTF-8 sequence.
///
/// For 2-, 3- and 4-byte lead bytes the length-marker bits are masked off;
/// any other byte is returned unchanged.
#[inline(always)]
fn utf8_acc(byte: u8) -> u32 {
    let payload = match byte {
        0xC0..=0xDF => byte & 0x1F,
        0xE0..=0xEF => byte & 0x0F,
        0xF0..=0xF7 => byte & 0x07,
        _ => byte,
    };
    u32::from(payload)
}
443
+
444
/// Decodes the single UTF-8 character at the start of `bytes`.
///
/// Bytes after the first character are ignored. Returns `None` when `bytes`
/// is empty, is shorter than the sequence its first byte announces, or does
/// not start with a well-formed UTF-8 sequence (stray continuation byte,
/// invalid lead byte, bad continuation byte, overlong form, surrogate, or a
/// code point above U+10FFFF).
#[inline(always)]
fn decode_utf8(bytes: &[u8]) -> Option<char> {
    let first = *bytes.first()?;
    let len = match first.leading_ones() {
        0 => 1,
        2 => 2,
        3 => 3,
        4 => 4,
        // Continuation bytes and 0xF8..=0xFF cannot start a character.
        _ => return None,
    };
    let seq = bytes.get(..len)?;
    // Let the standard library do the validation instead of blindly
    // shifting continuation bits together.
    std::str::from_utf8(seq).ok()?.chars().next()
}