| |
| |
| |
| |
|
|
| use arrayvec::ArrayVec; |
| use memchr::memchr; |
|
|
/// Capacity of the numeric-parameter list kept for one CSI/DCS sequence.
const MAX_PARAMS: usize = 32;
/// Capacity, in bytes, of the buffer that collects an OSC string.
const MAX_OSC_LEN: usize = 8 * 1024;
/// Capacity, in bytes, of the buffer that collects a DCS payload.
const MAX_DCS_LEN: usize = 4 * 1024;
/// Capacity of the intermediate-byte list kept for one sequence.
const MAX_INTERMEDIATES: usize = 4;
|
|
| |
| #[derive(Clone, Debug, Default)] |
| pub struct Parser { |
| state: State, |
| params: ArrayVec<u16, MAX_PARAMS>, |
| param: u16, |
| intermediates: ArrayVec<u8, MAX_INTERMEDIATES>, |
| osc_raw: ArrayVec<u8, MAX_OSC_LEN>, |
| osc_params: ArrayVec<(usize, usize), 8>, |
| dcs_raw: ArrayVec<u8, MAX_DCS_LEN>, |
| ignoring: bool, |
| } |
|
|
/// States of the parser's state machine.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
enum State {
    /// Plain text and control characters; the initial state.
    #[default]
    Ground,
    /// Inside a multi-byte UTF-8 character: `(continuation bytes still expected,
    /// code-point bits accumulated so far)`.
    Utf8(u8, u32),
    /// An ESC byte has been seen.
    Escape,
    /// ESC followed by at least one intermediate byte.
    EscapeIntermediate,
    /// Just after the CSI introducer.
    CsiEntry,
    /// Collecting CSI parameters.
    CsiParam,
    /// Collecting CSI intermediate bytes.
    CsiIntermediate,
    /// Discarding a malformed CSI sequence until its final byte.
    CsiIgnore,
    /// Collecting an OSC string.
    OscString,
    /// Just after the DCS introducer.
    DcsEntry,
    /// Collecting DCS parameters.
    DcsParam,
    /// Collecting DCS intermediate bytes.
    DcsIntermediate,
    /// Forwarding DCS payload bytes to the performer.
    DcsPassthrough,
    /// Discarding a malformed DCS sequence until it is terminated.
    DcsIgnore,
    /// Discarding an SOS/PM/APC string until it is terminated.
    SosPmApcString,
}
|
|
| |
/// Receiver for the events produced by [`Parser`].
pub trait Perform {
    /// A printable character was decoded.
    fn print(&mut self, ch: char);

    /// A control byte was encountered in the text stream.
    fn execute(&mut self, byte: u8);

    /// A CSI sequence ended with final byte `action`.
    ///
    /// `params` are the numeric parameters and `intermediates` the collected
    /// intermediate bytes; `ignore` is the parser's ignore flag for the sequence.
    fn csi_dispatch(&mut self, params: &[u16], intermediates: &[u8], ignore: bool, action: char);

    /// An escape sequence ended with final byte `byte`.
    fn esc_dispatch(&mut self, intermediates: &[u8], ignore: bool, byte: u8);

    /// An OSC string ended; `params` are its `;`-separated fields.
    fn osc_dispatch(&mut self, params: &[&[u8]]);

    /// A DCS sequence header ended with final byte `action`; payload bytes follow
    /// through [`Perform::dcs_put`].
    fn dcs_hook(&mut self, params: &[u16], intermediates: &[u8], ignore: bool, action: char);

    /// One byte of DCS payload.
    fn dcs_put(&mut self, byte: u8);

    /// The DCS payload ended.
    fn dcs_unhook(&mut self);
}
|
|
| impl Parser { |
| pub fn new() -> Self { |
| Self::default() |
| } |
|
|
| #[inline] |
| pub fn advance<P: Perform>(&mut self, performer: &mut P, bytes: &[u8]) { |
| let mut i = 0; |
| while i < bytes.len() { |
| if matches!(self.state, State::Ground) { |
| match memchr(0x1B, &bytes[i..]) { |
| Some(offset) => { |
| if offset > 0 { |
| self.print_bulk(performer, &bytes[i..i + offset]); |
| } |
| i += offset; |
| self.state = State::Escape; |
| i += 1; |
| continue; |
| } |
| None => { |
| self.print_bulk(performer, &bytes[i..]); |
| break; |
| } |
| } |
| } |
| i += self.advance_one(performer, bytes[i]); |
| } |
| } |
|
|
| #[inline(always)] |
| fn print_bulk<P: Perform>(&mut self, performer: &mut P, text: &[u8]) { |
| let mut i = 0; |
| while i < text.len() { |
| let b = text[i]; |
| if b.is_ascii() { |
| if b < 0x20 { |
| performer.execute(b); |
| } else { |
| performer.print(b as char); |
| } |
| i += 1; |
| } else { |
| let len = utf8_len(b); |
| if i + len <= text.len() { |
| if let Some(ch) = decode_utf8(&text[i..i + len]) { |
| performer.print(ch); |
| } else { |
| performer.print('\u{FFFD}'); |
| } |
| i += len; |
| } else { |
| self.state = State::Utf8((len - 1) as u8, utf8_acc(b)); |
| return; |
| } |
| } |
| } |
| } |
|
|
| #[inline(always)] |
| fn advance_one<P: Perform>(&mut self, performer: &mut P, byte: u8) -> usize { |
| match self.state { |
| State::Ground => self.state_ground(performer, byte), |
| State::Utf8(rem, acc) => self.state_utf8(performer, byte, rem, acc), |
| State::Escape => self.state_escape(performer, byte), |
| State::EscapeIntermediate => self.state_escape_intermediate(performer, byte), |
| State::CsiEntry => self.state_csi_entry(performer, byte), |
| State::CsiParam => self.state_csi_param(performer, byte), |
| State::CsiIntermediate => self.state_csi_intermediate(performer, byte), |
| State::CsiIgnore => self.state_csi_ignore(performer, byte), |
| State::OscString => self.state_osc_string(performer, byte), |
| State::DcsEntry => self.state_dcs_entry(performer, byte), |
| State::DcsParam => self.state_dcs_param(performer, byte), |
| State::DcsIntermediate => self.state_dcs_intermediate(performer, byte), |
| State::DcsPassthrough => self.state_dcs_passthrough(performer, byte), |
| State::DcsIgnore => self.state_dcs_ignore(performer, byte), |
| State::SosPmApcString => self.state_sos_pm_apc_string(performer, byte), |
| } |
| } |
|
|
| #[inline(always)] |
| fn state_ground<P: Perform>(&mut self, performer: &mut P, byte: u8) -> usize { |
| if byte.is_ascii_control() { |
| if byte != 0x1B { |
| performer.execute(byte); |
| } |
| return 1; |
| } |
| if byte.is_ascii() { |
| performer.print(byte as char); |
| return 1; |
| } |
| let len = utf8_len(byte); |
| let acc = utf8_acc(byte); |
| if len == 1 { |
| performer.print('\u{FFFD}'); |
| return 1; |
| } |
| self.state = State::Utf8((len - 1) as u8, acc); |
| 1 |
| } |
|
|
| #[inline(always)] |
| fn state_utf8<P: Perform>(&mut self, performer: &mut P, byte: u8, rem: u8, acc: u32) -> usize { |
| let acc = (acc << 6) | (byte as u32 & 0x3F); |
| let rem = rem - 1; |
| if rem == 0 { |
| if let Some(ch) = char::from_u32(acc) { |
| performer.print(ch); |
| } else { |
| performer.print('\u{FFFD}'); |
| } |
| self.state = State::Ground; |
| } else { |
| self.state = State::Utf8(rem, acc); |
| } |
| 1 |
| } |
|
|
| #[inline(always)] |
| fn state_escape<P: Perform>(&mut self, performer: &mut P, byte: u8) -> usize { |
| match byte { |
| 0x5B => { self.state = State::CsiEntry; self.clear_params(); } |
| 0x5D => { self.state = State::OscString; self.osc_raw.clear(); self.osc_params.clear(); } |
| 0x50 => { self.state = State::DcsEntry; self.clear_params(); } |
| 0x58 | 0x5E | 0x5F => { self.state = State::SosPmApcString; } |
| 0x20..=0x2F => { self.intermediates.push(byte); self.state = State::EscapeIntermediate; } |
| 0x30..=0x7E => { |
| performer.esc_dispatch(&self.intermediates, self.ignoring, byte); |
| self.reset(); |
| } |
| _ => { self.reset(); } |
| } |
| 1 |
| } |
|
|
| #[inline(always)] |
| fn state_escape_intermediate<P: Perform>(&mut self, performer: &mut P, byte: u8) -> usize { |
| match byte { |
| 0x20..=0x2F => self.intermediates.push(byte), |
| 0x30..=0x7E => { |
| performer.esc_dispatch(&self.intermediates, self.ignoring, byte); |
| self.reset(); |
| } |
| _ => self.reset(), |
| } |
| 1 |
| } |
|
|
| #[inline(always)] |
| fn state_csi_entry<P: Perform>(&mut self, _performer: &mut P, byte: u8) -> usize { |
| match byte { |
| 0x30..=0x39 | 0x3B => { self.state = State::CsiParam; self.process_csi_byte(byte); } |
| 0x3A => { self.ignoring = true; self.state = State::CsiParam; } |
| 0x3C..=0x3F => { self.intermediates.push(byte); self.state = State::CsiParam; } |
| 0x20..=0x2F => { self.intermediates.push(byte); self.state = State::CsiIntermediate; } |
| 0x40..=0x7E => { self.dispatch_csi(_performer, byte); } |
| _ => { self.state = State::CsiIgnore; } |
| } |
| 1 |
| } |
|
|
| #[inline(always)] |
| fn state_csi_param<P: Perform>(&mut self, _performer: &mut P, byte: u8) -> usize { |
| match byte { |
| 0x30..=0x39 | 0x3B => self.process_csi_byte(byte), |
| 0x3A => self.ignoring = true, |
| 0x20..=0x2F => { self.intermediates.push(byte); self.state = State::CsiIntermediate; } |
| 0x40..=0x7E => { self.dispatch_csi(_performer, byte); } |
| _ => { self.state = State::CsiIgnore; } |
| } |
| 1 |
| } |
|
|
| #[inline(always)] |
| fn state_csi_intermediate<P: Perform>(&mut self, _performer: &mut P, byte: u8) -> usize { |
| match byte { |
| 0x20..=0x2F => self.intermediates.push(byte), |
| 0x40..=0x7E => { self.dispatch_csi(_performer, byte); } |
| _ => { self.state = State::CsiIgnore; } |
| } |
| 1 |
| } |
|
|
| #[inline(always)] |
| fn state_csi_ignore<P: Perform>(&mut self, _performer: &mut P, byte: u8) -> usize { |
| if (0x40..=0x7E).contains(&byte) { |
| self.reset(); |
| } |
| 1 |
| } |
|
|
| #[inline(always)] |
| fn process_csi_byte(&mut self, byte: u8) { |
| if byte == 0x3B { |
| self.push_param(); |
| } else { |
| let digit = (byte - 0x30) as u16; |
| self.param = self.param.saturating_mul(10).saturating_add(digit); |
| } |
| } |
|
|
| #[inline(always)] |
| fn dispatch_csi<P: Perform>(&mut self, performer: &mut P, byte: u8) { |
| self.push_param(); |
| let params: ArrayVec<u16, MAX_PARAMS> = self.params.clone(); |
| let intermediates: ArrayVec<u8, MAX_INTERMEDIATES> = self.intermediates.clone(); |
| performer.csi_dispatch(¶ms, &intermediates, self.ignoring, byte as char); |
| self.reset(); |
| } |
|
|
| #[inline(always)] |
| fn state_osc_string<P: Perform>(&mut self, performer: &mut P, byte: u8) -> usize { |
| match byte { |
| 0x07 => { self.dispatch_osc(performer); self.reset(); } |
| 0x1B => { self.dispatch_osc(performer); self.reset(); } |
| 0x9C => { self.dispatch_osc(performer); self.reset(); } |
| b => { |
| if self.osc_raw.len() < MAX_OSC_LEN { |
| self.osc_raw.push(b); |
| } |
| } |
| } |
| 1 |
| } |
|
|
| #[inline(always)] |
| fn dispatch_osc<P: Perform>(&mut self, performer: &mut P) { |
| if self.osc_raw.is_empty() { |
| return; |
| } |
| let mut params: Vec<&[u8]> = Vec::new(); |
| let mut start = 0; |
| for (i, &b) in self.osc_raw.iter().enumerate() { |
| if b == 0x3B { |
| params.push(&self.osc_raw[start..i]); |
| start = i + 1; |
| } |
| } |
| if start < self.osc_raw.len() { |
| params.push(&self.osc_raw[start..]); |
| } |
| performer.osc_dispatch(¶ms); |
| } |
|
|
| #[inline(always)] |
| fn state_dcs_entry<P: Perform>(&mut self, performer: &mut P, byte: u8) -> usize { |
| match byte { |
| 0x30..=0x39 | 0x3B | 0x3C..=0x3F => { self.state = State::DcsParam; self.process_csi_byte(byte); } |
| 0x20..=0x2F => { self.intermediates.push(byte); self.state = State::DcsIntermediate; } |
| 0x40..=0x7E => { |
| self.state = State::DcsPassthrough; |
| self.dcs_hook(performer, byte); |
| } |
| _ => { self.state = State::DcsIgnore; } |
| } |
| 1 |
| } |
|
|
| #[inline(always)] |
| fn state_dcs_param<P: Perform>(&mut self, performer: &mut P, byte: u8) -> usize { |
| match byte { |
| 0x30..=0x39 | 0x3B => self.process_csi_byte(byte), |
| 0x20..=0x2F => { self.intermediates.push(byte); self.state = State::DcsIntermediate; } |
| 0x40..=0x7E => { |
| self.state = State::DcsPassthrough; |
| self.dcs_hook(performer, byte); |
| } |
| _ => { self.state = State::DcsIgnore; } |
| } |
| 1 |
| } |
|
|
| #[inline(always)] |
| fn state_dcs_intermediate<P: Perform>(&mut self, performer: &mut P, byte: u8) -> usize { |
| match byte { |
| 0x20..=0x2F => self.intermediates.push(byte), |
| 0x40..=0x7E => { |
| self.state = State::DcsPassthrough; |
| self.dcs_hook(performer, byte); |
| } |
| _ => { self.state = State::DcsIgnore; } |
| } |
| 1 |
| } |
|
|
| #[inline(always)] |
| fn state_dcs_passthrough<P: Perform>(&mut self, performer: &mut P, byte: u8) -> usize { |
| match byte { |
| 0x1B => {} |
| 0x9C => { |
| performer.dcs_unhook(); |
| self.reset(); |
| } |
| b => { |
| if self.dcs_raw.len() < MAX_DCS_LEN { |
| self.dcs_raw.push(b); |
| performer.dcs_put(b); |
| } |
| } |
| } |
| 1 |
| } |
|
|
| #[inline(always)] |
| fn state_dcs_ignore<P: Perform>(&mut self, _performer: &mut P, byte: u8) -> usize { |
| if byte == 0x9C || byte == 0x1B { |
| self.reset(); |
| } |
| 1 |
| } |
|
|
| #[inline(always)] |
| fn dcs_hook<P: Perform>(&mut self, performer: &mut P, byte: u8) { |
| self.push_param(); |
| let params: ArrayVec<u16, MAX_PARAMS> = self.params.clone(); |
| let intermediates: ArrayVec<u8, MAX_INTERMEDIATES> = self.intermediates.clone(); |
| performer.dcs_hook(¶ms, &intermediates, self.ignoring, byte as char); |
| } |
|
|
| #[inline(always)] |
| fn state_sos_pm_apc_string<P: Perform>(&mut self, _performer: &mut P, byte: u8) -> usize { |
| if byte == 0x9C || byte == 0x1B { |
| self.reset(); |
| } |
| 1 |
| } |
|
|
| #[inline(always)] |
| fn clear_params(&mut self) { |
| self.params.clear(); |
| self.param = 0; |
| } |
|
|
| #[inline(always)] |
| fn push_param(&mut self) { |
| if self.params.len() < MAX_PARAMS { |
| self.params.push(self.param); |
| } |
| self.param = 0; |
| } |
|
|
| #[inline(always)] |
| fn reset(&mut self) { |
| self.state = State::Ground; |
| self.clear_params(); |
| self.intermediates.clear(); |
| self.ignoring = false; |
| } |
| } |
|
|
/// Returns the length in bytes of the UTF-8 sequence introduced by `byte`.
///
/// ASCII bytes give 1 and lead bytes give 2, 3 or 4. Bytes that cannot start
/// a sequence (continuation bytes and 0xF8..=0xFF) also give 1.
#[inline(always)]
fn utf8_len(byte: u8) -> usize {
    match byte {
        0xC0..=0xDF => 2, // 110xxxxx
        0xE0..=0xEF => 3, // 1110xxxx
        0xF0..=0xF7 => 4, // 11110xxx
        _ => 1,
    }
}
|
|
/// Returns the code-point bits carried by the first byte of a UTF-8 sequence.
///
/// For 2-, 3- and 4-byte lead bytes the length marker is masked off; every
/// other byte is returned unchanged.
#[inline(always)]
fn utf8_acc(byte: u8) -> u32 {
    let payload = match byte {
        0xC0..=0xDF => byte & 0x1F,
        0xE0..=0xEF => byte & 0x0F,
        0xF0..=0xF7 => byte & 0x07,
        _ => byte,
    };
    u32::from(payload)
}
|
|
/// Decodes the UTF-8 character at the start of `bytes`.
///
/// Bytes after the first character are ignored. Returns `None` when `bytes`
/// is empty, too short for the sequence its first byte announces, or does not
/// start with well-formed UTF-8. Validation is delegated to
/// `std::str::from_utf8`, so bad continuation bytes, overlong encodings,
/// surrogates and invalid lead bytes are all rejected.
#[inline(always)]
fn decode_utf8(bytes: &[u8]) -> Option<char> {
    // `first()` instead of indexing: an empty slice must not panic.
    let lead = *bytes.first()?;
    let len = match lead {
        0x00..=0x7F => 1,
        0xC0..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xF7 => 4,
        _ => return None,
    };
    let sequence = bytes.get(..len)?;
    std::str::from_utf8(sequence).ok()?.chars().next()
}
|
|