// stevenkhan's picture
// Upload spectral-core/src/ansi.rs
// b9ee308 verified
//! ANSI / VT100 Parser — Paul Williams state machine with SIMD fast-path.
//!
//! Uses `memchr` for ESC byte scanning to skip large text runs,
//! then dispatches through a compact state machine.
use arrayvec::ArrayVec;
use memchr::memchr;
/// Capacity of the numeric parameter list kept for a CSI or DCS sequence.
const MAX_PARAMS: usize = 32;
/// Capacity, in bytes, of the buffer holding an OSC string's payload.
const MAX_OSC_LEN: usize = 8192;
/// Capacity, in bytes, of the buffer tracking DCS passthrough data.
const MAX_DCS_LEN: usize = 4096;
/// Capacity of the intermediate-byte list collected for ESC/CSI/DCS sequences.
const MAX_INTERMEDIATES: usize = 4;
/// ANSI parser state machine.
///
/// Holds the current [`State`] plus the fixed-capacity buffers that
/// accumulate the pieces of the sequence being parsed. Bytes are fed in
/// through `advance`, which reports results to a [`Perform`] implementation.
#[derive(Clone, Debug, Default)]
pub struct Parser {
/// State the machine is currently in.
state: State,
/// Completed numeric parameters of the CSI/DCS sequence in progress.
params: ArrayVec<u16, MAX_PARAMS>,
/// Parameter currently being accumulated digit by digit (saturating at `u16::MAX`).
param: u16,
/// Intermediate bytes (and CSI private markers) collected for the sequence in progress.
intermediates: ArrayVec<u8, MAX_INTERMEDIATES>,
/// Raw payload of the OSC string in progress; bytes beyond the capacity are dropped.
osc_raw: ArrayVec<u8, MAX_OSC_LEN>,
/// Cleared whenever an OSC string starts; not otherwise used by the code in this file.
osc_params: ArrayVec<(usize, usize), 8>,
/// Copy of DCS passthrough bytes; a byte is only forwarded to the performer
/// while this buffer still has room for it.
dcs_raw: ArrayVec<u8, MAX_DCS_LEN>,
/// Set when the sequence contains something the parser does not model
/// (e.g. a `:` in CSI parameters); handed to the dispatch callbacks as `ignore`.
ignoring: bool,
}
/// States of the parser's state machine.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)]
enum State {
/// Plain text: characters are printed and control bytes executed.
#[default]
Ground,
/// Inside a multi-byte UTF-8 character: number of continuation bytes still
/// expected, and the code-point bits accumulated so far.
Utf8(u8, u32),
/// An ESC byte has been seen; the next byte selects the sequence type.
Escape,
/// ESC followed by one or more intermediate bytes (0x20..=0x2F).
EscapeIntermediate,
/// Immediately after `ESC [`.
CsiEntry,
/// Reading CSI parameter bytes.
CsiParam,
/// Reading CSI intermediate bytes.
CsiIntermediate,
/// Malformed CSI sequence: bytes are skipped until a final byte (0x40..=0x7E).
CsiIgnore,
/// Collecting the payload of an OSC string (`ESC ]`).
OscString,
/// Immediately after `ESC P`.
DcsEntry,
/// Reading DCS parameter bytes.
DcsParam,
/// Reading DCS intermediate bytes.
DcsIntermediate,
/// DCS payload bytes are being forwarded to the performer.
DcsPassthrough,
/// Malformed DCS sequence: bytes are skipped until the string ends.
DcsIgnore,
/// Inside an SOS, PM or APC string (`ESC X`, `ESC ^`, `ESC _`); content is discarded.
SosPmApcString,
}
/// Trait for handling parsed terminal actions.
///
/// The parser calls these methods as it recognises text, control bytes and
/// complete control sequences in the byte stream.
pub trait Perform {
/// Called for each printable character. Undecodable UTF-8 is reported as U+FFFD.
fn print(&mut self, ch: char);
/// Called for a control byte found in plain text.
fn execute(&mut self, byte: u8);
/// Called when a CSI sequence receives its final byte (0x40..=0x7E).
///
/// `params` are the numeric parameters, `intermediates` the collected
/// intermediate/private-marker bytes, `ignore` the parser's "ignoring"
/// flag for this sequence, and `action` the final byte.
fn csi_dispatch(
&mut self,
params: &[u16],
intermediates: &[u8],
ignore: bool,
action: char,
);
/// Called when an escape sequence receives its final byte (0x30..=0x7E).
fn esc_dispatch(&mut self, intermediates: &[u8], ignore: bool, byte: u8);
/// Called when a non-empty OSC string ends; `params` are the `;`-separated
/// fields of its payload.
fn osc_dispatch(&mut self, params: &[&[u8]]);
/// Called when the final byte (0x40..=0x7E) of a DCS header is seen,
/// before any payload byte is delivered.
fn dcs_hook(
&mut self,
params: &[u16],
intermediates: &[u8],
ignore: bool,
action: char,
);
/// Called for each DCS payload byte that is forwarded.
fn dcs_put(&mut self, byte: u8);
/// Called when the DCS string that was hooked ends.
fn dcs_unhook(&mut self);
}
impl Parser {
pub fn new() -> Self {
Self::default()
}
#[inline]
pub fn advance<P: Perform>(&mut self, performer: &mut P, bytes: &[u8]) {
let mut i = 0;
while i < bytes.len() {
if matches!(self.state, State::Ground) {
match memchr(0x1B, &bytes[i..]) {
Some(offset) => {
if offset > 0 {
self.print_bulk(performer, &bytes[i..i + offset]);
}
i += offset;
self.state = State::Escape;
i += 1;
continue;
}
None => {
self.print_bulk(performer, &bytes[i..]);
break;
}
}
}
i += self.advance_one(performer, bytes[i]);
}
}
#[inline(always)]
fn print_bulk<P: Perform>(&mut self, performer: &mut P, text: &[u8]) {
let mut i = 0;
while i < text.len() {
let b = text[i];
if b.is_ascii() {
if b < 0x20 {
performer.execute(b);
} else {
performer.print(b as char);
}
i += 1;
} else {
let len = utf8_len(b);
if i + len <= text.len() {
if let Some(ch) = decode_utf8(&text[i..i + len]) {
performer.print(ch);
} else {
performer.print('\u{FFFD}');
}
i += len;
} else {
self.state = State::Utf8((len - 1) as u8, utf8_acc(b));
return;
}
}
}
}
#[inline(always)]
fn advance_one<P: Perform>(&mut self, performer: &mut P, byte: u8) -> usize {
match self.state {
State::Ground => self.state_ground(performer, byte),
State::Utf8(rem, acc) => self.state_utf8(performer, byte, rem, acc),
State::Escape => self.state_escape(performer, byte),
State::EscapeIntermediate => self.state_escape_intermediate(performer, byte),
State::CsiEntry => self.state_csi_entry(performer, byte),
State::CsiParam => self.state_csi_param(performer, byte),
State::CsiIntermediate => self.state_csi_intermediate(performer, byte),
State::CsiIgnore => self.state_csi_ignore(performer, byte),
State::OscString => self.state_osc_string(performer, byte),
State::DcsEntry => self.state_dcs_entry(performer, byte),
State::DcsParam => self.state_dcs_param(performer, byte),
State::DcsIntermediate => self.state_dcs_intermediate(performer, byte),
State::DcsPassthrough => self.state_dcs_passthrough(performer, byte),
State::DcsIgnore => self.state_dcs_ignore(performer, byte),
State::SosPmApcString => self.state_sos_pm_apc_string(performer, byte),
}
}
#[inline(always)]
fn state_ground<P: Perform>(&mut self, performer: &mut P, byte: u8) -> usize {
if byte.is_ascii_control() {
if byte != 0x1B {
performer.execute(byte);
}
return 1;
}
if byte.is_ascii() {
performer.print(byte as char);
return 1;
}
let len = utf8_len(byte);
let acc = utf8_acc(byte);
if len == 1 {
performer.print('\u{FFFD}');
return 1;
}
self.state = State::Utf8((len - 1) as u8, acc);
1
}
#[inline(always)]
fn state_utf8<P: Perform>(&mut self, performer: &mut P, byte: u8, rem: u8, acc: u32) -> usize {
let acc = (acc << 6) | (byte as u32 & 0x3F);
let rem = rem - 1;
if rem == 0 {
if let Some(ch) = char::from_u32(acc) {
performer.print(ch);
} else {
performer.print('\u{FFFD}');
}
self.state = State::Ground;
} else {
self.state = State::Utf8(rem, acc);
}
1
}
#[inline(always)]
fn state_escape<P: Perform>(&mut self, performer: &mut P, byte: u8) -> usize {
match byte {
0x5B => { self.state = State::CsiEntry; self.clear_params(); }
0x5D => { self.state = State::OscString; self.osc_raw.clear(); self.osc_params.clear(); }
0x50 => { self.state = State::DcsEntry; self.clear_params(); }
0x58 | 0x5E | 0x5F => { self.state = State::SosPmApcString; }
0x20..=0x2F => { self.intermediates.push(byte); self.state = State::EscapeIntermediate; }
0x30..=0x7E => {
performer.esc_dispatch(&self.intermediates, self.ignoring, byte);
self.reset();
}
_ => { self.reset(); }
}
1
}
#[inline(always)]
fn state_escape_intermediate<P: Perform>(&mut self, performer: &mut P, byte: u8) -> usize {
match byte {
0x20..=0x2F => self.intermediates.push(byte),
0x30..=0x7E => {
performer.esc_dispatch(&self.intermediates, self.ignoring, byte);
self.reset();
}
_ => self.reset(),
}
1
}
#[inline(always)]
fn state_csi_entry<P: Perform>(&mut self, _performer: &mut P, byte: u8) -> usize {
match byte {
0x30..=0x39 | 0x3B => { self.state = State::CsiParam; self.process_csi_byte(byte); }
0x3A => { self.ignoring = true; self.state = State::CsiParam; }
0x3C..=0x3F => { self.intermediates.push(byte); self.state = State::CsiParam; }
0x20..=0x2F => { self.intermediates.push(byte); self.state = State::CsiIntermediate; }
0x40..=0x7E => { self.dispatch_csi(_performer, byte); }
_ => { self.state = State::CsiIgnore; }
}
1
}
#[inline(always)]
fn state_csi_param<P: Perform>(&mut self, _performer: &mut P, byte: u8) -> usize {
match byte {
0x30..=0x39 | 0x3B => self.process_csi_byte(byte),
0x3A => self.ignoring = true,
0x20..=0x2F => { self.intermediates.push(byte); self.state = State::CsiIntermediate; }
0x40..=0x7E => { self.dispatch_csi(_performer, byte); }
_ => { self.state = State::CsiIgnore; }
}
1
}
#[inline(always)]
fn state_csi_intermediate<P: Perform>(&mut self, _performer: &mut P, byte: u8) -> usize {
match byte {
0x20..=0x2F => self.intermediates.push(byte),
0x40..=0x7E => { self.dispatch_csi(_performer, byte); }
_ => { self.state = State::CsiIgnore; }
}
1
}
#[inline(always)]
fn state_csi_ignore<P: Perform>(&mut self, _performer: &mut P, byte: u8) -> usize {
if (0x40..=0x7E).contains(&byte) {
self.reset();
}
1
}
#[inline(always)]
fn process_csi_byte(&mut self, byte: u8) {
if byte == 0x3B {
self.push_param();
} else {
let digit = (byte - 0x30) as u16;
self.param = self.param.saturating_mul(10).saturating_add(digit);
}
}
#[inline(always)]
fn dispatch_csi<P: Perform>(&mut self, performer: &mut P, byte: u8) {
self.push_param();
let params: ArrayVec<u16, MAX_PARAMS> = self.params.clone();
let intermediates: ArrayVec<u8, MAX_INTERMEDIATES> = self.intermediates.clone();
performer.csi_dispatch(&params, &intermediates, self.ignoring, byte as char);
self.reset();
}
#[inline(always)]
fn state_osc_string<P: Perform>(&mut self, performer: &mut P, byte: u8) -> usize {
match byte {
0x07 => { self.dispatch_osc(performer); self.reset(); }
0x1B => { self.dispatch_osc(performer); self.reset(); }
0x9C => { self.dispatch_osc(performer); self.reset(); }
b => {
if self.osc_raw.len() < MAX_OSC_LEN {
self.osc_raw.push(b);
}
}
}
1
}
#[inline(always)]
fn dispatch_osc<P: Perform>(&mut self, performer: &mut P) {
if self.osc_raw.is_empty() {
return;
}
let mut params: Vec<&[u8]> = Vec::new();
let mut start = 0;
for (i, &b) in self.osc_raw.iter().enumerate() {
if b == 0x3B {
params.push(&self.osc_raw[start..i]);
start = i + 1;
}
}
if start < self.osc_raw.len() {
params.push(&self.osc_raw[start..]);
}
performer.osc_dispatch(&params);
}
#[inline(always)]
fn state_dcs_entry<P: Perform>(&mut self, performer: &mut P, byte: u8) -> usize {
match byte {
0x30..=0x39 | 0x3B | 0x3C..=0x3F => { self.state = State::DcsParam; self.process_csi_byte(byte); }
0x20..=0x2F => { self.intermediates.push(byte); self.state = State::DcsIntermediate; }
0x40..=0x7E => {
self.state = State::DcsPassthrough;
self.dcs_hook(performer, byte);
}
_ => { self.state = State::DcsIgnore; }
}
1
}
#[inline(always)]
fn state_dcs_param<P: Perform>(&mut self, performer: &mut P, byte: u8) -> usize {
match byte {
0x30..=0x39 | 0x3B => self.process_csi_byte(byte),
0x20..=0x2F => { self.intermediates.push(byte); self.state = State::DcsIntermediate; }
0x40..=0x7E => {
self.state = State::DcsPassthrough;
self.dcs_hook(performer, byte);
}
_ => { self.state = State::DcsIgnore; }
}
1
}
#[inline(always)]
fn state_dcs_intermediate<P: Perform>(&mut self, performer: &mut P, byte: u8) -> usize {
match byte {
0x20..=0x2F => self.intermediates.push(byte),
0x40..=0x7E => {
self.state = State::DcsPassthrough;
self.dcs_hook(performer, byte);
}
_ => { self.state = State::DcsIgnore; }
}
1
}
#[inline(always)]
fn state_dcs_passthrough<P: Perform>(&mut self, performer: &mut P, byte: u8) -> usize {
match byte {
0x1B => {}
0x9C => {
performer.dcs_unhook();
self.reset();
}
b => {
if self.dcs_raw.len() < MAX_DCS_LEN {
self.dcs_raw.push(b);
performer.dcs_put(b);
}
}
}
1
}
#[inline(always)]
fn state_dcs_ignore<P: Perform>(&mut self, _performer: &mut P, byte: u8) -> usize {
if byte == 0x9C || byte == 0x1B {
self.reset();
}
1
}
#[inline(always)]
fn dcs_hook<P: Perform>(&mut self, performer: &mut P, byte: u8) {
self.push_param();
let params: ArrayVec<u16, MAX_PARAMS> = self.params.clone();
let intermediates: ArrayVec<u8, MAX_INTERMEDIATES> = self.intermediates.clone();
performer.dcs_hook(&params, &intermediates, self.ignoring, byte as char);
}
#[inline(always)]
fn state_sos_pm_apc_string<P: Perform>(&mut self, _performer: &mut P, byte: u8) -> usize {
if byte == 0x9C || byte == 0x1B {
self.reset();
}
1
}
#[inline(always)]
fn clear_params(&mut self) {
self.params.clear();
self.param = 0;
}
#[inline(always)]
fn push_param(&mut self) {
if self.params.len() < MAX_PARAMS {
self.params.push(self.param);
}
self.param = 0;
}
#[inline(always)]
fn reset(&mut self) {
self.state = State::Ground;
self.clear_params();
self.intermediates.clear();
self.ignoring = false;
}
}
/// Returns the total length in bytes of the UTF-8 sequence introduced by `byte`.
///
/// Lead bytes of two-, three- and four-byte sequences yield 2, 3 and 4.
/// Every other byte (ASCII, continuation bytes, and bytes with five or more
/// leading one bits) yields 1.
#[inline(always)]
fn utf8_len(byte: u8) -> usize {
    match byte {
        0xC0..=0xDF => 2, // 110x_xxxx
        0xE0..=0xEF => 3, // 1110_xxxx
        0xF0..=0xF7 => 4, // 1111_0xxx
        _ => 1,
    }
}
/// Returns the code-point bits carried by the first byte of a UTF-8 sequence.
///
/// For two-, three- and four-byte lead bytes the length prefix is masked off,
/// leaving the low 5, 4 or 3 payload bits. Any other byte is returned unchanged.
#[inline(always)]
fn utf8_acc(byte: u8) -> u32 {
    let payload_mask: u8 = match byte {
        0xC0..=0xDF => 0x1F,
        0xE0..=0xEF => 0x0F,
        0xF0..=0xF7 => 0x07,
        _ => 0xFF,
    };
    u32::from(byte & payload_mask)
}
/// Decodes the UTF-8 character at the start of `bytes`.
///
/// Returns `None` when `bytes` is empty or does not begin with a complete,
/// well-formed UTF-8 sequence: a stray continuation byte, an invalid lead
/// byte, a wrong continuation byte, an overlong encoding, a surrogate, or a
/// truncated sequence. Bytes after the first character are ignored.
#[inline(always)]
fn decode_utf8(bytes: &[u8]) -> Option<char> {
    // A scalar value occupies at most four bytes, so only that prefix needs
    // to be validated regardless of how long `bytes` is.
    let head = &bytes[..bytes.len().min(4)];
    let valid = match std::str::from_utf8(head) {
        Ok(s) => s,
        // Keep whatever well-formed prefix precedes the first error; it is
        // empty when the very first sequence is the malformed one.
        Err(err) => std::str::from_utf8(&head[..err.valid_up_to()]).ok()?,
    };
    valid.chars().next()
}