use std::{
borrow::Cow,
iter::{FusedIterator, Peekable},
str::CharIndices,
};
/// States of the hand-written automaton that recognises ANSI escape sequences.
///
/// `Start` is the initial state and `Trap` is the dead state: once the
/// automaton is trapped, no further input can leave that state.
#[derive(Debug, Clone, Copy)]
enum State {
    Start,
    S1,
    S2,
    S3,
    S4,
    S5,
    S6,
    S7,
    S8,
    S9,
    S10,
    S11,
    Trap,
}

impl Default for State {
    /// The automaton always begins in `State::Start`.
    fn default() -> Self {
        State::Start
    }
}

impl State {
    /// Returns `true` when the input consumed so far forms a complete match.
    fn is_final(&self) -> bool {
        // Lint suppression retained from the original code (presumably so the
        // code does not rely on the `matches!` macro — TODO confirm).
        #[allow(clippy::match_like_matches_macro)]
        match *self {
            State::S3
            | State::S5
            | State::S6
            | State::S7
            | State::S8
            | State::S9
            | State::S11 => true,
            State::Start | State::S1 | State::S2 | State::S4 | State::S10 | State::Trap => false,
        }
    }

    /// Returns `true` once the automaton has entered the dead state.
    fn is_trapped(&self) -> bool {
        #[allow(clippy::match_like_matches_macro)]
        match *self {
            State::Trap => true,
            State::Start
            | State::S1
            | State::S2
            | State::S3
            | State::S4
            | State::S5
            | State::S6
            | State::S7
            | State::S8
            | State::S9
            | State::S10
            | State::S11 => false,
        }
    }

    /// Returns `true` for the characters that move a sequence into `S11`.
    fn ends_sequence(c: char) -> bool {
        #[allow(clippy::match_like_matches_macro)]
        match c {
            'A'..='P' | 'R' | 'Z' | 'c' | 'f'..='n' | 'q' | 'r' | 'y' | '=' | '>' | '<' => true,
            _ => false,
        }
    }

    /// Shared transition row for the states that sit inside a numeric
    /// parameter list (`S5`..=`S8` and `S10`); only the digit target differs.
    fn step_in_params(c: char, on_digit: State) -> State {
        match c {
            ';' => State::S10,
            '0'..='9' => on_digit,
            other if State::ends_sequence(other) => State::S11,
            _ => State::Trap,
        }
    }

    /// Advances the automaton by one input character, updating `self` in place.
    fn transition(&mut self, c: char) {
        let next = match *self {
            // Only an escape introducer can start a sequence.
            State::Start => match c {
                '\u{1b}' | '\u{9b}' => State::S1,
                _ => State::Trap,
            },
            State::S1 => match c {
                '(' | ')' => State::S2,
                ';' | '[' | '#' | '?' => State::S4,
                '0'..='9' => State::S5,
                other if State::ends_sequence(other) => State::S11,
                _ => State::Trap,
            },
            // Directly after a single parenthesis, the digits 0-2 complete a
            // match that cannot be extended (`S3`).
            State::S2 => match c {
                '(' | ')' | ';' | '[' | '#' | '?' => State::S4,
                '0'..='2' => State::S3,
                '3'..='9' => State::S5,
                other if State::ends_sequence(other) => State::S11,
                _ => State::Trap,
            },
            State::S4 => match c {
                '(' | ')' | ';' | '[' | '#' | '?' => State::S4,
                '0'..='9' => State::S5,
                other if State::ends_sequence(other) => State::S11,
                _ => State::Trap,
            },
            State::S5 => State::step_in_params(c, State::S6),
            State::S6 => State::step_in_params(c, State::S7),
            State::S7 => State::step_in_params(c, State::S8),
            State::S8 => State::step_in_params(c, State::S9),
            State::S10 => State::step_in_params(c, State::S5),
            // These states have no outgoing edges other than to the trap.
            State::S3 | State::S9 | State::S11 | State::Trap => State::Trap,
        };
        *self = next;
    }
}
/// Iterator state for scanning a string for ANSI escape sequences.
///
/// Holds the original string together with a peekable cursor over its
/// `(byte offset, char)` pairs.
#[derive(Debug)]
struct Matches<'a> {
    /// The string being scanned.
    s: &'a str,
    /// Cursor over `s`; peeking lets the scanner inspect a character without
    /// consuming it.
    it: Peekable<CharIndices<'a>>,
}

impl<'a> Matches<'a> {
    /// Creates a scanner positioned at the beginning of `s`.
    fn new(s: &'a str) -> Self {
        Matches {
            s,
            it: s.char_indices().peekable(),
        }
    }
}
/// A located span inside a string, stored as the whole string plus a
/// half-open byte range `start..end`.
#[derive(Debug)]
struct Match<'a> {
    /// The full string the span refers into.
    text: &'a str,
    /// Byte offset where the span begins (inclusive).
    start: usize,
    /// Byte offset where the span stops (exclusive).
    end: usize,
}

impl<'a> Match<'a> {
    /// Returns the matched text, i.e. `text[start..end]`.
    ///
    /// The returned slice borrows from the original string, not from `self`.
    #[inline]
    pub fn as_str(&self) -> &'a str {
        // Copy the reference out first so the slice keeps the `'a` lifetime.
        let whole: &'a str = self.text;
        let span = self.start..self.end;
        &whole[span]
    }
}
impl<'a> Iterator for Matches<'a> {
    type Item = Match<'a>;

    /// Yields the next escape sequence found by `find_ansi_code_exclusive`,
    /// or `None` when that search finds nothing more.
    fn next(&mut self) -> Option<Self::Item> {
        let (start, end) = find_ansi_code_exclusive(&mut self.it)?;
        Some(Match {
            text: self.s,
            start,
            end,
        })
    }
}
/// Marks `Matches` as fused: after `next` has returned `None` it keeps
/// returning `None`.
impl FusedIterator for Matches<'_> {}
/// Advances `it` to the next ANSI escape sequence and returns its byte range
/// as `(start, end)` with `end` exclusive, or `None` if the input runs out
/// before a sequence is found.
///
/// On success the iterator is left positioned at the first character after
/// the returned sequence, so repeated calls walk through all sequences.
fn find_ansi_code_exclusive(it: &mut Peekable<CharIndices>) -> Option<(usize, usize)> {
    loop {
        let &(start, lead) = it.peek()?;

        // Anything that is not an escape introducer cannot begin a sequence.
        if lead != '\u{1b}' && lead != '\u{9b}' {
            it.next();
            continue;
        }

        let mut state = State::default();
        let mut last_accepting = None;

        // Matching is greedy: keep feeding characters until the automaton
        // traps or the input ends, remembering the most recent position at
        // which the input so far formed a complete sequence.
        while let Some(&(idx, c)) = it.peek() {
            state.transition(c);
            if state.is_trapped() {
                // Leave the offending character unconsumed; it may itself
                // start the next sequence.
                break;
            }
            if state.is_final() {
                last_accepting = Some(idx);
            }
            it.next();
        }

        if let Some(end) = last_accepting {
            // Every character that reaches a final state is a single byte
            // (ASCII digit, letter or symbol), so `+ 1` yields the exclusive end.
            return Some((start, end + 1));
        }
        // No complete sequence from this introducer: resume scanning from the
        // current (still unconsumed) position.
    }
}
/// Removes all ANSI escape sequences from `s`.
///
/// Returns `Cow::Borrowed(s)` when `s` contains no escape sequence, so no
/// allocation happens in that case; otherwise returns a newly built
/// `Cow::Owned` string made of the non-escape pieces in their original order.
pub fn strip_ansi_codes(s: &str) -> Cow<str> {
    // Cheap probe first: if there is not a single escape sequence, the input
    // can be handed back untouched.
    let mut probe = s.char_indices().peekable();
    if find_ansi_code_exclusive(&mut probe).is_none() {
        return Cow::Borrowed(s);
    }

    let mut plain = String::new();
    for (piece, is_ansi) in AnsiCodeIterator::new(s) {
        if !is_ansi {
            plain.push_str(piece);
        }
    }
    Cow::Owned(plain)
}
/// Iterator that splits a string into ANSI escape sequences and the text
/// between them.
///
/// Each item is a `(slice, is_ansi)` pair, where `is_ansi` is `true` when
/// `slice` is an escape sequence and `false` when it is ordinary text.
pub struct AnsiCodeIterator<'a> {
    /// The string being split.
    s: &'a str,
    /// An escape sequence that was found already but is yielded on the
    /// following call, after the text that precedes it.
    pending_item: Option<(&'a str, bool)>,
    /// Byte offset just past the last escape sequence found so far.
    last_idx: usize,
    /// Byte offset up to which items have been yielded.
    cur_idx: usize,
    /// Underlying scanner that locates the escape sequences.
    iter: Matches<'a>,
}

impl<'a> AnsiCodeIterator<'a> {
    /// Creates a new iterator positioned at the start of `s`.
    pub fn new(s: &'a str) -> AnsiCodeIterator<'a> {
        let iter = Matches::new(s);
        Self {
            s,
            pending_item: None,
            last_idx: 0,
            cur_idx: 0,
            iter,
        }
    }

    /// Returns the part of the string that has been yielded so far.
    pub fn current_slice(&self) -> &str {
        let consumed = self.cur_idx;
        &self.s[..consumed]
    }

    /// Returns the part of the string that has not been yielded yet.
    pub fn rest_slice(&self) -> &str {
        let consumed = self.cur_idx;
        &self.s[consumed..]
    }
}
impl<'a> Iterator for AnsiCodeIterator<'a> {
    type Item = (&'a str, bool);

    /// Yields the next piece of the string as `(slice, is_ansi)`.
    ///
    /// Text preceding an escape sequence is yielded first; the sequence
    /// itself is parked in `pending_item` and returned by the next call.
    fn next(&mut self) -> Option<(&'a str, bool)> {
        // A sequence parked by the previous call takes priority.
        if let Some(queued) = self.pending_item.take() {
            self.cur_idx += queued.0.len();
            return Some(queued);
        }

        match self.iter.next() {
            Some(found) => {
                let code = found.as_str();
                let before = &self.s[self.last_idx..found.start];
                self.last_idx = found.end;

                if before.is_empty() {
                    // Nothing between the previous sequence and this one.
                    self.cur_idx = found.end;
                    Some((code, true))
                } else {
                    // Emit the text now and the sequence on the next call.
                    self.cur_idx = found.start;
                    self.pending_item = Some((code, true));
                    Some((before, false))
                }
            }
            // No more sequences, but trailing text is still left to emit.
            None if self.last_idx < self.s.len() => {
                let tail = &self.s[self.last_idx..];
                let total = self.s.len();
                self.cur_idx = total;
                self.last_idx = total;
                Some((tail, false))
            }
            None => None,
        }
    }
}
/// Marks `AnsiCodeIterator` as fused: after `next` has returned `None` it
/// keeps returning `None`.
impl FusedIterator for AnsiCodeIterator<'_> {}
// Tests for the escape-sequence matcher and the iterators built on it. Several
// tests compare the spans produced by `Matches` against a reference regex.
#[cfg(test)]
mod tests {
use super::*;
use lazy_static::lazy_static;
use proptest::prelude::*;
use regex::Regex;
// Reference pattern: the comparison tests below assert that `Matches` finds
// exactly the same spans as this regular expression.
lazy_static! {
static ref STRIP_ANSI_RE: Regex = Regex::new(
r"[\x1b\x9b]([()][012AB]|[\[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-PRZcf-nqry=><])",
)
.unwrap();
}
// Lets `assert_eq!` compare a regex match with a `Match`; only the byte span
// (start and end) is compared.
impl<'a, 'b> PartialEq<Match<'a>> for regex::Match<'b> {
fn eq(&self, other: &Match<'a>) -> bool {
self.start() == other.start && self.end() == other.end
}
}
proptest! {
// Property test: on generated strings made of up to five chunks, each
// optionally starting with an escape introducer, both matchers must agree.
#[test]
fn dfa_matches_old_regex(s in r"([\x1b\x9b]?.*){0,5}") {
let old_matches: Vec<_> = STRIP_ANSI_RE.find_iter(&s).collect();
let new_matches: Vec<_> = Matches::new(&s).collect();
assert_eq!(old_matches, new_matches);
}
}
// Exhaustive check: every string of length 0..=6 over a small alphabet of
// interesting bytes must produce identical matches from both matchers.
#[test]
fn dfa_matches_regex_on_small_strings() {
const POSSIBLE_BYTES: &[u8] = &[b' ', 0x1b, 0x9b, b'(', b'0', b'[', b';', b'3', b'C'];
fn check_all_strings_of_len(len: usize) {
_check_all_strings_of_len(len, &mut Vec::with_capacity(len));
}
// Recursively appends `len` more bytes to `chunk`, trying every byte in
// POSSIBLE_BYTES at each position, and checks the result once complete.
fn _check_all_strings_of_len(len: usize, chunk: &mut Vec<u8>) {
if len == 0 {
// Byte combinations that are not valid UTF-8 are skipped.
if let Ok(s) = std::str::from_utf8(chunk) {
let old_matches: Vec<_> = STRIP_ANSI_RE.find_iter(s).collect();
let new_matches: Vec<_> = Matches::new(s).collect();
assert_eq!(old_matches, new_matches);
}
return;
}
for b in POSSIBLE_BYTES {
chunk.push(*b);
_check_all_strings_of_len(len - 1, chunk);
chunk.pop();
}
}
for str_len in 0..=6 {
check_all_strings_of_len(str_len);
}
}
// Compares both matchers on a log file read from `tests/data`; the `unwrap`
// makes the test panic if the file cannot be read.
#[test]
fn complex_data() {
let s = std::fs::read_to_string(
std::path::Path::new("tests")
.join("data")
.join("sample_zellij_session.log"),
)
.unwrap();
let old_matches: Vec<_> = STRIP_ANSI_RE.find_iter(&s).collect();
let new_matches: Vec<_> = Matches::new(&s).collect();
assert_eq!(old_matches, new_matches);
}
// Drives `State` directly: a complete sequence ends in a final state, and one
// more character after it traps the automaton.
#[test]
fn state_machine() {
let ansi_code = "\x1b)B";
let mut state = State::default();
assert!(!state.is_final());
for c in ansi_code.chars() {
state.transition(c);
}
assert!(state.is_final());
state.transition('A');
assert!(state.is_trapped());
}
// Two escape characters in a row: the first is discarded and the match starts
// at the second.
#[test]
fn back_to_back_entry_char() {
let s = "\x1b\x1bf";
let matches: Vec<_> = Matches::new(s).map(|m| m.as_str()).collect();
assert_eq!(&["\x1bf"], matches.as_slice());
}
// A parenthesis right after the escape may be followed by a final character
// other than a digit; the whole string is one match.
#[test]
fn early_paren_can_use_many_chars() {
let s = "\x1b(C";
let matches: Vec<_> = Matches::new(s).map(|m| m.as_str()).collect();
assert_eq!(&[s], matches.as_slice());
}
// Five digits after the escape are still matched as a single sequence.
#[test]
fn long_run_of_digits() {
let s = "\u{1b}00000";
let matches: Vec<_> = Matches::new(s).map(|m| m.as_str()).collect();
assert_eq!(&[s], matches.as_slice());
}
// `AnsiCodeIterator` alternates between escape sequences and plain text for
// input using the parenthesised `\x1b(0` / `\x1b)B` forms.
#[test]
fn test_ansi_iter_re_vt100() {
let s = "\x1b(0lpq\x1b)Benglish";
let mut iter = AnsiCodeIterator::new(s);
assert_eq!(iter.next(), Some(("\x1b(0", true)));
assert_eq!(iter.next(), Some(("lpq", false)));
assert_eq!(iter.next(), Some(("\x1b)B", true)));
assert_eq!(iter.next(), Some(("english", false)));
}
// Walks a styled string item by item and checks that `current_slice` and
// `rest_slice` track the position after every step.
#[test]
fn test_ansi_iter_re() {
use crate::style;
let s = format!("Hello {}!", style("World").red().force_styling(true));
let mut iter = AnsiCodeIterator::new(&s);
assert_eq!(iter.next(), Some(("Hello ", false)));
assert_eq!(iter.current_slice(), "Hello ");
assert_eq!(iter.rest_slice(), "\x1b[31mWorld\x1b[0m!");
assert_eq!(iter.next(), Some(("\x1b[31m", true)));
assert_eq!(iter.current_slice(), "Hello \x1b[31m");
assert_eq!(iter.rest_slice(), "World\x1b[0m!");
assert_eq!(iter.next(), Some(("World", false)));
assert_eq!(iter.current_slice(), "Hello \x1b[31mWorld");
assert_eq!(iter.rest_slice(), "\x1b[0m!");
assert_eq!(iter.next(), Some(("\x1b[0m", true)));
assert_eq!(iter.current_slice(), "Hello \x1b[31mWorld\x1b[0m");
assert_eq!(iter.rest_slice(), "!");
assert_eq!(iter.next(), Some(("!", false)));
assert_eq!(iter.current_slice(), "Hello \x1b[31mWorld\x1b[0m!");
assert_eq!(iter.rest_slice(), "");
assert_eq!(iter.next(), None);
}
// Same walk as above, for a string that begins with two adjacent escape
// sequences (no text between them).
#[test]
fn test_ansi_iter_re_on_multi() {
use crate::style;
let s = format!("{}", style("a").red().bold().force_styling(true));
let mut iter = AnsiCodeIterator::new(&s);
assert_eq!(iter.next(), Some(("\x1b[31m", true)));
assert_eq!(iter.current_slice(), "\x1b[31m");
assert_eq!(iter.rest_slice(), "\x1b[1ma\x1b[0m");
assert_eq!(iter.next(), Some(("\x1b[1m", true)));
assert_eq!(iter.current_slice(), "\x1b[31m\x1b[1m");
assert_eq!(iter.rest_slice(), "a\x1b[0m");
assert_eq!(iter.next(), Some(("a", false)));
assert_eq!(iter.current_slice(), "\x1b[31m\x1b[1ma");
assert_eq!(iter.rest_slice(), "\x1b[0m");
assert_eq!(iter.next(), Some(("\x1b[0m", true)));
assert_eq!(iter.current_slice(), "\x1b[31m\x1b[1ma\x1b[0m");
assert_eq!(iter.rest_slice(), "");
assert_eq!(iter.next(), None);
}
}