diff options
| author | SavagePeanut <sourcehut@lazytapir.com> | 2023-08-02 17:45:21 -0500 |
|---|---|---|
| committer | SavagePeanut <sourcehut@lazytapir.com> | 2023-08-02 17:45:21 -0500 |
| commit | eb2942a595b5f6f1e7eed711659ceba8821be3a3 (patch) | |
| tree | 995f4ef718c1a2b9623f83f4a3ec20a920abff5e /src | |
| parent | db5bd2e6e42bed5204788f006d241b618671b94b (diff) | |
fix code blocks
Diffstat (limited to 'src')
| -rw-r--r-- | src/lib.rs | 198 |
1 files changed, 135 insertions, 63 deletions
@@ -2,10 +2,9 @@ use std::collections::HashMap; use pyo3::prelude::*; -const KEYWORDS: [char; 5] = ['*', '_', '~', '`', '|']; +const KEYWORDS: [char; 4] = ['*', '_', '~', '`']; const NO_SUB_PARSING_KEYWORDS: [char; 1] = ['`']; const QUOTE_KEYWORDS: [char; 1] = ['>']; -const BLOCK_KEYWORDS: [(char, usize); 2] = [('`', 2), ('|', 1)]; const PLACEHOLDER: &str = "\u{200B}\u{200B}\u{200B}\u{200B}\u{200B}\u{200B}\u{200B}\u{200B}\u{200B}\u{200B}"; #[pyfunction] @@ -14,31 +13,32 @@ fn format_body(body: String, new_tags: HashMap<String, (String, String)>) -> PyR if chars.len() < 1 { return Ok(body); } - let styles: Vec<(String, usize, usize)> = parse_with_limits(&chars, 0, chars.len() - 1, 0); + let styles: Vec<(String, usize, usize, usize, usize)> = parse_with_limits(&chars, 0, chars.len() - 1, 0); let parse_quotes = new_tags.contains_key(&">".to_string()); - let mut tags: Vec<(usize, String, String, bool)> = vec![]; + let mut tags: Vec<(usize, String, usize)> = vec![]; for style in styles { - let (keyword, start, end) = style; + let (keyword, start, remove_start, end, remove_end) = style; if new_tags.contains_key(&keyword) { - tags.push((start, keyword.clone(), new_tags.get(&keyword).unwrap().0.clone(), false)); - tags.push((end, keyword.clone(), new_tags.get(&keyword).unwrap().1.clone(), QUOTE_KEYWORDS.contains(&keyword.chars().next().unwrap()))); + let opening_tag = if keyword == "```language" { + new_tags.get(&keyword).unwrap().0.clone() + .replace("{}", &chars[start+3..remove_start-1] + .into_iter() + .collect::<String>()) + } else { + new_tags.get(&keyword).unwrap().0.clone() + }; + tags.push((start, opening_tag, remove_start)); + tags.push((end, new_tags.get(&keyword).unwrap().1.clone(), remove_end)); } else if keyword == ">>" && parse_quotes { - tags.push((start, keyword.clone(), "".to_string(), false)); + tags.push((start, "".to_string(), start+1)); } } tags.sort_by(|a, b| b.0.cmp(&a.0)); for tag in tags { - let (index, keyword, tag, is_end_quote_block) = tag; - let end = if is_end_quote_block { - index - } else if keyword == ">>" { - index + 1 - } else { - index + keyword.len() - }; + let (index, tag, end) = tag; chars = [chars[..index].to_vec(), tag.chars().collect(), chars[end..].to_vec()].concat(); } @@ -51,11 +51,10 @@ fn remove_non_escaped_backslashes(text: String) -> String { tmp_string.replace(PLACEHOLDER, "\\") } -fn parse_with_limits(chars: &Vec<char>, start: usize, end: usize, depth: usize) -> Vec<(String, usize, usize)> { +fn parse_with_limits(chars: &Vec<char>, start: usize, end: usize, depth: usize) -> Vec<(String, usize, usize, usize, usize)> { let mut styles = Vec::new(); let mut index = start; let end = end.min(chars.len() - 1); - println!("parse with limits start {}, end {}", start, end); while index <= end { if preceeded_by_backslash(chars, index, start) { @@ -64,58 +63,83 @@ fn parse_with_limits(chars: &Vec<char>, start: usize, end: usize, depth: usize) } let c = chars[index]; - if c == '|' && !is_char_repeating(chars, c, index, end) { - index += 1; - continue; - } - if QUOTE_KEYWORDS.contains(&c) { if is_quote_start(chars, index, depth) { let to = seek_end_of_quote(chars, index, end, depth); - styles.push((">".to_string(), index, to)); + styles.push((">".to_string(), index, index + 1, to, to)); styles.append(&mut parse_with_limits(chars, index + 1, to, depth + 1)); index = to; continue; } - if depth > 0 { - styles.push((">>".to_string(), index, index + 1)); + if is_nested_quote(chars, index, depth) { + styles.push((">>".to_string(), index, index + 1, index + 1, index + 1)); } index += 1; continue; } - if !preceeded_by_whitespace(chars, index, start) || followed_by_whitespace(chars, index, end) { - index += 1; + if c == '`' && is_char_repeating(chars, c, 2, index + 1, end) { + let end_of_line = seek_end_of_line(chars, index + 1, end); + if end_of_line == end { + index += 3; + continue; + } + match seek_end_block(chars, c, end_of_line, end, depth) { + Some(to) => { + if to != index + 3 && is_quote_start(chars, index, depth) { + let keyword = if end_of_line == index + 3 { + "```".to_string() + } else { + "```language".to_string() + }; + let remove_end = if depth > 0 && to == end { + to + } else { + to + 4 + depth + }; + styles.push((keyword, index, end_of_line + 1, to, remove_end)); + styles.append(&mut parse_quotes_in_code_block(chars, index + 3, to, depth)); + } + index = to + 3; + continue; + } + None => () + } + index += 3; continue; } - if !KEYWORDS.contains(&c) { + if !preceeded_by_whitespace(chars, index, start) || followed_by_whitespace(chars, index, end) { index += 1; continue; } - if BLOCK_KEYWORDS.iter().any(|&(k, _)| k == c) && is_char_repeating(chars, c, index, end) { - let block_indicator_size = get_block_indicator_size(c); - match seek_end_block(chars, c, index + block_indicator_size + 1, end) { + if c == '|' && is_char_repeating(chars, c, 1, index + 1, end) { + match seek_end(chars, c, index + 2, 1, end) { Some(to) => { - if to != index + block_indicator_size * 2 - 1 { - let keyword = c.to_string().repeat(block_indicator_size+1); - styles.push((keyword, index, to)); - if !NO_SUB_PARSING_KEYWORDS.contains(&c) { - styles.append(&mut parse_with_limits(chars, index + block_indicator_size + 1, to - 1, depth)); - } + if to != index + 2 { + let keyword = "||".to_string(); + styles.push((keyword, index, index + 2, to, to + 2)); + styles.append(&mut parse_with_limits(chars, index + 2, to - 1, depth)); } - index = to + block_indicator_size; + index = to + 2; continue; } None => () } + index += 2; + continue; } - match seek_end(chars, c, index + 1, end) { + if !KEYWORDS.contains(&c) { + index += 1; + continue; + } + + match seek_end(chars, c, index + 1, 0, end) { Some (to) => { if to != index + 1 { - styles.push((c.to_string(), index, to)); + styles.push((c.to_string(), index, index + 1, to, to + 1)); if !NO_SUB_PARSING_KEYWORDS.contains(&c) { styles.append(&mut parse_with_limits(chars, index + 1, to - 1, depth)); } @@ -129,10 +153,51 @@ fn parse_with_limits(chars: &Vec<char>, start: usize, end: usize, depth: usize) styles } -fn is_char_repeating(chars: &Vec<char>, keyword: char, index: usize, end: usize) -> bool { - let block_indicator_size = get_block_indicator_size(keyword); +fn parse_quotes_in_code_block(chars: &Vec<char>, start: usize, end: usize, depth: usize) -> Vec<(String, usize, usize, usize, usize)> { + let mut quotes = Vec::new(); + let mut index = start; + let end = end.min(chars.len() - 1); + + if depth < 1 { + return quotes; + } + + while index <= end { + let c = chars[index]; + if QUOTE_KEYWORDS.contains(&c) { + if is_nested_quote(chars, index, depth) { + quotes.push((">>".to_string(), index, index + 1, index + 1, index + 1)); + } + index += 1; + continue; + } + index += 1; + } + quotes +} + +fn is_nested_quote(chars: &Vec<char>, start: usize, depth: usize) -> bool { + let mut index = start; + let mut count = 0; - (0..block_indicator_size as usize) + while index > 0 { + if chars[index] == '\n' { + return true; + } + if !QUOTE_KEYWORDS.contains(&chars[index]) { + return false; + } + count += 1; + if count > depth { + return false; + } + index -= 1; + } + true +} + +fn is_char_repeating(chars: &Vec<char>, keyword: char, repetitions: usize, index: usize, end: usize) -> bool { + (0..repetitions as usize) .all(|i| index + i <= end && chars[index + i] == keyword) } @@ -144,7 +209,7 @@ fn followed_by_whitespace(chars: &Vec<char>, index: usize, end: usize) -> bool { index >= end || chars[index + 1].is_whitespace() } -fn seek_end(chars: &Vec<char>, keyword: char, start: usize, end: usize) -> Option<usize> { +fn seek_end(chars: &Vec<char>, keyword: char, start: usize, repetitions: usize, end: usize) -> Option<usize> { for i in start..=end { let c = chars[i]; if c == '\n' { @@ -153,10 +218,11 @@ fn seek_end(chars: &Vec<char>, keyword: char, start: usize, end: usize) -> Optio if c == keyword && !chars[i - 1].is_whitespace() && !preceeded_by_backslash(chars, i, start) + && is_char_repeating(chars, keyword, repetitions, i + 1, end) { - match seek_higher_order_end(chars, c, i + 1, end) { - Some(higher_order_index) => { - return Some(higher_order_index); + match seek_higher_order_end(chars, c, i + 1, repetitions, end) { + Some(higher_order_i) => { + return Some(higher_order_i); } None => { return Some(i); @@ -167,7 +233,7 @@ fn seek_end(chars: &Vec<char>, keyword: char, start: usize, end: usize) -> Optio None } -fn seek_higher_order_end(chars: &Vec<char>, keyword: char, start: usize, end: usize) -> Option<usize> { +fn seek_higher_order_end(chars: &Vec<char>, keyword: char, start: usize, repetitions: usize, end: usize) -> Option<usize> { for i in start..=end { let c = chars[i]; if c == '\n' { @@ -177,6 +243,7 @@ fn seek_higher_order_end(chars: &Vec<char>, keyword: char, start: usize, end: us && chars[i - 1].is_whitespace() && !followed_by_whitespace(chars, i, end) && !preceeded_by_backslash(chars, i, start) + && is_char_repeating(chars, keyword, repetitions, i + 1, end) { return None; // "*bold* *<--- beginning of new bold>*" } @@ -184,6 +251,7 @@ fn seek_higher_order_end(chars: &Vec<char>, keyword: char, start: usize, end: us && !chars[i - 1].is_whitespace() && followed_by_whitespace(chars, i, end) && !preceeded_by_backslash(chars, i, start) + && is_char_repeating(chars, keyword, repetitions, i + 1, end) { return Some(i); } @@ -191,6 +259,14 @@ fn seek_higher_order_end(chars: &Vec<char>, keyword: char, start: usize, end: us None } +fn seek_end_of_line(chars: &Vec<char>, start: usize, end: usize) -> usize { + chars[start..=end] + .iter() + .enumerate() + .find(|&(_, &c)| c == '\n') + .map_or(end + 1, |(i, _)| start + i) +} + fn seek_end_of_quote(chars: &Vec<char>, start: usize, end: usize, depth: usize) -> usize { for i in start..=end { if chars[i] == '\n' { @@ -205,16 +281,21 @@ fn seek_end_of_quote(chars: &Vec<char>, start: usize, end: usize, depth: usize) end + 1 } -fn seek_end_block(chars: &Vec<char>, keyword: char, start: usize, end: usize) -> Option<usize> { +fn seek_end_block(chars: &Vec<char>, keyword: char, start: usize, end: usize, depth: usize) -> Option<usize> { for i in start..=end { - if chars[i] == keyword - && is_char_repeating(chars, keyword, i + 1, end) - && !preceeded_by_backslash(chars, i, start) + if chars[i] == '\n' + && i + 4 + depth > end + && (depth == 0 || chars[i + 1..i + 1 + depth].iter().all(|&c| QUOTE_KEYWORDS.contains(&c))) + && chars[i + 1 + depth] == keyword + && is_char_repeating(chars, keyword, 2, i + 1 + depth, end) { return Some(i); } } - None + if end == chars.len() - 1 { + return None; + } + Some(end) } fn is_quote_start(chars: &Vec<char>, index: usize, depth: usize) -> bool { @@ -232,15 +313,6 @@ fn preceeded_by_backslash(chars: &Vec<char>, index: usize, start: usize) -> bool num_backslashes % 2 == 1 } -fn get_block_indicator_size(keyword: char) -> usize { - for &(k, v) in BLOCK_KEYWORDS.iter() { - if k == keyword { - return v; - } - } - 1 // shouldn't ever happen -} - #[pymodule] fn slidge_style_parser(_py: Python, m: &PyModule) -> PyResult<()> { m.add_function(wrap_pyfunction!(format_body, m)?)?; |
