fix code blocks

author: SavagePeanut <sourcehut@lazytapir.com> 2023-08-02 17:45:21 -0500
committer: SavagePeanut <sourcehut@lazytapir.com> 2023-08-02 17:45:21 -0500
commit: eb2942a595b5f6f1e7eed711659ceba8821be3a3 (patch)
tree: 995f4ef718c1a2b9623f83f4a3ec20a920abff5e /src
parent: db5bd2e6e42bed5204788f006d241b618671b94b (diff)
1 file changed, 135 insertions, 63 deletions
diff --git a/src/lib.rs b/src/lib.rs
index 532fec7..88b040d 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2,10 +2,9 @@ use std::collections::HashMap;
 
 use pyo3::prelude::*;
 
-const KEYWORDS: [char; 5] = ['*', '_', '~', '`', '|'];
+const KEYWORDS: [char; 4] = ['*', '_', '~', '`'];
 const NO_SUB_PARSING_KEYWORDS: [char; 1] = ['`'];
 const QUOTE_KEYWORDS: [char; 1] = ['>'];
-const BLOCK_KEYWORDS: [(char, usize); 2] = [('`', 2), ('|', 1)];
 const PLACEHOLDER: &str = "\u{200B}\u{200B}\u{200B}\u{200B}\u{200B}\u{200B}\u{200B}\u{200B}\u{200B}\u{200B}";
 
 #[pyfunction]
@@ -14,31 +13,32 @@ fn format_body(body: String, new_tags: HashMap<String, (String, String)>) -> PyR
     if chars.len() < 1 {
         return Ok(body);
     }
-    let styles: Vec<(String, usize, usize)> = parse_with_limits(&chars, 0, chars.len() - 1, 0);
+    let styles: Vec<(String, usize, usize, usize, usize)> = parse_with_limits(&chars, 0, chars.len() - 1, 0);
     let parse_quotes = new_tags.contains_key(&">".to_string());
 
-    let mut tags: Vec<(usize, String, String, bool)> = vec![];
+    let mut tags: Vec<(usize, String, usize)> = vec![];
     for style in styles {
-        let (keyword, start, end) = style;
+        let (keyword, start, remove_start, end, remove_end) = style;
         if new_tags.contains_key(&keyword) {
-            tags.push((start, keyword.clone(), new_tags.get(&keyword).unwrap().0.clone(), false));
-            tags.push((end, keyword.clone(), new_tags.get(&keyword).unwrap().1.clone(), QUOTE_KEYWORDS.contains(&keyword.chars().next().unwrap())));
+            let opening_tag = if keyword == "```language" {
+                new_tags.get(&keyword).unwrap().0.clone()
+                .replace("{}", &chars[start+3..remove_start-1]
+                .into_iter()
+                .collect::<String>())
+            } else {
+                new_tags.get(&keyword).unwrap().0.clone()
+            };
+            tags.push((start, opening_tag, remove_start));
+            tags.push((end, new_tags.get(&keyword).unwrap().1.clone(), remove_end));
         } else if keyword == ">>" && parse_quotes {
-            tags.push((start, keyword.clone(), "".to_string(), false));
+            tags.push((start, "".to_string(), start+1));
         }
     }
 
     tags.sort_by(|a, b| b.0.cmp(&a.0));
 
     for tag in tags {
-        let (index, keyword, tag, is_end_quote_block) = tag;
-        let end = if is_end_quote_block {
-            index
-        } else if keyword == ">>" {
-            index + 1
-        } else {
-            index + keyword.len()
-        };
+        let (index, tag, end) = tag;
         chars = [chars[..index].to_vec(), tag.chars().collect(), chars[end..].to_vec()].concat();
     }
 
@@ -51,11 +51,10 @@ fn remove_non_escaped_backslashes(text: String) -> String {
     tmp_string.replace(PLACEHOLDER, "\\")
 }
 
-fn parse_with_limits(chars: &Vec<char>, start: usize, end: usize, depth: usize) -> Vec<(String, usize, usize)> {
+fn parse_with_limits(chars: &Vec<char>, start: usize, end: usize, depth: usize) -> Vec<(String, usize, usize, usize, usize)> {
     let mut styles = Vec::new();
     let mut index = start;
     let end = end.min(chars.len() - 1);
-    println!("parse with limits start {}, end {}", start, end);
 
     while index <= end {
         if preceeded_by_backslash(chars, index, start) {
@@ -64,58 +63,83 @@ fn parse_with_limits(chars: &Vec<char>, start: usize, end: usize, depth: usize)
         }
 
         let c = chars[index];
-        if c == '|' && !is_char_repeating(chars, c, index, end) {
-            index += 1;
-            continue;
-        }
-
         if QUOTE_KEYWORDS.contains(&c) {
             if is_quote_start(chars, index, depth) {
                 let to = seek_end_of_quote(chars, index, end, depth);
-                styles.push((">".to_string(), index, to));
+                styles.push((">".to_string(), index, index + 1, to, to));
                 styles.append(&mut parse_with_limits(chars, index + 1, to, depth + 1));
                 index = to;
                 continue;
             }
-            if depth > 0 {
-                styles.push((">>".to_string(), index, index + 1));
+            if is_nested_quote(chars, index, depth) {
+                styles.push((">>".to_string(), index, index + 1, index + 1, index + 1));
             }
             index += 1;
             continue;
         }
 
-        if !preceeded_by_whitespace(chars, index, start) || followed_by_whitespace(chars, index, end) {
-            index += 1;
+        if c == '`' && is_char_repeating(chars, c, 2, index + 1, end) {
+            let end_of_line = seek_end_of_line(chars, index + 1, end);
+            if end_of_line == end {
+                index += 3;
+                continue;
+            }
+            match seek_end_block(chars, c, end_of_line, end, depth) {
+                Some(to) => {
+                    if to != index + 3 && is_quote_start(chars, index, depth) {
+                        let keyword = if end_of_line == index + 3 {
+                            "```".to_string()
+                        } else {
+                            "```language".to_string()
+                        };
+                        let remove_end = if depth > 0 && to == end {
+                            to
+                        } else {
+                            to + 4 + depth
+                        };
+                        styles.push((keyword, index, end_of_line + 1, to, remove_end));
+                        styles.append(&mut parse_quotes_in_code_block(chars, index + 3, to, depth));
+                    }
+                    index = to + 3;
+                    continue;
+                }
+                None => ()
+            }
+            index += 3;
             continue;
         }
 
-        if !KEYWORDS.contains(&c) {
+        if !preceeded_by_whitespace(chars, index, start) || followed_by_whitespace(chars, index, end) {
             index += 1;
             continue;
         }
 
-        if BLOCK_KEYWORDS.iter().any(|&(k, _)| k == c) && is_char_repeating(chars, c, index, end) {
-            let block_indicator_size = get_block_indicator_size(c);
-            match seek_end_block(chars, c, index + block_indicator_size + 1, end) {
+        if c == '|' && is_char_repeating(chars, c, 1, index + 1, end) {
+            match seek_end(chars, c, index + 2, 1, end) {
                 Some(to) => {
-                    if to != index + block_indicator_size * 2 - 1 {
-                        let keyword = c.to_string().repeat(block_indicator_size+1);
-                        styles.push((keyword, index, to));
-                        if !NO_SUB_PARSING_KEYWORDS.contains(&c) {
-                            styles.append(&mut parse_with_limits(chars, index + block_indicator_size + 1, to - 1, depth));
-                        }
+                    if to != index + 2 {
+                        let keyword = "||".to_string();
+                        styles.push((keyword, index, index + 2, to, to + 2));
+                        styles.append(&mut parse_with_limits(chars, index + 2, to - 1, depth));
                     }
-                    index = to + block_indicator_size;
+                    index = to + 2;
                     continue;
                 }
                 None => ()
             }
+            index += 2;
+            continue;
         }
 
-        match seek_end(chars, c, index + 1, end) {
+        if !KEYWORDS.contains(&c) {
+            index += 1;
+            continue;
+        }
+
+        match seek_end(chars, c, index + 1, 0, end) {
             Some (to) => {
                 if to != index + 1 {
-                    styles.push((c.to_string(), index, to));
+                    styles.push((c.to_string(), index, index + 1, to, to + 1));
                     if !NO_SUB_PARSING_KEYWORDS.contains(&c) {
                         styles.append(&mut parse_with_limits(chars, index + 1, to - 1, depth));
                     }
@@ -129,10 +153,51 @@ fn parse_with_limits(chars: &Vec<char>, start: usize, end: usize, depth: usize)
     styles
 }
 
-fn is_char_repeating(chars: &Vec<char>, keyword: char, index: usize, end: usize) -> bool {
-    let block_indicator_size = get_block_indicator_size(keyword);
+fn parse_quotes_in_code_block(chars: &Vec<char>, start: usize, end: usize, depth: usize) -> Vec<(String, usize, usize, usize, usize)> {
+    let mut quotes = Vec::new();
+    let mut index = start;
+    let end = end.min(chars.len() - 1);
+
+    if depth < 1 {
+        return quotes;
+    }
+
+    while index <= end {
+        let c = chars[index];
+        if QUOTE_KEYWORDS.contains(&c) {
+            if is_nested_quote(chars, index, depth) {
+                quotes.push((">>".to_string(), index, index + 1, index + 1, index + 1));
+            }
+            index += 1;
+            continue;
+        }
+        index += 1;
+    }
+    quotes
+}
+
+fn is_nested_quote(chars: &Vec<char>, start: usize, depth: usize) -> bool {
+    let mut index = start;
+    let mut count = 0;
 
-    (0..block_indicator_size as usize)
+    while index > 0 {
+        if chars[index] == '\n' {
+            return true;
+        }
+        if !QUOTE_KEYWORDS.contains(&chars[index]) {
+            return false;
+        }
+        count += 1;
+        if count > depth {
+            return false;
+        }
+        index -= 1;
+    }
+    true
+}
+
+fn is_char_repeating(chars: &Vec<char>, keyword: char, repetitions: usize, index: usize, end: usize) -> bool {
+    (0..repetitions as usize)
         .all(|i| index + i <= end && chars[index + i] == keyword)
 }
 
@@ -144,7 +209,7 @@ fn followed_by_whitespace(chars: &Vec<char>, index: usize, end: usize) -> bool {
     index >= end || chars[index + 1].is_whitespace()
 }
 
-fn seek_end(chars: &Vec<char>, keyword: char, start: usize, end: usize) -> Option<usize> {
+fn seek_end(chars: &Vec<char>, keyword: char, start: usize, repetitions: usize, end: usize) -> Option<usize> {
     for i in start..=end {
         let c = chars[i];
         if c == '\n' {
@@ -153,10 +218,11 @@ fn seek_end(chars: &Vec<char>, keyword: char, start: usize, end: usize) -> Optio
         if c == keyword
             && !chars[i - 1].is_whitespace()
             && !preceeded_by_backslash(chars, i, start)
+            && is_char_repeating(chars, keyword, repetitions, i + 1, end)
         {
-            match seek_higher_order_end(chars, c, i + 1, end) {
-                Some(higher_order_index) => {
-                    return Some(higher_order_index);
+            match seek_higher_order_end(chars, c, i + 1, repetitions, end) {
+                Some(higher_order_i) => {
+                    return Some(higher_order_i);
                 }
                 None => {
                     return Some(i);
@@ -167,7 +233,7 @@ fn seek_end(chars: &Vec<char>, keyword: char, start: usize, end: usize) -> Optio
     None
 }
 
-fn seek_higher_order_end(chars: &Vec<char>, keyword: char, start: usize, end: usize) -> Option<usize> {
+fn seek_higher_order_end(chars: &Vec<char>, keyword: char, start: usize, repetitions: usize, end: usize) -> Option<usize> {
     for i in start..=end {
         let c = chars[i];
         if c == '\n' {
@@ -177,6 +243,7 @@ fn seek_higher_order_end(chars: &Vec<char>, keyword: char, start: usize, end: us
             && chars[i - 1].is_whitespace()
             && !followed_by_whitespace(chars, i, end)
             && !preceeded_by_backslash(chars, i, start)
+            && is_char_repeating(chars, keyword, repetitions, i + 1, end)
         {
             return None; // "*bold* *<--- beginning of new bold>*"
         }
@@ -184,6 +251,7 @@ fn seek_higher_order_end(chars: &Vec<char>, keyword: char, start: usize, end: us
             && !chars[i - 1].is_whitespace()
             && followed_by_whitespace(chars, i, end)
             && !preceeded_by_backslash(chars, i, start)
+            && is_char_repeating(chars, keyword, repetitions, i + 1, end)
         {
             return Some(i);
         }
@@ -191,6 +259,14 @@ fn seek_higher_order_end(chars: &Vec<char>, keyword: char, start: usize, end: us
     None
 }
 
+fn seek_end_of_line(chars: &Vec<char>, start: usize, end: usize) -> usize {
+    chars[start..=end]
+        .iter()
+        .enumerate()
+        .find(|&(_, &c)| c == '\n')
+        .map_or(end + 1, |(i, _)| start + i)
+}
+
 fn seek_end_of_quote(chars: &Vec<char>, start: usize, end: usize, depth: usize) -> usize {
     for i in start..=end {
         if chars[i] == '\n' {
@@ -205,16 +281,21 @@ fn seek_end_of_quote(chars: &Vec<char>, start: usize, end: usize, depth: usize)
     end + 1
 }
 
-fn seek_end_block(chars: &Vec<char>, keyword: char, start: usize, end: usize) -> Option<usize> {
+fn seek_end_block(chars: &Vec<char>, keyword: char, start: usize, end: usize, depth: usize) -> Option<usize> {
     for i in start..=end {
-        if chars[i] == keyword
-            && is_char_repeating(chars, keyword, i + 1, end)
-            && !preceeded_by_backslash(chars, i, start)
+        if chars[i] == '\n'
+            && i + 4 + depth > end
+            && (depth == 0 || chars[i + 1..i + 1 + depth].iter().all(|&c| QUOTE_KEYWORDS.contains(&c)))
+            && chars[i + 1 + depth] == keyword
+            && is_char_repeating(chars, keyword, 2, i + 1 + depth, end)
         {
             return Some(i);
         }
     }
-    None
+    if end == chars.len() - 1 {
+        return None;
+    }
+    Some(end)
 }
 
 fn is_quote_start(chars: &Vec<char>, index: usize, depth: usize) -> bool {
@@ -232,15 +313,6 @@ fn preceeded_by_backslash(chars: &Vec<char>, index: usize, start: usize) -> bool
     num_backslashes % 2 == 1
 }
 
-fn get_block_indicator_size(keyword: char) -> usize {
-    for &(k, v) in BLOCK_KEYWORDS.iter() {
-        if k == keyword {
-            return v;
-        }
-    }
-    1 // shouldn't ever happen
-}
-
 #[pymodule]
 fn slidge_style_parser(_py: Python, m: &PyModule) -> PyResult<()> {
     m.add_function(wrap_pyfunction!(format_body, m)?)?;
author	SavagePeanut <sourcehut@lazytapir.com>	2023-08-02 17:45:21 -0500
committer	SavagePeanut <sourcehut@lazytapir.com>	2023-08-02 17:45:21 -0500
commit	eb2942a595b5f6f1e7eed711659ceba8821be3a3 (patch)
tree	995f4ef718c1a2b9623f83f4a3ec20a920abff5e /src
parent	db5bd2e6e42bed5204788f006d241b618671b94b (diff)