summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author SavagePeanut <sourcehut@lazytapir.com> 2023-08-02 17:45:21 -0500
committer SavagePeanut <sourcehut@lazytapir.com> 2023-08-02 17:45:21 -0500
commit eb2942a595b5f6f1e7eed711659ceba8821be3a3 (patch)
tree 995f4ef718c1a2b9623f83f4a3ec20a920abff5e /src
parent db5bd2e6e42bed5204788f006d241b618671b94b (diff)
fix code blocks
Diffstat (limited to 'src')
-rw-r--r-- src/lib.rs 198
1 files changed, 135 insertions, 63 deletions
diff --git a/src/lib.rs b/src/lib.rs
index 532fec7..88b040d 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2,10 +2,9 @@ use std::collections::HashMap;
use pyo3::prelude::*;
-const KEYWORDS: [char; 5] = ['*', '_', '~', '`', '|'];
+const KEYWORDS: [char; 4] = ['*', '_', '~', '`'];
const NO_SUB_PARSING_KEYWORDS: [char; 1] = ['`'];
const QUOTE_KEYWORDS: [char; 1] = ['>'];
-const BLOCK_KEYWORDS: [(char, usize); 2] = [('`', 2), ('|', 1)];
const PLACEHOLDER: &str = "\u{200B}\u{200B}\u{200B}\u{200B}\u{200B}\u{200B}\u{200B}\u{200B}\u{200B}\u{200B}";
#[pyfunction]
@@ -14,31 +13,32 @@ fn format_body(body: String, new_tags: HashMap<String, (String, String)>) -> PyR
if chars.len() < 1 {
return Ok(body);
}
- let styles: Vec<(String, usize, usize)> = parse_with_limits(&chars, 0, chars.len() - 1, 0);
+ let styles: Vec<(String, usize, usize, usize, usize)> = parse_with_limits(&chars, 0, chars.len() - 1, 0);
let parse_quotes = new_tags.contains_key(&">".to_string());
- let mut tags: Vec<(usize, String, String, bool)> = vec![];
+ let mut tags: Vec<(usize, String, usize)> = vec![];
for style in styles {
- let (keyword, start, end) = style;
+ let (keyword, start, remove_start, end, remove_end) = style;
if new_tags.contains_key(&keyword) {
- tags.push((start, keyword.clone(), new_tags.get(&keyword).unwrap().0.clone(), false));
- tags.push((end, keyword.clone(), new_tags.get(&keyword).unwrap().1.clone(), QUOTE_KEYWORDS.contains(&keyword.chars().next().unwrap())));
+ let opening_tag = if keyword == "```language" {
+ new_tags.get(&keyword).unwrap().0.clone()
+ .replace("{}", &chars[start+3..remove_start-1]
+ .into_iter()
+ .collect::<String>())
+ } else {
+ new_tags.get(&keyword).unwrap().0.clone()
+ };
+ tags.push((start, opening_tag, remove_start));
+ tags.push((end, new_tags.get(&keyword).unwrap().1.clone(), remove_end));
} else if keyword == ">>" && parse_quotes {
- tags.push((start, keyword.clone(), "".to_string(), false));
+ tags.push((start, "".to_string(), start+1));
}
}
tags.sort_by(|a, b| b.0.cmp(&a.0));
for tag in tags {
- let (index, keyword, tag, is_end_quote_block) = tag;
- let end = if is_end_quote_block {
- index
- } else if keyword == ">>" {
- index + 1
- } else {
- index + keyword.len()
- };
+ let (index, tag, end) = tag;
chars = [chars[..index].to_vec(), tag.chars().collect(), chars[end..].to_vec()].concat();
}
@@ -51,11 +51,10 @@ fn remove_non_escaped_backslashes(text: String) -> String {
tmp_string.replace(PLACEHOLDER, "\\")
}
-fn parse_with_limits(chars: &Vec<char>, start: usize, end: usize, depth: usize) -> Vec<(String, usize, usize)> {
+fn parse_with_limits(chars: &Vec<char>, start: usize, end: usize, depth: usize) -> Vec<(String, usize, usize, usize, usize)> {
let mut styles = Vec::new();
let mut index = start;
let end = end.min(chars.len() - 1);
- println!("parse with limits start {}, end {}", start, end);
while index <= end {
if preceeded_by_backslash(chars, index, start) {
@@ -64,58 +63,83 @@ fn parse_with_limits(chars: &Vec<char>, start: usize, end: usize, depth: usize)
}
let c = chars[index];
- if c == '|' && !is_char_repeating(chars, c, index, end) {
- index += 1;
- continue;
- }
-
if QUOTE_KEYWORDS.contains(&c) {
if is_quote_start(chars, index, depth) {
let to = seek_end_of_quote(chars, index, end, depth);
- styles.push((">".to_string(), index, to));
+ styles.push((">".to_string(), index, index + 1, to, to));
styles.append(&mut parse_with_limits(chars, index + 1, to, depth + 1));
index = to;
continue;
}
- if depth > 0 {
- styles.push((">>".to_string(), index, index + 1));
+ if is_nested_quote(chars, index, depth) {
+ styles.push((">>".to_string(), index, index + 1, index + 1, index + 1));
}
index += 1;
continue;
}
- if !preceeded_by_whitespace(chars, index, start) || followed_by_whitespace(chars, index, end) {
- index += 1;
+ if c == '`' && is_char_repeating(chars, c, 2, index + 1, end) {
+ let end_of_line = seek_end_of_line(chars, index + 1, end);
+ if end_of_line == end {
+ index += 3;
+ continue;
+ }
+ match seek_end_block(chars, c, end_of_line, end, depth) {
+ Some(to) => {
+ if to != index + 3 && is_quote_start(chars, index, depth) {
+ let keyword = if end_of_line == index + 3 {
+ "```".to_string()
+ } else {
+ "```language".to_string()
+ };
+ let remove_end = if depth > 0 && to == end {
+ to
+ } else {
+ to + 4 + depth
+ };
+ styles.push((keyword, index, end_of_line + 1, to, remove_end));
+ styles.append(&mut parse_quotes_in_code_block(chars, index + 3, to, depth));
+ }
+ index = to + 3;
+ continue;
+ }
+ None => ()
+ }
+ index += 3;
continue;
}
- if !KEYWORDS.contains(&c) {
+ if !preceeded_by_whitespace(chars, index, start) || followed_by_whitespace(chars, index, end) {
index += 1;
continue;
}
- if BLOCK_KEYWORDS.iter().any(|&(k, _)| k == c) && is_char_repeating(chars, c, index, end) {
- let block_indicator_size = get_block_indicator_size(c);
- match seek_end_block(chars, c, index + block_indicator_size + 1, end) {
+ if c == '|' && is_char_repeating(chars, c, 1, index + 1, end) {
+ match seek_end(chars, c, index + 2, 1, end) {
Some(to) => {
- if to != index + block_indicator_size * 2 - 1 {
- let keyword = c.to_string().repeat(block_indicator_size+1);
- styles.push((keyword, index, to));
- if !NO_SUB_PARSING_KEYWORDS.contains(&c) {
- styles.append(&mut parse_with_limits(chars, index + block_indicator_size + 1, to - 1, depth));
- }
+ if to != index + 2 {
+ let keyword = "||".to_string();
+ styles.push((keyword, index, index + 2, to, to + 2));
+ styles.append(&mut parse_with_limits(chars, index + 2, to - 1, depth));
}
- index = to + block_indicator_size;
+ index = to + 2;
continue;
}
None => ()
}
+ index += 2;
+ continue;
}
- match seek_end(chars, c, index + 1, end) {
+ if !KEYWORDS.contains(&c) {
+ index += 1;
+ continue;
+ }
+
+ match seek_end(chars, c, index + 1, 0, end) {
Some (to) => {
if to != index + 1 {
- styles.push((c.to_string(), index, to));
+ styles.push((c.to_string(), index, index + 1, to, to + 1));
if !NO_SUB_PARSING_KEYWORDS.contains(&c) {
styles.append(&mut parse_with_limits(chars, index + 1, to - 1, depth));
}
@@ -129,10 +153,51 @@ fn parse_with_limits(chars: &Vec<char>, start: usize, end: usize, depth: usize)
styles
}
-fn is_char_repeating(chars: &Vec<char>, keyword: char, index: usize, end: usize) -> bool {
- let block_indicator_size = get_block_indicator_size(keyword);
+fn parse_quotes_in_code_block(chars: &Vec<char>, start: usize, end: usize, depth: usize) -> Vec<(String, usize, usize, usize, usize)> {
+ let mut quotes = Vec::new();
+ let mut index = start;
+ let end = end.min(chars.len() - 1);
+
+ if depth < 1 {
+ return quotes;
+ }
+
+ while index <= end {
+ let c = chars[index];
+ if QUOTE_KEYWORDS.contains(&c) {
+ if is_nested_quote(chars, index, depth) {
+ quotes.push((">>".to_string(), index, index + 1, index + 1, index + 1));
+ }
+ index += 1;
+ continue;
+ }
+ index += 1;
+ }
+ quotes
+}
+
+fn is_nested_quote(chars: &Vec<char>, start: usize, depth: usize) -> bool {
+ let mut index = start;
+ let mut count = 0;
- (0..block_indicator_size as usize)
+ while index > 0 {
+ if chars[index] == '\n' {
+ return true;
+ }
+ if !QUOTE_KEYWORDS.contains(&chars[index]) {
+ return false;
+ }
+ count += 1;
+ if count > depth {
+ return false;
+ }
+ index -= 1;
+ }
+ true
+}
+
+fn is_char_repeating(chars: &Vec<char>, keyword: char, repetitions: usize, index: usize, end: usize) -> bool {
+ (0..repetitions as usize)
.all(|i| index + i <= end && chars[index + i] == keyword)
}
@@ -144,7 +209,7 @@ fn followed_by_whitespace(chars: &Vec<char>, index: usize, end: usize) -> bool {
index >= end || chars[index + 1].is_whitespace()
}
-fn seek_end(chars: &Vec<char>, keyword: char, start: usize, end: usize) -> Option<usize> {
+fn seek_end(chars: &Vec<char>, keyword: char, start: usize, repetitions: usize, end: usize) -> Option<usize> {
for i in start..=end {
let c = chars[i];
if c == '\n' {
@@ -153,10 +218,11 @@ fn seek_end(chars: &Vec<char>, keyword: char, start: usize, end: usize) -> Optio
if c == keyword
&& !chars[i - 1].is_whitespace()
&& !preceeded_by_backslash(chars, i, start)
+ && is_char_repeating(chars, keyword, repetitions, i + 1, end)
{
- match seek_higher_order_end(chars, c, i + 1, end) {
- Some(higher_order_index) => {
- return Some(higher_order_index);
+ match seek_higher_order_end(chars, c, i + 1, repetitions, end) {
+ Some(higher_order_i) => {
+ return Some(higher_order_i);
}
None => {
return Some(i);
@@ -167,7 +233,7 @@ fn seek_end(chars: &Vec<char>, keyword: char, start: usize, end: usize) -> Optio
None
}
-fn seek_higher_order_end(chars: &Vec<char>, keyword: char, start: usize, end: usize) -> Option<usize> {
+fn seek_higher_order_end(chars: &Vec<char>, keyword: char, start: usize, repetitions: usize, end: usize) -> Option<usize> {
for i in start..=end {
let c = chars[i];
if c == '\n' {
@@ -177,6 +243,7 @@ fn seek_higher_order_end(chars: &Vec<char>, keyword: char, start: usize, end: us
&& chars[i - 1].is_whitespace()
&& !followed_by_whitespace(chars, i, end)
&& !preceeded_by_backslash(chars, i, start)
+ && is_char_repeating(chars, keyword, repetitions, i + 1, end)
{
return None; // "*bold* *<--- beginning of new bold>*"
}
@@ -184,6 +251,7 @@ fn seek_higher_order_end(chars: &Vec<char>, keyword: char, start: usize, end: us
&& !chars[i - 1].is_whitespace()
&& followed_by_whitespace(chars, i, end)
&& !preceeded_by_backslash(chars, i, start)
+ && is_char_repeating(chars, keyword, repetitions, i + 1, end)
{
return Some(i);
}
@@ -191,6 +259,14 @@ fn seek_higher_order_end(chars: &Vec<char>, keyword: char, start: usize, end: us
None
}
+fn seek_end_of_line(chars: &Vec<char>, start: usize, end: usize) -> usize {
+ chars[start..=end]
+ .iter()
+ .enumerate()
+ .find(|&(_, &c)| c == '\n')
+ .map_or(end + 1, |(i, _)| start + i)
+}
+
fn seek_end_of_quote(chars: &Vec<char>, start: usize, end: usize, depth: usize) -> usize {
for i in start..=end {
if chars[i] == '\n' {
@@ -205,16 +281,21 @@ fn seek_end_of_quote(chars: &Vec<char>, start: usize, end: usize, depth: usize)
end + 1
}
-fn seek_end_block(chars: &Vec<char>, keyword: char, start: usize, end: usize) -> Option<usize> {
+fn seek_end_block(chars: &Vec<char>, keyword: char, start: usize, end: usize, depth: usize) -> Option<usize> {
for i in start..=end {
- if chars[i] == keyword
- && is_char_repeating(chars, keyword, i + 1, end)
- && !preceeded_by_backslash(chars, i, start)
+ if chars[i] == '\n'
+ && i + 4 + depth > end
+ && (depth == 0 || chars[i + 1..i + 1 + depth].iter().all(|&c| QUOTE_KEYWORDS.contains(&c)))
+ && chars[i + 1 + depth] == keyword
+ && is_char_repeating(chars, keyword, 2, i + 1 + depth, end)
{
return Some(i);
}
}
- None
+ if end == chars.len() - 1 {
+ return None;
+ }
+ Some(end)
}
fn is_quote_start(chars: &Vec<char>, index: usize, depth: usize) -> bool {
@@ -232,15 +313,6 @@ fn preceeded_by_backslash(chars: &Vec<char>, index: usize, start: usize) -> bool
num_backslashes % 2 == 1
}
-fn get_block_indicator_size(keyword: char) -> usize {
- for &(k, v) in BLOCK_KEYWORDS.iter() {
- if k == keyword {
- return v;
- }
- }
- 1 // shouldn't ever happen
-}
-
#[pymodule]
fn slidge_style_parser(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_function(wrap_pyfunction!(format_body, m)?)?;