diff options
| -rw-r--r-- | src/lib.rs | 198 | ||||
| -rw-r--r-- | tests/test_style_parser.py | 280 |
2 files changed, 375 insertions, 103 deletions
@@ -2,10 +2,9 @@ use std::collections::HashMap; use pyo3::prelude::*; -const KEYWORDS: [char; 5] = ['*', '_', '~', '`', '|']; +const KEYWORDS: [char; 4] = ['*', '_', '~', '`']; const NO_SUB_PARSING_KEYWORDS: [char; 1] = ['`']; const QUOTE_KEYWORDS: [char; 1] = ['>']; -const BLOCK_KEYWORDS: [(char, usize); 2] = [('`', 2), ('|', 1)]; const PLACEHOLDER: &str = "\u{200B}\u{200B}\u{200B}\u{200B}\u{200B}\u{200B}\u{200B}\u{200B}\u{200B}\u{200B}"; #[pyfunction] @@ -14,31 +13,32 @@ fn format_body(body: String, new_tags: HashMap<String, (String, String)>) -> PyR if chars.len() < 1 { return Ok(body); } - let styles: Vec<(String, usize, usize)> = parse_with_limits(&chars, 0, chars.len() - 1, 0); + let styles: Vec<(String, usize, usize, usize, usize)> = parse_with_limits(&chars, 0, chars.len() - 1, 0); let parse_quotes = new_tags.contains_key(&">".to_string()); - let mut tags: Vec<(usize, String, String, bool)> = vec![]; + let mut tags: Vec<(usize, String, usize)> = vec![]; for style in styles { - let (keyword, start, end) = style; + let (keyword, start, remove_start, end, remove_end) = style; if new_tags.contains_key(&keyword) { - tags.push((start, keyword.clone(), new_tags.get(&keyword).unwrap().0.clone(), false)); - tags.push((end, keyword.clone(), new_tags.get(&keyword).unwrap().1.clone(), QUOTE_KEYWORDS.contains(&keyword.chars().next().unwrap()))); + let opening_tag = if keyword == "```language" { + new_tags.get(&keyword).unwrap().0.clone() + .replace("{}", &chars[start+3..remove_start-1] + .into_iter() + .collect::<String>()) + } else { + new_tags.get(&keyword).unwrap().0.clone() + }; + tags.push((start, opening_tag, remove_start)); + tags.push((end, new_tags.get(&keyword).unwrap().1.clone(), remove_end)); } else if keyword == ">>" && parse_quotes { - tags.push((start, keyword.clone(), "".to_string(), false)); + tags.push((start, "".to_string(), start+1)); } } tags.sort_by(|a, b| b.0.cmp(&a.0)); for tag in tags { - let (index, keyword, tag, is_end_quote_block) = tag; - let end = if is_end_quote_block { - index - } else if keyword == ">>" { - index + 1 - } else { - index + keyword.len() - }; + let (index, tag, end) = tag; chars = [chars[..index].to_vec(), tag.chars().collect(), chars[end..].to_vec()].concat(); } @@ -51,11 +51,10 @@ fn remove_non_escaped_backslashes(text: String) -> String { tmp_string.replace(PLACEHOLDER, "\\") } -fn parse_with_limits(chars: &Vec<char>, start: usize, end: usize, depth: usize) -> Vec<(String, usize, usize)> { +fn parse_with_limits(chars: &Vec<char>, start: usize, end: usize, depth: usize) -> Vec<(String, usize, usize, usize, usize)> { let mut styles = Vec::new(); let mut index = start; let end = end.min(chars.len() - 1); - println!("parse with limits start {}, end {}", start, end); while index <= end { if preceeded_by_backslash(chars, index, start) { @@ -64,58 +63,83 @@ fn parse_with_limits(chars: &Vec<char>, start: usize, end: usize, depth: usize) } let c = chars[index]; - if c == '|' && !is_char_repeating(chars, c, index, end) { - index += 1; - continue; - } - if QUOTE_KEYWORDS.contains(&c) { if is_quote_start(chars, index, depth) { let to = seek_end_of_quote(chars, index, end, depth); - styles.push((">".to_string(), index, to)); + styles.push((">".to_string(), index, index + 1, to, to)); styles.append(&mut parse_with_limits(chars, index + 1, to, depth + 1)); index = to; continue; } - if depth > 0 { - styles.push((">>".to_string(), index, index + 1)); + if is_nested_quote(chars, index, depth) { + styles.push((">>".to_string(), index, index + 1, index + 1, index + 1)); } index += 1; continue; } - if !preceeded_by_whitespace(chars, index, start) || followed_by_whitespace(chars, index, end) { - index += 1; + if c == '`' && is_char_repeating(chars, c, 2, index + 1, end) { + let end_of_line = seek_end_of_line(chars, index + 1, end); + if end_of_line == end { + index += 3; + continue; + } + match seek_end_block(chars, c, end_of_line, end, depth) { + Some(to) => { + if to != index + 3 && is_quote_start(chars, index, depth) { + let keyword = if end_of_line == index + 3 { + "```".to_string() + } else { + "```language".to_string() + }; + let remove_end = if depth > 0 && to == end { + to + } else { + to + 4 + depth + }; + styles.push((keyword, index, end_of_line + 1, to, remove_end)); + styles.append(&mut parse_quotes_in_code_block(chars, index + 3, to, depth)); + } + index = to + 3; + continue; + } + None => () + } + index += 3; continue; } - if !KEYWORDS.contains(&c) { + if !preceeded_by_whitespace(chars, index, start) || followed_by_whitespace(chars, index, end) { index += 1; continue; } - if BLOCK_KEYWORDS.iter().any(|&(k, _)| k == c) && is_char_repeating(chars, c, index, end) { - let block_indicator_size = get_block_indicator_size(c); - match seek_end_block(chars, c, index + block_indicator_size + 1, end) { + if c == '|' && is_char_repeating(chars, c, 1, index + 1, end) { + match seek_end(chars, c, index + 2, 1, end) { Some(to) => { - if to != index + block_indicator_size * 2 - 1 { - let keyword = c.to_string().repeat(block_indicator_size+1); - styles.push((keyword, index, to)); - if !NO_SUB_PARSING_KEYWORDS.contains(&c) { - styles.append(&mut parse_with_limits(chars, index + block_indicator_size + 1, to - 1, depth)); - } + if to != index + 2 { + let keyword = "||".to_string(); + styles.push((keyword, index, index + 2, to, to + 2)); + styles.append(&mut parse_with_limits(chars, index + 2, to - 1, depth)); } - index = to + block_indicator_size; + index = to + 2; continue; } None => () } + index += 2; + continue; } - match seek_end(chars, c, index + 1, end) { + if !KEYWORDS.contains(&c) { + index += 1; + continue; + } + + match seek_end(chars, c, index + 1, 0, end) { Some (to) => { if to != index + 1 { - styles.push((c.to_string(), index, to)); + styles.push((c.to_string(), index, index + 1, to, to + 1)); if !NO_SUB_PARSING_KEYWORDS.contains(&c) { styles.append(&mut parse_with_limits(chars, index + 1, to - 1, depth)); } @@ -129,10 +153,51 @@ fn parse_with_limits(chars: &Vec<char>, start: usize, end: usize, depth: usize) styles } -fn is_char_repeating(chars: &Vec<char>, keyword: char, index: usize, end: usize) -> bool { - let block_indicator_size = get_block_indicator_size(keyword); +fn parse_quotes_in_code_block(chars: &Vec<char>, start: usize, end: usize, depth: usize) -> Vec<(String, usize, usize, usize, usize)> { + let mut quotes = Vec::new(); + let mut index = start; + let end = end.min(chars.len() - 1); + + if depth < 1 { + return quotes; + } + + while index <= end { + let c = chars[index]; + if QUOTE_KEYWORDS.contains(&c) { + if is_nested_quote(chars, index, depth) { + quotes.push((">>".to_string(), index, index + 1, index + 1, index + 1)); + } + index += 1; + continue; + } + index += 1; + } + quotes +} + +fn is_nested_quote(chars: &Vec<char>, start: usize, depth: usize) -> bool { + let mut index = start; + let mut count = 0; - (0..block_indicator_size as usize) + while index > 0 { + if chars[index] == '\n' { + return true; + } + if !QUOTE_KEYWORDS.contains(&chars[index]) { + return false; + } + count += 1; + if count > depth { + return false; + } + index -= 1; + } + true +} + +fn is_char_repeating(chars: &Vec<char>, keyword: char, repetitions: usize, index: usize, end: usize) -> bool { + (0..repetitions as usize) .all(|i| index + i <= end && chars[index + i] == keyword) } @@ -144,7 +209,7 @@ fn followed_by_whitespace(chars: &Vec<char>, index: usize, end: usize) -> bool { index >= end || chars[index + 1].is_whitespace() } -fn seek_end(chars: &Vec<char>, keyword: char, start: usize, end: usize) -> Option<usize> { +fn seek_end(chars: &Vec<char>, keyword: char, start: usize, repetitions: usize, end: usize) -> Option<usize> { for i in start..=end { let c = chars[i]; if c == '\n' { @@ -153,10 +218,11 @@ fn seek_end(chars: &Vec<char>, keyword: char, start: usize, end: usize) -> Optio if c == keyword && !chars[i - 1].is_whitespace() && !preceeded_by_backslash(chars, i, start) + && is_char_repeating(chars, keyword, repetitions, i + 1, end) { - match seek_higher_order_end(chars, c, i + 1, end) { - Some(higher_order_index) => { - return Some(higher_order_index); + match seek_higher_order_end(chars, c, i + 1, repetitions, end) { + Some(higher_order_i) => { + return Some(higher_order_i); } None => { return Some(i); @@ -167,7 +233,7 @@ fn seek_end(chars: &Vec<char>, keyword: char, start: usize, end: usize) -> Optio None } -fn seek_higher_order_end(chars: &Vec<char>, keyword: char, start: usize, end: usize) -> Option<usize> { +fn seek_higher_order_end(chars: &Vec<char>, keyword: char, start: usize, repetitions: usize, end: usize) -> Option<usize> { for i in start..=end { let c = chars[i]; if c == '\n' { @@ -177,6 +243,7 @@ fn seek_higher_order_end(chars: &Vec<char>, keyword: char, start: usize, end: us && chars[i - 1].is_whitespace() && !followed_by_whitespace(chars, i, end) && !preceeded_by_backslash(chars, i, start) + && is_char_repeating(chars, keyword, repetitions, i + 1, end) { return None; // "*bold* *<--- beginning of new bold>*" } @@ -184,6 +251,7 @@ fn seek_higher_order_end(chars: &Vec<char>, keyword: char, start: usize, end: us && !chars[i - 1].is_whitespace() && followed_by_whitespace(chars, i, end) && !preceeded_by_backslash(chars, i, start) + && is_char_repeating(chars, keyword, repetitions, i + 1, end) { return Some(i); } @@ -191,6 +259,14 @@ fn seek_higher_order_end(chars: &Vec<char>, keyword: char, start: usize, end: us None } +fn seek_end_of_line(chars: &Vec<char>, start: usize, end: usize) -> usize { + chars[start..=end] + .iter() + .enumerate() + .find(|&(_, &c)| c == '\n') + .map_or(end + 1, |(i, _)| start + i) +} + fn seek_end_of_quote(chars: &Vec<char>, start: usize, end: usize, depth: usize) -> usize { for i in start..=end { if chars[i] == '\n' { @@ -205,16 +281,21 @@ fn seek_end_of_quote(chars: &Vec<char>, start: usize, end: usize, depth: usize) end + 1 } -fn seek_end_block(chars: &Vec<char>, keyword: char, start: usize, end: usize) -> Option<usize> { +fn seek_end_block(chars: &Vec<char>, keyword: char, start: usize, end: usize, depth: usize) -> Option<usize> { for i in start..=end { - if chars[i] == keyword - && is_char_repeating(chars, keyword, i + 1, end) - && !preceeded_by_backslash(chars, i, start) + if chars[i] == '\n' + && i + 4 + depth > end + && (depth == 0 || chars[i + 1..i + 1 + depth].iter().all(|&c| QUOTE_KEYWORDS.contains(&c))) + && chars[i + 1 + depth] == keyword + && is_char_repeating(chars, keyword, 2, i + 1 + depth, end) { return Some(i); } } - None + if end == chars.len() - 1 { + return None; + } + Some(end) } fn is_quote_start(chars: &Vec<char>, index: usize, depth: usize) -> bool { @@ -232,15 +313,6 @@ fn preceeded_by_backslash(chars: &Vec<char>, index: usize, start: usize) -> bool num_backslashes % 2 == 1 } -fn get_block_indicator_size(keyword: char) -> usize { - for &(k, v) in BLOCK_KEYWORDS.iter() { - if k == keyword { - return v; - } - } - 1 // shouldn't ever happen -} - #[pymodule] fn slidge_style_parser(_py: Python, m: &PyModule) -> PyResult<()> { m.add_function(wrap_pyfunction!(format_body, m)?)?; diff --git a/tests/test_style_parser.py b/tests/test_style_parser.py index 671af64..fd02597 100644 --- a/tests/test_style_parser.py +++ b/tests/test_style_parser.py @@ -6,67 +6,267 @@ MATRIX_FORMATS = { "~": ("<strike>", "</strike>"), "`": ("<code>", "</code>"), "```": ("<pre><code>", "</code></pre>"), + "```language": ("<pre><code class=\"language-{}\">", "</code></pre>"), ">": ("<blockquote>", "</blockquote>"), "||": ("<span data-mx-spoiler>", "</span>") } def test_basic(): - assert(format_body("_underline_", MATRIX_FORMATS) == "<em>underline</em>") - assert(format_body("*bold*", MATRIX_FORMATS) == "<strong>bold</strong>") - assert(format_body("~strikethrough~", MATRIX_FORMATS) == "<strike>strikethrough</strike>") - assert(format_body("`code span`", MATRIX_FORMATS) == "<code>code span</code>") - assert(format_body("```code\nblock```", MATRIX_FORMATS) == "<pre><code>code\nblock</code></pre>") - assert(format_body("||spoiler||", MATRIX_FORMATS) == "<span data-mx-spoiler>spoiler</span>") + test = "_underline_" + formatted_body = "<em>underline</em>" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "*bold*" + formatted_body = "<strong>bold</strong>" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "~strikethrough~" + formatted_body = "<strike>strikethrough</strike>" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "`code span`" + formatted_body = "<code>code span</code>" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = """ + ```python + def test_basic(): + test = "_underline_" + formatted_body = "<em>underline</em>" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + ``` + """ + formatted_body = test = """ + <pre><code class="language-python">def test_basic(): + test = "_underline_" + formatted_body = "<em>underline</em>" + assert(format_body(test, MATRIX_FORMATS) == (test, formatted_body))</pre></code> + """ + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "```\ncode block\n```" + formatted_body = "<pre><code>code block</code></pre>" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "||this message contains a spoiler||" + formatted_body = "<span data-mx-spoiler>this message contains a spoiler</span>" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) def test_quotes(): - assert(format_body(">single", MATRIX_FORMATS) == "<blockquote>single</blockquote>") - assert(format_body(">single\n>grouped", MATRIX_FORMATS) == "<blockquote>single\ngrouped</blockquote>") - assert(format_body(">>double", MATRIX_FORMATS) == "<blockquote><blockquote>double</blockquote></blockquote>") - assert(format_body(">>double\n>grouped single", MATRIX_FORMATS) == "<blockquote><blockquote>double</blockquote>\ngrouped single</blockquote>") - assert(format_body(">>>tripple\n>single\n>>double", MATRIX_FORMATS) == "<blockquote><blockquote><blockquote>tripple</blockquote></blockquote>\nsingle\n<blockquote>double</blockquote></blockquote>") + test = ">single" + formatted_body = "<blockquote>single</blockquote>" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = ">single arrow ->" + formatted_body = "<blockquote>single arrow -></blockquote>" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = ">single\n>grouped" + formatted_body = "<blockquote>single\ngrouped</blockquote>" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = ">>double" + formatted_body = "<blockquote><blockquote>double</blockquote></blockquote>" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = ">>double\n>>double" + formatted_body = "<blockquote><blockquote>double\ndouble</blockquote></blockquote>" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = ">>double\n&>not quote" + formatted_body = "<blockquote><blockquote>double</blockquote></blockquote>\n&>not quote" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = ">>double\n>grouped single" + formatted_body = "<blockquote><blockquote>double</blockquote>\ngrouped single</blockquote>" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = ">>>tripple\n>single\n>>double" + formatted_body = "<blockquote><blockquote><blockquote>tripple</blockquote></blockquote>\nsingle\n<blockquote>double</blockquote></blockquote>" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + +CODE_BLOCK_TEST_CASE = \ +""" +Code test +```python3 +def who_is_awesome(): + return "you!" +``` +Nope +""" + +CODE_BLOCK_TEST_CASE_OUTPUT = \ +""" +Code test +<pre><code> +def who_is_awesome(): + return \"you!\" +</code></pre> +Nope +""" + +def test_code_blocks(): + test = "```\nhacker\ncode\n```" + formatted_body = "<pre><code>hacker\ncode</code></pre>" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "```python\nhacker code\n```" + formatted_body = "<pre><code class=\"language-python\">hacker code</code></pre>" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = ">```java\n>why are you quoting a code block\n>```" + formatted_body = "<blockquote><pre><code class=\"language-java\">why are you quoting a code block</code></pre></blockquote>" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = ">```\n>please stop trying to break my parser ;-;\n>```" + formatted_body = "<blockquote><pre><code>please stop trying to break my parser ;-;</code></pre></blockquote>" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = ">>```\n>>double quote code block\n>single quote not in code block\nnormal text" + formatted_body = "<blockquote><blockquote><pre><code>double quote code block</code></pre></blockquote>\nsingle quote not in code block</blockquote>\nnormal text" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = ">>```\n>>>>double quote code block\n>single quote not in code block\nnormal text" + formatted_body = "<blockquote><blockquote><pre><code>>>double quote code block</code></pre></blockquote>\nsingle quote not in code block</blockquote>\nnormal text" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "_```_ignored\ninvalid code block\n```" + formatted_body = "<em>```</em>ignored\ninvalid code block\n```" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + def test_escaped(): - assert(format_body("\\_no underline_", MATRIX_FORMATS) == "_no underline_") - assert(format_body("\\\\_no underline_", MATRIX_FORMATS) == "\\_no underline_") - assert(format_body(">>>tripple\n\\>none\n>>double", MATRIX_FORMATS) == "<blockquote><blockquote><blockquote>tripple</blockquote></blockquote></blockquote>\n>none\n<blockquote><blockquote>double</blockquote></blockquote>") + test = "\\_no underline_" + formatted_body = "_no underline_" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "\\\\_no underline_" + formatted_body = "\\_no underline_" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = ">>>tripple\n\\>none\n>>double" + formatted_body = "<blockquote><blockquote><blockquote>tripple</blockquote></blockquote></blockquote>\n>none\n<blockquote><blockquote>double</blockquote></blockquote>" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) def test_nested(): - assert(format_body("`*~_code span_~*`", MATRIX_FORMATS) == "<code>*~_code span_~*</code>") - assert(format_body("*_~`code span`~_*", MATRIX_FORMATS) == "<strong><em><strike><code>code span</code></strike></em></strong>") - assert(format_body(">*_~`code span`~_*", MATRIX_FORMATS) == "<blockquote><strong><em><strike><code>code span</code></strike></em></strong></blockquote>") - assert(format_body("*bold star >*< star bold*", MATRIX_FORMATS) == "<strong>bold star >*< star bold</strong>") - assert(format_body("*_bold*_", MATRIX_FORMATS) == "<strong>_bold</strong>_") - assert(format_body("__underlined__", MATRIX_FORMATS) == "<em><em>underlined</em></em>") + test = "`*~_code span_~*`" + formatted_body = "<code>*~_code span_~*</code>" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "*_~`code span`~_*" + formatted_body = "<strong><em><strike><code>code span</code></strike></em></strong>" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = ">*_~`code span`~_*" + formatted_body = "<blockquote><strong><em><strike><code>code span</code></strike></em></strong></blockquote>" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "*bold star >*< star bold*" + formatted_body = "<strong>bold star >*< star bold</strong>" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "*_bold*_" + formatted_body = "<strong>_bold</strong>_" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "__underlined__" + formatted_body = "<em><em>underlined</em></em>" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) def test_no_changes(): - assert(format_body("", MATRIX_FORMATS) == "") - assert(format_body("~~ empty `````` styles **", MATRIX_FORMATS) == "~~ empty `````` styles **") - assert(format_body("this is not an empty string", MATRIX_FORMATS) == "this is not an empty string") - assert(format_body("arrow ->", MATRIX_FORMATS) == "arrow ->") - assert(format_body(" > no quote", MATRIX_FORMATS) == " > no quote") - assert(format_body("_not underlined", MATRIX_FORMATS) == "_not underlined") - assert(format_body("|not a spoiler|", MATRIX_FORMATS) == "|not a spoiler|") - assert(format_body("`no code\nblock here`", MATRIX_FORMATS) == "`no code\nblock here`") + test = "" + formatted_body = "" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "~~ empty `````` styles **" + formatted_body = "~~ empty `````` styles **" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "this is not an empty string" + formatted_body = "this is not an empty string" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "arrow ->" + formatted_body = "arrow ->" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = " > no quote" + formatted_body = " > no quote" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "_not underlined" + formatted_body = "_not underlined" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "|not a spoiler|" + formatted_body = "|not a spoiler|" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "||\nalso\nnot\na\nspoiler||" + formatted_body = "||\nalso\nnot\na\nspoiler||" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "`no code\nblock here`" + formatted_body = "`no code\nblock here`" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "invalid ```\ncode block\n```" + formatted_body = "invalid ```\ncode block\n```" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "```\ncode block\ninvalid```" + formatted_body = "```\ncode block\ninvalid```" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "```\ncode block\n```invalid" + formatted_body = "```\ncode block\n```invalid" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) def test_assorted(): - assert(format_body("at the ```end```", MATRIX_FORMATS) == "at the <pre><code>end</code></pre>") - assert(format_body("in the ~middle~ here", MATRIX_FORMATS) == "in the <strike>middle</strike> here") - assert(format_body("_underline_ *bold* ~strikethrough~ >not quote ||spoiler||\n>quote\nnothing\nnothing\n>>>>another quote with ||~_*```four```*_~||", MATRIX_FORMATS) == "<em>underline</em> <strong>bold</strong> <strike>strikethrough</strike> >not quote <span data-mx-spoiler>spoiler</span>\n<blockquote>quote</blockquote>\nnothing\nnothing\n<blockquote><blockquote><blockquote><blockquote>another quote with <span data-mx-spoiler><strike><em><strong><pre><code>four</code></pre></strong></em></strike></span></blockquote></blockquote></blockquote></blockquote>") + test = "at the ||end||" + formatted_body = "at the <span data-mx-spoiler>end</span>" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "in the ~middle~ here" + formatted_body = "in the <strike>middle</strike> here" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "_underline_ *bold* ~strikethrough~ >not quote ||spoiler||\n>quote\nnothing\nnothing\n>>>>another quote with ||~_*```four```*_~||" + formatted_body = "<em>underline</em> <strong>bold</strong> <strike>strikethrough</strike> >not quote <span data-mx-spoiler>spoiler</span>\n<blockquote>quote</blockquote>\nnothing\nnothing\n<blockquote><blockquote><blockquote><blockquote>another quote with <span data-mx-spoiler><strike><em><strong>```four```</strong></em></strike></span></blockquote></blockquote></blockquote></blockquote>" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) def test_weird_utf8(): - assert(format_body("โค๏ธ๐๐๐๐ ```๐๐๐๐๐ค``` ๐๐๐โฃ๏ธ", MATRIX_FORMATS) == "โค๏ธ๐๐๐๐ <pre><code>๐๐๐๐๐ค</code></pre> ๐๐๐โฃ๏ธ") - assert(format_body("๐จโ๐ฉโ๐งโ๐ง _underline_๐ฉโ๐ฉโ๐ฆโ๐ง", MATRIX_FORMATS) == "๐จโ๐ฉโ๐งโ๐ง <em>underline</em>๐ฉโ๐ฉโ๐ฆโ๐ง") - assert(format_body("\u202eRight to left", MATRIX_FORMATS) == "\u202eRight to left") - assert(format_body(">\u202eRight to left quote?", MATRIX_FORMATS) == "<blockquote>\u202eRight to left quote?</blockquote>") - assert(format_body("_Invisible\u200bseparator_", MATRIX_FORMATS) == "<em>Invisible\u200bseparator</em>") - assert(format_body("~\u200b~", MATRIX_FORMATS) == "<strike>\u200b</strike>") + test = "โค๏ธ๐๐๐๐ ||๐๐๐๐๐ค|| ๐๐๐โฃ๏ธ" + formatted_body = "โค๏ธ๐๐๐๐ <span data-mx-spoiler>๐๐๐๐๐ค</span> ๐๐๐โฃ๏ธ" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "๐จโ๐ฉโ๐งโ๐ง _underline_๐ฉโ๐ฉโ๐ฆโ๐ง" + formatted_body = "๐จโ๐ฉโ๐งโ๐ง <em>underline</em>๐ฉโ๐ฉโ๐ฆโ๐ง" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "\u202eRight to left" + formatted_body = "\u202eRight to left" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = ">\u202eRight to left quote?" + formatted_body = "<blockquote>\u202eRight to left quote?</blockquote>" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "_Invisible\u200bseparator_" + formatted_body = "<em>Invisible\u200bseparator</em>" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "~\u200b~" + formatted_body = "<strike>\u200b</strike>" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) LIMITED_FORMATS = { "_": ("<em>", "</em>"), "~": ("<strike>", "</strike>"), - "`": ("<code>", "</code>"), - "||": ("<span data-mx-spoiler>", "</span>") + "`": ("<code>", "</code>") } def test_limited(): - assert(format_body("_underline_ *bold* ~strikethrough~ >not quote ||spoiler||\n>quote\nnothing\nnothing\n>>>>another quote with ||~_*```four```*_~||", LIMITED_FORMATS) == "<em>underline</em> *bold* <strike>strikethrough</strike> >not quote <span data-mx-spoiler>spoiler</span>\n>quote\nnothing\nnothing\n>>>>another quote with <span data-mx-spoiler><strike><em>*```four```*</em></strike></span>") + test = "_underline_ *bold* ~strikethrough~ >not quote ||spoiler||\n>quote\nnothing\nnothing\n>>>>another quote with ||~_*```four```*_~||" + formatted_body = "<em>underline</em> *bold* <strike>strikethrough</strike> >not quote ||spoiler||\n>quote\nnothing\nnothing\n>>>>another quote with ||<strike><em>*```four```*</em></strike>||" + assert(format_body(test, LIMITED_FORMATS) == formatted_body) |
