summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author SavagePeanut <sourcehut@lazytapir.com> 2023-08-02 17:45:21 -0500
committer SavagePeanut <sourcehut@lazytapir.com> 2023-08-02 17:45:21 -0500
commit eb2942a595b5f6f1e7eed711659ceba8821be3a3 (patch)
tree 995f4ef718c1a2b9623f83f4a3ec20a920abff5e
parent db5bd2e6e42bed5204788f006d241b618671b94b (diff)
fix code blocks
-rw-r--r-- src/lib.rs 198
-rw-r--r-- tests/test_style_parser.py 280
2 files changed, 375 insertions, 103 deletions
diff --git a/src/lib.rs b/src/lib.rs
index 532fec7..88b040d 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2,10 +2,9 @@ use std::collections::HashMap;
use pyo3::prelude::*;
-const KEYWORDS: [char; 5] = ['*', '_', '~', '`', '|'];
+const KEYWORDS: [char; 4] = ['*', '_', '~', '`'];
const NO_SUB_PARSING_KEYWORDS: [char; 1] = ['`'];
const QUOTE_KEYWORDS: [char; 1] = ['>'];
-const BLOCK_KEYWORDS: [(char, usize); 2] = [('`', 2), ('|', 1)];
const PLACEHOLDER: &str = "\u{200B}\u{200B}\u{200B}\u{200B}\u{200B}\u{200B}\u{200B}\u{200B}\u{200B}\u{200B}";
#[pyfunction]
@@ -14,31 +13,32 @@ fn format_body(body: String, new_tags: HashMap<String, (String, String)>) -> PyR
if chars.len() < 1 {
return Ok(body);
}
- let styles: Vec<(String, usize, usize)> = parse_with_limits(&chars, 0, chars.len() - 1, 0);
+ let styles: Vec<(String, usize, usize, usize, usize)> = parse_with_limits(&chars, 0, chars.len() - 1, 0);
let parse_quotes = new_tags.contains_key(&">".to_string());
- let mut tags: Vec<(usize, String, String, bool)> = vec![];
+ let mut tags: Vec<(usize, String, usize)> = vec![];
for style in styles {
- let (keyword, start, end) = style;
+ let (keyword, start, remove_start, end, remove_end) = style;
if new_tags.contains_key(&keyword) {
- tags.push((start, keyword.clone(), new_tags.get(&keyword).unwrap().0.clone(), false));
- tags.push((end, keyword.clone(), new_tags.get(&keyword).unwrap().1.clone(), QUOTE_KEYWORDS.contains(&keyword.chars().next().unwrap())));
+ let opening_tag = if keyword == "```language" {
+ new_tags.get(&keyword).unwrap().0.clone()
+ .replace("{}", &chars[start+3..remove_start-1]
+ .into_iter()
+ .collect::<String>())
+ } else {
+ new_tags.get(&keyword).unwrap().0.clone()
+ };
+ tags.push((start, opening_tag, remove_start));
+ tags.push((end, new_tags.get(&keyword).unwrap().1.clone(), remove_end));
} else if keyword == ">>" && parse_quotes {
- tags.push((start, keyword.clone(), "".to_string(), false));
+ tags.push((start, "".to_string(), start+1));
}
}
tags.sort_by(|a, b| b.0.cmp(&a.0));
for tag in tags {
- let (index, keyword, tag, is_end_quote_block) = tag;
- let end = if is_end_quote_block {
- index
- } else if keyword == ">>" {
- index + 1
- } else {
- index + keyword.len()
- };
+ let (index, tag, end) = tag;
chars = [chars[..index].to_vec(), tag.chars().collect(), chars[end..].to_vec()].concat();
}
@@ -51,11 +51,10 @@ fn remove_non_escaped_backslashes(text: String) -> String {
tmp_string.replace(PLACEHOLDER, "\\")
}
-fn parse_with_limits(chars: &Vec<char>, start: usize, end: usize, depth: usize) -> Vec<(String, usize, usize)> {
+fn parse_with_limits(chars: &Vec<char>, start: usize, end: usize, depth: usize) -> Vec<(String, usize, usize, usize, usize)> {
let mut styles = Vec::new();
let mut index = start;
let end = end.min(chars.len() - 1);
- println!("parse with limits start {}, end {}", start, end);
while index <= end {
if preceeded_by_backslash(chars, index, start) {
@@ -64,58 +63,83 @@ fn parse_with_limits(chars: &Vec<char>, start: usize, end: usize, depth: usize)
}
let c = chars[index];
- if c == '|' && !is_char_repeating(chars, c, index, end) {
- index += 1;
- continue;
- }
-
if QUOTE_KEYWORDS.contains(&c) {
if is_quote_start(chars, index, depth) {
let to = seek_end_of_quote(chars, index, end, depth);
- styles.push((">".to_string(), index, to));
+ styles.push((">".to_string(), index, index + 1, to, to));
styles.append(&mut parse_with_limits(chars, index + 1, to, depth + 1));
index = to;
continue;
}
- if depth > 0 {
- styles.push((">>".to_string(), index, index + 1));
+ if is_nested_quote(chars, index, depth) {
+ styles.push((">>".to_string(), index, index + 1, index + 1, index + 1));
}
index += 1;
continue;
}
- if !preceeded_by_whitespace(chars, index, start) || followed_by_whitespace(chars, index, end) {
- index += 1;
+ if c == '`' && is_char_repeating(chars, c, 2, index + 1, end) {
+ let end_of_line = seek_end_of_line(chars, index + 1, end);
+ if end_of_line == end {
+ index += 3;
+ continue;
+ }
+ match seek_end_block(chars, c, end_of_line, end, depth) {
+ Some(to) => {
+ if to != index + 3 && is_quote_start(chars, index, depth) {
+ let keyword = if end_of_line == index + 3 {
+ "```".to_string()
+ } else {
+ "```language".to_string()
+ };
+ let remove_end = if depth > 0 && to == end {
+ to
+ } else {
+ to + 4 + depth
+ };
+ styles.push((keyword, index, end_of_line + 1, to, remove_end));
+ styles.append(&mut parse_quotes_in_code_block(chars, index + 3, to, depth));
+ }
+ index = to + 3;
+ continue;
+ }
+ None => ()
+ }
+ index += 3;
continue;
}
- if !KEYWORDS.contains(&c) {
+ if !preceeded_by_whitespace(chars, index, start) || followed_by_whitespace(chars, index, end) {
index += 1;
continue;
}
- if BLOCK_KEYWORDS.iter().any(|&(k, _)| k == c) && is_char_repeating(chars, c, index, end) {
- let block_indicator_size = get_block_indicator_size(c);
- match seek_end_block(chars, c, index + block_indicator_size + 1, end) {
+ if c == '|' && is_char_repeating(chars, c, 1, index + 1, end) {
+ match seek_end(chars, c, index + 2, 1, end) {
Some(to) => {
- if to != index + block_indicator_size * 2 - 1 {
- let keyword = c.to_string().repeat(block_indicator_size+1);
- styles.push((keyword, index, to));
- if !NO_SUB_PARSING_KEYWORDS.contains(&c) {
- styles.append(&mut parse_with_limits(chars, index + block_indicator_size + 1, to - 1, depth));
- }
+ if to != index + 2 {
+ let keyword = "||".to_string();
+ styles.push((keyword, index, index + 2, to, to + 2));
+ styles.append(&mut parse_with_limits(chars, index + 2, to - 1, depth));
}
- index = to + block_indicator_size;
+ index = to + 2;
continue;
}
None => ()
}
+ index += 2;
+ continue;
}
- match seek_end(chars, c, index + 1, end) {
+ if !KEYWORDS.contains(&c) {
+ index += 1;
+ continue;
+ }
+
+ match seek_end(chars, c, index + 1, 0, end) {
Some (to) => {
if to != index + 1 {
- styles.push((c.to_string(), index, to));
+ styles.push((c.to_string(), index, index + 1, to, to + 1));
if !NO_SUB_PARSING_KEYWORDS.contains(&c) {
styles.append(&mut parse_with_limits(chars, index + 1, to - 1, depth));
}
@@ -129,10 +153,51 @@ fn parse_with_limits(chars: &Vec<char>, start: usize, end: usize, depth: usize)
styles
}
-fn is_char_repeating(chars: &Vec<char>, keyword: char, index: usize, end: usize) -> bool {
- let block_indicator_size = get_block_indicator_size(keyword);
+fn parse_quotes_in_code_block(chars: &Vec<char>, start: usize, end: usize, depth: usize) -> Vec<(String, usize, usize, usize, usize)> {
+ let mut quotes = Vec::new();
+ let mut index = start;
+ let end = end.min(chars.len() - 1);
+
+ if depth < 1 {
+ return quotes;
+ }
+
+ while index <= end {
+ let c = chars[index];
+ if QUOTE_KEYWORDS.contains(&c) {
+ if is_nested_quote(chars, index, depth) {
+ quotes.push((">>".to_string(), index, index + 1, index + 1, index + 1));
+ }
+ index += 1;
+ continue;
+ }
+ index += 1;
+ }
+ quotes
+}
+
+fn is_nested_quote(chars: &Vec<char>, start: usize, depth: usize) -> bool {
+ let mut index = start;
+ let mut count = 0;
- (0..block_indicator_size as usize)
+ while index > 0 {
+ if chars[index] == '\n' {
+ return true;
+ }
+ if !QUOTE_KEYWORDS.contains(&chars[index]) {
+ return false;
+ }
+ count += 1;
+ if count > depth {
+ return false;
+ }
+ index -= 1;
+ }
+ true
+}
+
+fn is_char_repeating(chars: &Vec<char>, keyword: char, repetitions: usize, index: usize, end: usize) -> bool {
+ (0..repetitions as usize)
.all(|i| index + i <= end && chars[index + i] == keyword)
}
@@ -144,7 +209,7 @@ fn followed_by_whitespace(chars: &Vec<char>, index: usize, end: usize) -> bool {
index >= end || chars[index + 1].is_whitespace()
}
-fn seek_end(chars: &Vec<char>, keyword: char, start: usize, end: usize) -> Option<usize> {
+fn seek_end(chars: &Vec<char>, keyword: char, start: usize, repetitions: usize, end: usize) -> Option<usize> {
for i in start..=end {
let c = chars[i];
if c == '\n' {
@@ -153,10 +218,11 @@ fn seek_end(chars: &Vec<char>, keyword: char, start: usize, end: usize) -> Optio
if c == keyword
&& !chars[i - 1].is_whitespace()
&& !preceeded_by_backslash(chars, i, start)
+ && is_char_repeating(chars, keyword, repetitions, i + 1, end)
{
- match seek_higher_order_end(chars, c, i + 1, end) {
- Some(higher_order_index) => {
- return Some(higher_order_index);
+ match seek_higher_order_end(chars, c, i + 1, repetitions, end) {
+ Some(higher_order_i) => {
+ return Some(higher_order_i);
}
None => {
return Some(i);
@@ -167,7 +233,7 @@ fn seek_end(chars: &Vec<char>, keyword: char, start: usize, end: usize) -> Optio
None
}
-fn seek_higher_order_end(chars: &Vec<char>, keyword: char, start: usize, end: usize) -> Option<usize> {
+fn seek_higher_order_end(chars: &Vec<char>, keyword: char, start: usize, repetitions: usize, end: usize) -> Option<usize> {
for i in start..=end {
let c = chars[i];
if c == '\n' {
@@ -177,6 +243,7 @@ fn seek_higher_order_end(chars: &Vec<char>, keyword: char, start: usize, end: us
&& chars[i - 1].is_whitespace()
&& !followed_by_whitespace(chars, i, end)
&& !preceeded_by_backslash(chars, i, start)
+ && is_char_repeating(chars, keyword, repetitions, i + 1, end)
{
return None; // "*bold* *<--- beginning of new bold>*"
}
@@ -184,6 +251,7 @@ fn seek_higher_order_end(chars: &Vec<char>, keyword: char, start: usize, end: us
&& !chars[i - 1].is_whitespace()
&& followed_by_whitespace(chars, i, end)
&& !preceeded_by_backslash(chars, i, start)
+ && is_char_repeating(chars, keyword, repetitions, i + 1, end)
{
return Some(i);
}
@@ -191,6 +259,14 @@ fn seek_higher_order_end(chars: &Vec<char>, keyword: char, start: usize, end: us
None
}
+fn seek_end_of_line(chars: &Vec<char>, start: usize, end: usize) -> usize {
+ chars[start..=end]
+ .iter()
+ .enumerate()
+ .find(|&(_, &c)| c == '\n')
+ .map_or(end + 1, |(i, _)| start + i)
+}
+
fn seek_end_of_quote(chars: &Vec<char>, start: usize, end: usize, depth: usize) -> usize {
for i in start..=end {
if chars[i] == '\n' {
@@ -205,16 +281,21 @@ fn seek_end_of_quote(chars: &Vec<char>, start: usize, end: usize, depth: usize)
end + 1
}
-fn seek_end_block(chars: &Vec<char>, keyword: char, start: usize, end: usize) -> Option<usize> {
+fn seek_end_block(chars: &Vec<char>, keyword: char, start: usize, end: usize, depth: usize) -> Option<usize> {
for i in start..=end {
- if chars[i] == keyword
- && is_char_repeating(chars, keyword, i + 1, end)
- && !preceeded_by_backslash(chars, i, start)
+ if chars[i] == '\n'
+ && i + 4 + depth > end
+ && (depth == 0 || chars[i + 1..i + 1 + depth].iter().all(|&c| QUOTE_KEYWORDS.contains(&c)))
+ && chars[i + 1 + depth] == keyword
+ && is_char_repeating(chars, keyword, 2, i + 1 + depth, end)
{
return Some(i);
}
}
- None
+ if end == chars.len() - 1 {
+ return None;
+ }
+ Some(end)
}
fn is_quote_start(chars: &Vec<char>, index: usize, depth: usize) -> bool {
@@ -232,15 +313,6 @@ fn preceeded_by_backslash(chars: &Vec<char>, index: usize, start: usize) -> bool
num_backslashes % 2 == 1
}
-fn get_block_indicator_size(keyword: char) -> usize {
- for &(k, v) in BLOCK_KEYWORDS.iter() {
- if k == keyword {
- return v;
- }
- }
- 1 // shouldn't ever happen
-}
-
#[pymodule]
fn slidge_style_parser(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_function(wrap_pyfunction!(format_body, m)?)?;
diff --git a/tests/test_style_parser.py b/tests/test_style_parser.py
index 671af64..fd02597 100644
--- a/tests/test_style_parser.py
+++ b/tests/test_style_parser.py
@@ -6,67 +6,267 @@ MATRIX_FORMATS = {
"~": ("<strike>", "</strike>"),
"`": ("<code>", "</code>"),
"```": ("<pre><code>", "</code></pre>"),
+ "```language": ("<pre><code class=\"language-{}\">", "</code></pre>"),
">": ("<blockquote>", "</blockquote>"),
"||": ("<span data-mx-spoiler>", "</span>")
}
def test_basic():
- assert(format_body("_underline_", MATRIX_FORMATS) == "<em>underline</em>")
- assert(format_body("*bold*", MATRIX_FORMATS) == "<strong>bold</strong>")
- assert(format_body("~strikethrough~", MATRIX_FORMATS) == "<strike>strikethrough</strike>")
- assert(format_body("`code span`", MATRIX_FORMATS) == "<code>code span</code>")
- assert(format_body("```code\nblock```", MATRIX_FORMATS) == "<pre><code>code\nblock</code></pre>")
- assert(format_body("||spoiler||", MATRIX_FORMATS) == "<span data-mx-spoiler>spoiler</span>")
+ test = "_underline_"
+ formatted_body = "<em>underline</em>"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = "*bold*"
+ formatted_body = "<strong>bold</strong>"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = "~strikethrough~"
+ formatted_body = "<strike>strikethrough</strike>"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = "`code span`"
+ formatted_body = "<code>code span</code>"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = """
+ ```python
+ def test_basic():
+ test = "_underline_"
+ formatted_body = "<em>underline</em>"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+ ```
+ """
+ formatted_body = test = """
+ <pre><code class="language-python">def test_basic():
+ test = "_underline_"
+ formatted_body = "<em>underline</em>"
+ assert(format_body(test, MATRIX_FORMATS) == (test, formatted_body))</pre></code>
+ """
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = "```\ncode block\n```"
+ formatted_body = "<pre><code>code block</code></pre>"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = "||this message contains a spoiler||"
+ formatted_body = "<span data-mx-spoiler>this message contains a spoiler</span>"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
def test_quotes():
- assert(format_body(">single", MATRIX_FORMATS) == "<blockquote>single</blockquote>")
- assert(format_body(">single\n>grouped", MATRIX_FORMATS) == "<blockquote>single\ngrouped</blockquote>")
- assert(format_body(">>double", MATRIX_FORMATS) == "<blockquote><blockquote>double</blockquote></blockquote>")
- assert(format_body(">>double\n>grouped single", MATRIX_FORMATS) == "<blockquote><blockquote>double</blockquote>\ngrouped single</blockquote>")
- assert(format_body(">>>tripple\n>single\n>>double", MATRIX_FORMATS) == "<blockquote><blockquote><blockquote>tripple</blockquote></blockquote>\nsingle\n<blockquote>double</blockquote></blockquote>")
+ test = ">single"
+ formatted_body = "<blockquote>single</blockquote>"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = ">single arrow ->"
+ formatted_body = "<blockquote>single arrow -></blockquote>"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = ">single\n>grouped"
+ formatted_body = "<blockquote>single\ngrouped</blockquote>"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = ">>double"
+ formatted_body = "<blockquote><blockquote>double</blockquote></blockquote>"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = ">>double\n>>double"
+ formatted_body = "<blockquote><blockquote>double\ndouble</blockquote></blockquote>"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = ">>double\n&>not quote"
+ formatted_body = "<blockquote><blockquote>double</blockquote></blockquote>\n&>not quote"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = ">>double\n>grouped single"
+ formatted_body = "<blockquote><blockquote>double</blockquote>\ngrouped single</blockquote>"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = ">>>tripple\n>single\n>>double"
+ formatted_body = "<blockquote><blockquote><blockquote>tripple</blockquote></blockquote>\nsingle\n<blockquote>double</blockquote></blockquote>"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+CODE_BLOCK_TEST_CASE = \
+"""
+Code test
+```python3
+def who_is_awesome():
+ return "you!"
+```
+Nope
+"""
+
+CODE_BLOCK_TEST_CASE_OUTPUT = \
+"""
+Code test
+<pre><code>
+def who_is_awesome():
+ return \"you!\"
+</code></pre>
+Nope
+"""
+
+def test_code_blocks():
+ test = "```\nhacker\ncode\n```"
+ formatted_body = "<pre><code>hacker\ncode</code></pre>"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = "```python\nhacker code\n```"
+ formatted_body = "<pre><code class=\"language-python\">hacker code</code></pre>"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = ">```java\n>why are you quoting a code block\n>```"
+ formatted_body = "<blockquote><pre><code class=\"language-java\">why are you quoting a code block</code></pre></blockquote>"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = ">```\n>please stop trying to break my parser ;-;\n>```"
+ formatted_body = "<blockquote><pre><code>please stop trying to break my parser ;-;</code></pre></blockquote>"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = ">>```\n>>double quote code block\n>single quote not in code block\nnormal text"
+ formatted_body = "<blockquote><blockquote><pre><code>double quote code block</code></pre></blockquote>\nsingle quote not in code block</blockquote>\nnormal text"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = ">>```\n>>>>double quote code block\n>single quote not in code block\nnormal text"
+ formatted_body = "<blockquote><blockquote><pre><code>>>double quote code block</code></pre></blockquote>\nsingle quote not in code block</blockquote>\nnormal text"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = "_```_ignored\ninvalid code block\n```"
+ formatted_body = "<em>```</em>ignored\ninvalid code block\n```"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
def test_escaped():
- assert(format_body("\\_no underline_", MATRIX_FORMATS) == "_no underline_")
- assert(format_body("\\\\_no underline_", MATRIX_FORMATS) == "\\_no underline_")
- assert(format_body(">>>tripple\n\\>none\n>>double", MATRIX_FORMATS) == "<blockquote><blockquote><blockquote>tripple</blockquote></blockquote></blockquote>\n>none\n<blockquote><blockquote>double</blockquote></blockquote>")
+ test = "\\_no underline_"
+ formatted_body = "_no underline_"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = "\\\\_no underline_"
+ formatted_body = "\\_no underline_"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = ">>>tripple\n\\>none\n>>double"
+ formatted_body = "<blockquote><blockquote><blockquote>tripple</blockquote></blockquote></blockquote>\n>none\n<blockquote><blockquote>double</blockquote></blockquote>"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
def test_nested():
- assert(format_body("`*~_code span_~*`", MATRIX_FORMATS) == "<code>*~_code span_~*</code>")
- assert(format_body("*_~`code span`~_*", MATRIX_FORMATS) == "<strong><em><strike><code>code span</code></strike></em></strong>")
- assert(format_body(">*_~`code span`~_*", MATRIX_FORMATS) == "<blockquote><strong><em><strike><code>code span</code></strike></em></strong></blockquote>")
- assert(format_body("*bold star >*< star bold*", MATRIX_FORMATS) == "<strong>bold star >*< star bold</strong>")
- assert(format_body("*_bold*_", MATRIX_FORMATS) == "<strong>_bold</strong>_")
- assert(format_body("__underlined__", MATRIX_FORMATS) == "<em><em>underlined</em></em>")
+ test = "`*~_code span_~*`"
+ formatted_body = "<code>*~_code span_~*</code>"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = "*_~`code span`~_*"
+ formatted_body = "<strong><em><strike><code>code span</code></strike></em></strong>"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = ">*_~`code span`~_*"
+ formatted_body = "<blockquote><strong><em><strike><code>code span</code></strike></em></strong></blockquote>"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = "*bold star >*< star bold*"
+ formatted_body = "<strong>bold star >*< star bold</strong>"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = "*_bold*_"
+ formatted_body = "<strong>_bold</strong>_"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = "__underlined__"
+ formatted_body = "<em><em>underlined</em></em>"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
def test_no_changes():
- assert(format_body("", MATRIX_FORMATS) == "")
- assert(format_body("~~ empty `````` styles **", MATRIX_FORMATS) == "~~ empty `````` styles **")
- assert(format_body("this is not an empty string", MATRIX_FORMATS) == "this is not an empty string")
- assert(format_body("arrow ->", MATRIX_FORMATS) == "arrow ->")
- assert(format_body(" > no quote", MATRIX_FORMATS) == " > no quote")
- assert(format_body("_not underlined", MATRIX_FORMATS) == "_not underlined")
- assert(format_body("|not a spoiler|", MATRIX_FORMATS) == "|not a spoiler|")
- assert(format_body("`no code\nblock here`", MATRIX_FORMATS) == "`no code\nblock here`")
+ test = ""
+ formatted_body = ""
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = "~~ empty `````` styles **"
+ formatted_body = "~~ empty `````` styles **"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = "this is not an empty string"
+ formatted_body = "this is not an empty string"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = "arrow ->"
+ formatted_body = "arrow ->"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = " > no quote"
+ formatted_body = " > no quote"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = "_not underlined"
+ formatted_body = "_not underlined"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = "|not a spoiler|"
+ formatted_body = "|not a spoiler|"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = "||\nalso\nnot\na\nspoiler||"
+ formatted_body = "||\nalso\nnot\na\nspoiler||"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = "`no code\nblock here`"
+ formatted_body = "`no code\nblock here`"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = "invalid ```\ncode block\n```"
+ formatted_body = "invalid ```\ncode block\n```"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = "```\ncode block\ninvalid```"
+ formatted_body = "```\ncode block\ninvalid```"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = "```\ncode block\n```invalid"
+ formatted_body = "```\ncode block\n```invalid"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
def test_assorted():
- assert(format_body("at the ```end```", MATRIX_FORMATS) == "at the <pre><code>end</code></pre>")
- assert(format_body("in the ~middle~ here", MATRIX_FORMATS) == "in the <strike>middle</strike> here")
- assert(format_body("_underline_ *bold* ~strikethrough~ >not quote ||spoiler||\n>quote\nnothing\nnothing\n>>>>another quote with ||~_*```four```*_~||", MATRIX_FORMATS) == "<em>underline</em> <strong>bold</strong> <strike>strikethrough</strike> >not quote <span data-mx-spoiler>spoiler</span>\n<blockquote>quote</blockquote>\nnothing\nnothing\n<blockquote><blockquote><blockquote><blockquote>another quote with <span data-mx-spoiler><strike><em><strong><pre><code>four</code></pre></strong></em></strike></span></blockquote></blockquote></blockquote></blockquote>")
+ test = "at the ||end||"
+ formatted_body = "at the <span data-mx-spoiler>end</span>"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = "in the ~middle~ here"
+ formatted_body = "in the <strike>middle</strike> here"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = "_underline_ *bold* ~strikethrough~ >not quote ||spoiler||\n>quote\nnothing\nnothing\n>>>>another quote with ||~_*```four```*_~||"
+ formatted_body = "<em>underline</em> <strong>bold</strong> <strike>strikethrough</strike> >not quote <span data-mx-spoiler>spoiler</span>\n<blockquote>quote</blockquote>\nnothing\nnothing\n<blockquote><blockquote><blockquote><blockquote>another quote with <span data-mx-spoiler><strike><em><strong>```four```</strong></em></strike></span></blockquote></blockquote></blockquote></blockquote>"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
def test_weird_utf8():
- assert(format_body("❤️💓💕💖💗 ```💙💚💛💜🖤``` 💝💞💟❣️", MATRIX_FORMATS) == "❤️💓💕💖💗 <pre><code>💙💚💛💜🖤</code></pre> 💝💞💟❣️")
- assert(format_body("👨‍👩‍👧‍👧 _underline_👩‍👩‍👦‍👧", MATRIX_FORMATS) == "👨‍👩‍👧‍👧 <em>underline</em>👩‍👩‍👦‍👧")
- assert(format_body("\u202eRight to left", MATRIX_FORMATS) == "\u202eRight to left")
- assert(format_body(">\u202eRight to left quote?", MATRIX_FORMATS) == "<blockquote>\u202eRight to left quote?</blockquote>")
- assert(format_body("_Invisible\u200bseparator_", MATRIX_FORMATS) == "<em>Invisible\u200bseparator</em>")
- assert(format_body("~\u200b~", MATRIX_FORMATS) == "<strike>\u200b</strike>")
+ test = "❤️💓💕💖💗 ||💙💚💛💜🖤|| 💝💞💟❣️"
+ formatted_body = "❤️💓💕💖💗 <span data-mx-spoiler>💙💚💛💜🖤</span> 💝💞💟❣️"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = "👨‍👩‍👧‍👧 _underline_👩‍👩‍👦‍👧"
+ formatted_body = "👨‍👩‍👧‍👧 <em>underline</em>👩‍👩‍👦‍👧"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = "\u202eRight to left"
+ formatted_body = "\u202eRight to left"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = ">\u202eRight to left quote?"
+ formatted_body = "<blockquote>\u202eRight to left quote?</blockquote>"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = "_Invisible\u200bseparator_"
+ formatted_body = "<em>Invisible\u200bseparator</em>"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
+ test = "~\u200b~"
+ formatted_body = "<strike>\u200b</strike>"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
LIMITED_FORMATS = {
"_": ("<em>", "</em>"),
"~": ("<strike>", "</strike>"),
- "`": ("<code>", "</code>"),
- "||": ("<span data-mx-spoiler>", "</span>")
+ "`": ("<code>", "</code>")
}
def test_limited():
- assert(format_body("_underline_ *bold* ~strikethrough~ >not quote ||spoiler||\n>quote\nnothing\nnothing\n>>>>another quote with ||~_*```four```*_~||", LIMITED_FORMATS) == "<em>underline</em> *bold* <strike>strikethrough</strike> >not quote <span data-mx-spoiler>spoiler</span>\n>quote\nnothing\nnothing\n>>>>another quote with <span data-mx-spoiler><strike><em>*```four```*</em></strike></span>")
+ test = "_underline_ *bold* ~strikethrough~ >not quote ||spoiler||\n>quote\nnothing\nnothing\n>>>>another quote with ||~_*```four```*_~||"
+ formatted_body = "<em>underline</em> *bold* <strike>strikethrough</strike> >not quote ||spoiler||\n>quote\nnothing\nnothing\n>>>>another quote with ||<strike><em>*```four```*</em></strike>||"
+ assert(format_body(test, LIMITED_FORMATS) == formatted_body)