diff options
| -rw-r--r-- | src/parser.rs | 1 | ||||
| -rw-r--r-- | src/telegram.rs | 17 | ||||
| -rw-r--r-- | tests/test_matrix.py (renamed from tests/test_matrix_style_parser.py) | 20 | ||||
| -rw-r--r-- | tests/test_telegram.py | 268 |
4 files changed, 289 insertions, 17 deletions
diff --git a/src/parser.rs b/src/parser.rs index 53c38f6..4f5ae58 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -38,7 +38,6 @@ pub fn parse_with_limits(chars: &Vec<char>, start: usize, end: usize, depth: usi } match seek_end_block(chars, c, end_of_line, end, depth) { Some(to) => { - println!("to {}", to); if to != index + 3 && is_quote_start(chars, index, depth) { let keyword = if end_of_line == index + 3 { "```".to_string() diff --git a/src/telegram.rs b/src/telegram.rs index 74473c6..0774143 100644 --- a/src/telegram.rs +++ b/src/telegram.rs @@ -32,12 +32,7 @@ pub fn parse_for_telegram(body: String) -> PyResult<(String, Vec<(String, usize, } } - remove_tags.sort_by(|a, b| b.0.cmp(&a.0)); - - for (index, end) in remove_tags { - chars = [chars[..index].to_vec(), chars[end..].to_vec()].concat(); - } - + // is_start (*<--- start, end -->*), index of all_indexes, format, index of tag, language of codeblock let mut message_entities: Vec<(bool, usize, String, usize, String)> = Vec::with_capacity(styles.len() * 2); let mut all_indexes: Vec<Vec<usize>> = Vec::with_capacity(styles.len()); for (keyword, start, remove_start, end, remove_end) in &styles { @@ -49,7 +44,7 @@ pub fn parse_for_telegram(body: String) -> PyResult<(String, Vec<(String, usize, } else { "".to_string() }; - all_indexes.push(vec![*start, *remove_start, *end, *remove_end]); + all_indexes.push(vec![*start, *remove_start - *start, *end, *remove_end - *end]); let last_index = all_indexes.len() - 1; message_entities.push((true, last_index, TELEGRAM_STYLES.iter().find(|&&(k, _)| k == keyword).unwrap().1.to_string(), *start, language)); message_entities.push((false, last_index, "".to_string(), *end, "".to_string())); @@ -60,6 +55,12 @@ pub fn parse_for_telegram(body: String) -> PyResult<(String, Vec<(String, usize, } message_entities.sort_by(sort_message_entities); + remove_tags.sort_by(|a, b| b.0.cmp(&a.0)); + + for (index, end) in remove_tags { + chars = [chars[..index].to_vec(), chars[end..].to_vec()].concat(); + } + let formatted_text = chars.into_iter().collect::<String>(); let utf16_lengths: Vec<usize> = utf8_to_utf16_length(&formatted_text); @@ -78,7 +79,7 @@ pub fn parse_for_telegram(body: String) -> PyResult<(String, Vec<(String, usize, formatted_text, message_entities.into_iter() .filter(|(is_start, _, _, _, _)| { *is_start } ) - .map(|(_, index, format, _, language)| { (format, utf16_lengths[all_indexes[index][0]], utf16_lengths[all_indexes[index][2]], language) }) + .map(|(_, index, format, _, language)| { (format, utf16_lengths[all_indexes[index][0]], utf16_lengths[all_indexes[index][2] - 1] - utf16_lengths[all_indexes[index][0]], language) }) .collect() )) } diff --git a/tests/test_matrix_style_parser.py b/tests/test_matrix.py index b8ed112..0bc6f1a 100644 --- a/tests/test_matrix_style_parser.py +++ b/tests/test_matrix.py @@ -30,14 +30,14 @@ def test_basic(): assert(format_body(test, MATRIX_FORMATS) == formatted_body) test = """ - ```python - def test_basic(): - test = "_underline_" - formatted_body = "<em>underline</em>" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - ``` - """ - formatted_body = test = """<pre><code class="language-python">def test_basic():<br> test = "_underline_"<br> formatted_body = "<em>underline</em>"<br> assert(format_body(test, MATRIX_FORMATS) == (test, formatted_body))</pre></code><br>""" +```python + def test_basic(): + test = "_underline_" + formatted_body = "<em>underline</em>" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) +``` +""" + formatted_body = '<br><pre><code class="language-python"> def test_basic():<br> test = "_underline_"<br> formatted_body = "<em>underline</em>"<br> assert(format_body(test, MATRIX_FORMATS) == formatted_body)</code></pre><br>' assert(format_body(test, MATRIX_FORMATS) == formatted_body) test = "```\ncode block\n```" @@ -90,6 +90,10 @@ def test_code_blocks(): formatted_body = "<pre><code class=\"language-python\">hacker code</code></pre>" assert(format_body(test, MATRIX_FORMATS) == formatted_body) + test = "```pythonaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\nhacker code\n```" + formatted_body = "<pre><code class=\"language-pythonaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\">hacker code</code></pre>" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + test = "```python\nhacker code\n```\nnormal text" formatted_body = "<pre><code class=\"language-python\">hacker code</code></pre><br>normal text" assert(format_body(test, MATRIX_FORMATS) == formatted_body) diff --git a/tests/test_telegram.py b/tests/test_telegram.py new file mode 100644 index 0000000..164a697 --- /dev/null +++ b/tests/test_telegram.py @@ -0,0 +1,268 @@ +from slidge_style_parser import parse_for_telegram + +def test_basic(): + test = "_underline_" + formatted_body = "underline" + styles = [('italics', 1, 8, '')] + assert(parse_for_telegram(test) == (formatted_body, styles)) + + test = "*bold*" + formatted_body = "bold" + styles = [('bold', 1, 3, '')] + assert(parse_for_telegram(test) == (formatted_body, styles)) + + test = "~strikethrough~" + formatted_body = "strikethrough" + styles = [('strikethrough', 1, 12, '')] + assert(parse_for_telegram(test) == (formatted_body, styles)) + + test = "`code span`" + formatted_body = "code span" + styles = [('code', 1, 8, '')] + assert(parse_for_telegram(test) == (formatted_body, styles)) + + test = """ +```python + def test_basic(): + test = "_underline_" + formatted_body = "underline" + assert(parse_for_telegram(test)[0] == formatted_body) +``` +""" + formatted_body = '\n def test_basic():\n test = "_underline_"\n formatted_body = "underline"\n assert(parse_for_telegram(test)[0] == formatted_body)\n' + styles = [('pre', 2, 148, 'python')] + assert(parse_for_telegram(test) == (formatted_body, styles)) + + test = "```\ncode block\n```" + formatted_body = "code block" + styles = [('pre', 1, 9, '')] + assert(parse_for_telegram(test) == (formatted_body, styles)) + + test = "||this message contains a spoiler||" + formatted_body = "this message contains a spoiler" + styles = [('spoiler', 1, 30, '')] + assert(parse_for_telegram(test) == (formatted_body, styles)) + +def test_quotes(): + test = ">single" + formatted_body = ">single" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = ">single arrow ->" + formatted_body = ">single arrow ->" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = ">single\n>grouped" + formatted_body = ">single\n>grouped" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = ">>double" + formatted_body = ">>double" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = ">>double\n>>double" + formatted_body = ">>double\n>>double" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = ">>double\n&>not quote" + formatted_body = ">>double\n&>not quote" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = ">>double\n>grouped single" + formatted_body = ">>double\n>grouped single" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = ">>>tripple\n>single\n>>double" + formatted_body = ">>>tripple\n>single\n>>double" + assert(parse_for_telegram(test)[0] == formatted_body) + +def test_code_blocks(): + test = "```\nhacker\ncode\n```" + formatted_body = "hacker\ncode" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "```python\nhacker code\n```" + formatted_body = "hacker code" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "```pythonaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\nhacker code\n```" + formatted_body = "hacker code" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "```python\nhacker code\n```\nnormal text" + formatted_body = "hacker code\nnormal text" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "```python\nhacker code\n```\nnormal text\n```java\npublic static void main(String [])\n```" + formatted_body = "hacker code\nnormal text\npublic static void main(String [])" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = ">```java\n>why are you quoting a code block\n>```" + formatted_body = ">why are you quoting a code block" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = ">>```\n>>double quote code block\n>single quote not in code block\nnormal text" + formatted_body = ">>double quote code block\n>single quote not in code block\nnormal text" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = ">```\n>please stop trying to break my parser ;-;" + formatted_body = ">please stop trying to break my parser ;-;" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = ">>```\n>>>>double quote code block\n>single quote not in code block\nnormal text" + formatted_body = ">>>>double quote code block\n>single quote not in code block\nnormal text" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "_```_ignored\ninvalid code block\n```" + formatted_body = "```ignored\ninvalid code block\n```" + assert(parse_for_telegram(test)[0] == formatted_body) + + +def test_escaped(): + test = "\\_no underline_" + formatted_body = "_no underline_" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "\\\\_no underline_" + formatted_body = "\\_no underline_" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = ">>>tripple\n\\>none\n>>double" + formatted_body = ">>>tripple\n>none\n>>double" + assert(parse_for_telegram(test)[0] == formatted_body) + +def test_nested(): + test = "`*~_code span_~*`" + formatted_body = "*~_code span_~*" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "*_~`code span`~_*" + formatted_body = "code span" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = ">*_~`code span`~_*" + formatted_body = ">code span" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "*bold star >*< star bold*" + formatted_body = "bold star >*< star bold" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "*_bold*_" + formatted_body = "_bold_" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "__underlined__" + formatted_body = "underlined" + assert(parse_for_telegram(test)[0] == formatted_body) + +def test_no_changes(): + test = "" + formatted_body = "" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "~~ empty `````` styles **" + formatted_body = "~~ empty `````` styles **" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "this is not an empty string" + formatted_body = "this is not an empty string" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "arrow ->" + formatted_body = "arrow ->" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = " > no quote" + formatted_body = " > no quote" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "_not underlined" + formatted_body = "_not underlined" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "|not a spoiler|" + formatted_body = "|not a spoiler|" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "||\nalso\nnot\na\nspoiler||" + formatted_body = "||\nalso\nnot\na\nspoiler||" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "`no code\nblock here`" + formatted_body = "`no code\nblock here`" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "invalid ```\ncode block\n```" + formatted_body = "invalid ```\ncode block\n```" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "```\ncode block\ninvalid```" + formatted_body = "```\ncode block\ninvalid```" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "```\ncode block\n```invalid" + formatted_body = "```\ncode block\n```invalid" + assert(parse_for_telegram(test)[0] == formatted_body) + +def test_assorted(): + test = "\n" + formatted_body = "\n" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "at the ||end||" + formatted_body = "at the end" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "in the ~middle~ here" + formatted_body = "in the middle here" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "_underline_ *bold* ~strikethrough~ >not quote ||spoiler||\n>quote\nnothing\nnothing\n>>>>another quote with ||~_*```four```*_~||" + formatted_body = "underline bold strikethrough >not quote spoiler\n>quote\nnothing\nnothing\n>>>>another quote with ```four```" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = ">```\n>do be do be dooo ba do be do be do ba\n>>>" + formatted_body = ">do be do be dooo ba do be do be do ba\n>>" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "\n\n>```\n>do be do be dooo ba do be do be do ba\na\n\n\naoeu\n" + formatted_body = "\n\n>do be do be dooo ba do be do be do ba\na\n\n\naoeu\n" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = ">```\n>do be do be dooo ba do be do be do ba\n>\n>\n>aoeu" + formatted_body = ">do be do be dooo ba do be do be do ba\n\n\naoeu" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = ">```\n>code block\n>```invalid end\n" + formatted_body = ">code block\n```invalid end\n" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "invalid ```\ncode block\n*bold*\n```" + formatted_body = "invalid ```\ncode block\nbold\n```" + assert(parse_for_telegram(test)[0] == formatted_body) + +def test_weird_utf8(): + test = "β€οΈππππ ||πππππ€|| πππβ£οΈ" + formatted_body = "β€οΈππππ πππππ€ πππβ£οΈ" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "π¨βπ©βπ§βπ§ _underline_π©βπ©βπ¦βπ§" + formatted_body = "π¨βπ©βπ§βπ§ underlineπ©βπ©βπ¦βπ§" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "\u202eRight to left" + formatted_body = "\u202eRight to left" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = ">\u202eRight to left quote?" + formatted_body = ">\u202eRight to left quote?" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "_Invisible\u200bseparator_" + formatted_body = "Invisible\u200bseparator" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "~\u200b~" + formatted_body = "\u200b" + assert(parse_for_telegram(test)[0] == formatted_body) |
