From 788f64389f5479fcd23844740e7401a8a7870294 Mon Sep 17 00:00:00 2001 From: SavagePeanut Date: Sun, 3 Sep 2023 17:28:57 -0500 Subject: tests passing --- src/parser.rs | 1 - src/telegram.rs | 17 +-- tests/test_matrix.py | 284 ++++++++++++++++++++++++++++++++++++++ tests/test_matrix_style_parser.py | 280 ------------------------------------- tests/test_telegram.py | 268 +++++++++++++++++++++++++++++++++++ 5 files changed, 561 insertions(+), 289 deletions(-) create mode 100644 tests/test_matrix.py delete mode 100644 tests/test_matrix_style_parser.py create mode 100644 tests/test_telegram.py diff --git a/src/parser.rs b/src/parser.rs index 53c38f6..4f5ae58 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -38,7 +38,6 @@ pub fn parse_with_limits(chars: &Vec, start: usize, end: usize, depth: usi } match seek_end_block(chars, c, end_of_line, end, depth) { Some(to) => { - println!("to {}", to); if to != index + 3 && is_quote_start(chars, index, depth) { let keyword = if end_of_line == index + 3 { "```".to_string() diff --git a/src/telegram.rs b/src/telegram.rs index 74473c6..0774143 100644 --- a/src/telegram.rs +++ b/src/telegram.rs @@ -32,12 +32,7 @@ pub fn parse_for_telegram(body: String) -> PyResult<(String, Vec<(String, usize, } } - remove_tags.sort_by(|a, b| b.0.cmp(&a.0)); - - for (index, end) in remove_tags { - chars = [chars[..index].to_vec(), chars[end..].to_vec()].concat(); - } - + // is_start (*<--- start, end -->*), index of all_indexes, format, index of tag, language of codeblock let mut message_entities: Vec<(bool, usize, String, usize, String)> = Vec::with_capacity(styles.len() * 2); let mut all_indexes: Vec> = Vec::with_capacity(styles.len()); for (keyword, start, remove_start, end, remove_end) in &styles { @@ -49,7 +44,7 @@ pub fn parse_for_telegram(body: String) -> PyResult<(String, Vec<(String, usize, } else { "".to_string() }; - all_indexes.push(vec![*start, *remove_start, *end, *remove_end]); + all_indexes.push(vec![*start, *remove_start - *start, *end, *remove_end - *end]); let last_index = all_indexes.len() - 1; message_entities.push((true, last_index, TELEGRAM_STYLES.iter().find(|&&(k, _)| k == keyword).unwrap().1.to_string(), *start, language)); message_entities.push((false, last_index, "".to_string(), *end, "".to_string())); @@ -60,6 +55,12 @@ pub fn parse_for_telegram(body: String) -> PyResult<(String, Vec<(String, usize, } message_entities.sort_by(sort_message_entities); + remove_tags.sort_by(|a, b| b.0.cmp(&a.0)); + + for (index, end) in remove_tags { + chars = [chars[..index].to_vec(), chars[end..].to_vec()].concat(); + } + let formatted_text = chars.into_iter().collect::(); let utf16_lengths: Vec = utf8_to_utf16_length(&formatted_text); @@ -78,7 +79,7 @@ pub fn parse_for_telegram(body: String) -> PyResult<(String, Vec<(String, usize, formatted_text, message_entities.into_iter() .filter(|(is_start, _, _, _, _)| { *is_start } ) - .map(|(_, index, format, _, language)| { (format, utf16_lengths[all_indexes[index][0]], utf16_lengths[all_indexes[index][2]], language) }) + .map(|(_, index, format, _, language)| { (format, utf16_lengths[all_indexes[index][0]], utf16_lengths[all_indexes[index][2] - 1] - utf16_lengths[all_indexes[index][0]], language) }) .collect() )) } diff --git a/tests/test_matrix.py b/tests/test_matrix.py new file mode 100644 index 0000000..0bc6f1a --- /dev/null +++ b/tests/test_matrix.py @@ -0,0 +1,284 @@ +from slidge_style_parser import format_body + +MATRIX_FORMATS = { + "_": ("", ""), + "*": ("", ""), + "~": ("", ""), + "`": ("", ""), + "```": ("
", "
"), + "```language": ("
", "
"), + ">": ("
", "
"), + "||": ("", ""), + "\n": ("
", "") +} + +def test_basic(): + test = "_underline_" + formatted_body = "underline" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "*bold*" + formatted_body = "bold" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "~strikethrough~" + formatted_body = "strikethrough" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "`code span`" + formatted_body = "code span" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = """ +```python + def test_basic(): + test = "_underline_" + formatted_body = "underline" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) +``` +""" + formatted_body = '
    def test_basic():
test = "_underline_"
formatted_body = "underline"
assert(format_body(test, MATRIX_FORMATS) == formatted_body)

' + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "```\ncode block\n```" + formatted_body = "
code block
" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "||this message contains a spoiler||" + formatted_body = "this message contains a spoiler" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + +def test_quotes(): + test = ">single" + formatted_body = "
single
" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = ">single arrow ->" + formatted_body = "
single arrow ->
" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = ">single\n>grouped" + formatted_body = "
single
grouped
" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = ">>double" + formatted_body = "
double
" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = ">>double\n>>double" + formatted_body = "
double
double
" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = ">>double\n&>not quote" + formatted_body = "
double

&>not quote" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = ">>double\n>grouped single" + formatted_body = "
double

grouped single
" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = ">>>tripple\n>single\n>>double" + formatted_body = "
tripple

single
double
" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + +def test_code_blocks(): + test = "```\nhacker\ncode\n```" + formatted_body = "
hacker
code
" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "```python\nhacker code\n```" + formatted_body = "
hacker code
" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "```pythonaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\nhacker code\n```" + formatted_body = "
hacker code
" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "```python\nhacker code\n```\nnormal text" + formatted_body = "
hacker code

normal text" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "```python\nhacker code\n```\nnormal text\n```java\npublic static void main(String [])\n```" + formatted_body = "
hacker code

normal text
public static void main(String [])
" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = ">```java\n>why are you quoting a code block\n>```" + formatted_body = "
why are you quoting a code block
" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = ">>```\n>>double quote code block\n>single quote not in code block\nnormal text" + formatted_body = "
double quote code block

single quote not in code block

normal text" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = ">```\n>please stop trying to break my parser ;-;" + formatted_body = "
please stop trying to break my parser ;-;
" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = ">>```\n>>>>double quote code block\n>single quote not in code block\nnormal text" + formatted_body = "
>>double quote code block

single quote not in code block

normal text" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "_```_ignored\ninvalid code block\n```" + formatted_body = "```ignored
invalid code block
```" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + +def test_escaped(): + test = "\\_no underline_" + formatted_body = "_no underline_" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "\\\\_no underline_" + formatted_body = "\\_no underline_" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = ">>>tripple\n\\>none\n>>double" + formatted_body = "
tripple

>none
double
" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + +def test_nested(): + test = "`*~_code span_~*`" + formatted_body = "*~_code span_~*" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "*_~`code span`~_*" + formatted_body = "code span" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = ">*_~`code span`~_*" + formatted_body = "
code span
" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "*bold star >*< star bold*" + formatted_body = "bold star >*< star bold" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "*_bold*_" + formatted_body = "_bold_" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "__underlined__" + formatted_body = "underlined" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + +def test_no_changes(): + test = "" + formatted_body = "" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "~~ empty `````` styles **" + formatted_body = "~~ empty `````` styles **" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "this is not an empty string" + formatted_body = "this is not an empty string" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "arrow ->" + formatted_body = "arrow ->" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = " > no quote" + formatted_body = " > no quote" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "_not underlined" + formatted_body = "_not underlined" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "|not a spoiler|" + formatted_body = "|not a spoiler|" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "||\nalso\nnot\na\nspoiler||" + formatted_body = "||
also
not
a
spoiler||" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "`no code\nblock here`" + formatted_body = "`no code
block here`" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "invalid ```\ncode block\n```" + formatted_body = "invalid ```
code block
```" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "```\ncode block\ninvalid```" + formatted_body = "```
code block
invalid```" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "```\ncode block\n```invalid" + formatted_body = "```
code block
```invalid" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + +def test_assorted(): + test = "\n" + formatted_body = "
" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "at the ||end||" + formatted_body = "at the end" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "in the ~middle~ here" + formatted_body = "in the middle here" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "_underline_ *bold* ~strikethrough~ >not quote ||spoiler||\n>quote\nnothing\nnothing\n>>>>another quote with ||~_*```four```*_~||" + formatted_body = "underline bold strikethrough >not quote spoiler
quote

nothing
nothing
another quote with ```four```
" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = ">```\n>do be do be dooo ba do be do be do ba\n>>>" + formatted_body = "
do be do be dooo ba do be do be do ba
>>
" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "\n\n>```\n>do be do be dooo ba do be do be do ba\na\n\n\naoeu\n" + formatted_body = "

do be do be dooo ba do be do be do ba

a


aoeu
" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = ">```\n>do be do be dooo ba do be do be do ba\n>\n>\n>aoeu" + formatted_body = "
do be do be dooo ba do be do be do ba


aoeu
" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = ">```\n>code block\n>```invalid end\n" + formatted_body = "
code block
```invalid end

" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "invalid ```\ncode block\n*bold*\n```" + formatted_body = "invalid ```
code block
bold
```" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + +def test_weird_utf8(): + test = "β€οΈπŸ’“πŸ’•πŸ’–πŸ’— ||πŸ’™πŸ’šπŸ’›πŸ’œπŸ–€|| πŸ’πŸ’žπŸ’Ÿβ£οΈ" + formatted_body = "β€οΈπŸ’“πŸ’•πŸ’–πŸ’— πŸ’™πŸ’šπŸ’›πŸ’œπŸ–€ πŸ’πŸ’žπŸ’Ÿβ£οΈ" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "πŸ‘¨β€πŸ‘©β€πŸ‘§β€πŸ‘§ _underline_πŸ‘©β€πŸ‘©β€πŸ‘¦β€πŸ‘§" + formatted_body = "πŸ‘¨β€πŸ‘©β€πŸ‘§β€πŸ‘§ underlineπŸ‘©β€πŸ‘©β€πŸ‘¦β€πŸ‘§" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "\u202eRight to left" + formatted_body = "\u202eRight to left" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = ">\u202eRight to left quote?" + formatted_body = "
\u202eRight to left quote?
" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "_Invisible\u200bseparator_" + formatted_body = "Invisible\u200bseparator" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + + test = "~\u200b~" + formatted_body = "\u200b" + assert(format_body(test, MATRIX_FORMATS) == formatted_body) + +LIMITED_FORMATS = { + "_": ("", ""), + "~": ("", ""), + "`": ("", "") +} + +def test_limited(): + test = "_underline_ *bold* ~strikethrough~ >not quote ||spoiler||\n>quote\nnothing\nnothing\n>>>>another quote with ||~_*```four```*_~||" + formatted_body = "underline *bold* strikethrough >not quote ||spoiler||\n>quote\nnothing\nnothing\n>>>>another quote with ||*```four```*||" + assert(format_body(test, LIMITED_FORMATS) == formatted_body) diff --git a/tests/test_matrix_style_parser.py b/tests/test_matrix_style_parser.py deleted file mode 100644 index b8ed112..0000000 --- a/tests/test_matrix_style_parser.py +++ /dev/null @@ -1,280 +0,0 @@ -from slidge_style_parser import format_body - -MATRIX_FORMATS = { - "_": ("", ""), - "*": ("", ""), - "~": ("", ""), - "`": ("", ""), - "```": ("
", "
"), - "```language": ("
", "
"), - ">": ("
", "
"), - "||": ("", ""), - "\n": ("
", "") -} - -def test_basic(): - test = "_underline_" - formatted_body = "underline" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = "*bold*" - formatted_body = "bold" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = "~strikethrough~" - formatted_body = "strikethrough" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = "`code span`" - formatted_body = "code span" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = """ - ```python - def test_basic(): - test = "_underline_" - formatted_body = "underline" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - ``` - """ - formatted_body = test = """
def test_basic():
test = "_underline_"
formatted_body = "underline"
assert(format_body(test, MATRIX_FORMATS) == (test, formatted_body))

""" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = "```\ncode block\n```" - formatted_body = "
code block
" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = "||this message contains a spoiler||" - formatted_body = "this message contains a spoiler" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - -def test_quotes(): - test = ">single" - formatted_body = "
single
" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = ">single arrow ->" - formatted_body = "
single arrow ->
" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = ">single\n>grouped" - formatted_body = "
single
grouped
" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = ">>double" - formatted_body = "
double
" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = ">>double\n>>double" - formatted_body = "
double
double
" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = ">>double\n&>not quote" - formatted_body = "
double

&>not quote" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = ">>double\n>grouped single" - formatted_body = "
double

grouped single
" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = ">>>tripple\n>single\n>>double" - formatted_body = "
tripple

single
double
" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - -def test_code_blocks(): - test = "```\nhacker\ncode\n```" - formatted_body = "
hacker
code
" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = "```python\nhacker code\n```" - formatted_body = "
hacker code
" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = "```python\nhacker code\n```\nnormal text" - formatted_body = "
hacker code

normal text" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = "```python\nhacker code\n```\nnormal text\n```java\npublic static void main(String [])\n```" - formatted_body = "
hacker code

normal text
public static void main(String [])
" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = ">```java\n>why are you quoting a code block\n>```" - formatted_body = "
why are you quoting a code block
" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = ">>```\n>>double quote code block\n>single quote not in code block\nnormal text" - formatted_body = "
double quote code block

single quote not in code block

normal text" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = ">```\n>please stop trying to break my parser ;-;" - formatted_body = "
please stop trying to break my parser ;-;
" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = ">>```\n>>>>double quote code block\n>single quote not in code block\nnormal text" - formatted_body = "
>>double quote code block

single quote not in code block

normal text" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = "_```_ignored\ninvalid code block\n```" - formatted_body = "```ignored
invalid code block
```" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - -def test_escaped(): - test = "\\_no underline_" - formatted_body = "_no underline_" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = "\\\\_no underline_" - formatted_body = "\\_no underline_" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = ">>>tripple\n\\>none\n>>double" - formatted_body = "
tripple

>none
double
" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - -def test_nested(): - test = "`*~_code span_~*`" - formatted_body = "*~_code span_~*" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = "*_~`code span`~_*" - formatted_body = "code span" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = ">*_~`code span`~_*" - formatted_body = "
code span
" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = "*bold star >*< star bold*" - formatted_body = "bold star >*< star bold" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = "*_bold*_" - formatted_body = "_bold_" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = "__underlined__" - formatted_body = "underlined" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - -def test_no_changes(): - test = "" - formatted_body = "" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = "~~ empty `````` styles **" - formatted_body = "~~ empty `````` styles **" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = "this is not an empty string" - formatted_body = "this is not an empty string" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = "arrow ->" - formatted_body = "arrow ->" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = " > no quote" - formatted_body = " > no quote" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = "_not underlined" - formatted_body = "_not underlined" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = "|not a spoiler|" - formatted_body = "|not a spoiler|" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = "||\nalso\nnot\na\nspoiler||" - formatted_body = "||
also
not
a
spoiler||" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = "`no code\nblock here`" - formatted_body = "`no code
block here`" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = "invalid ```\ncode block\n```" - formatted_body = "invalid ```
code block
```" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = "```\ncode block\ninvalid```" - formatted_body = "```
code block
invalid```" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = "```\ncode block\n```invalid" - formatted_body = "```
code block
```invalid" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - -def test_assorted(): - test = "\n" - formatted_body = "
" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = "at the ||end||" - formatted_body = "at the end" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = "in the ~middle~ here" - formatted_body = "in the middle here" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = "_underline_ *bold* ~strikethrough~ >not quote ||spoiler||\n>quote\nnothing\nnothing\n>>>>another quote with ||~_*```four```*_~||" - formatted_body = "underline bold strikethrough >not quote spoiler
quote

nothing
nothing
another quote with ```four```
" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = ">```\n>do be do be dooo ba do be do be do ba\n>>>" - formatted_body = "
do be do be dooo ba do be do be do ba
>>
" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = "\n\n>```\n>do be do be dooo ba do be do be do ba\na\n\n\naoeu\n" - formatted_body = "

do be do be dooo ba do be do be do ba

a


aoeu
" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = ">```\n>do be do be dooo ba do be do be do ba\n>\n>\n>aoeu" - formatted_body = "
do be do be dooo ba do be do be do ba


aoeu
" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = ">```\n>code block\n>```invalid end\n" - formatted_body = "
code block
```invalid end

" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = "invalid ```\ncode block\n*bold*\n```" - formatted_body = "invalid ```
code block
bold
```" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - -def test_weird_utf8(): - test = "β€οΈπŸ’“πŸ’•πŸ’–πŸ’— ||πŸ’™πŸ’šπŸ’›πŸ’œπŸ–€|| πŸ’πŸ’žπŸ’Ÿβ£οΈ" - formatted_body = "β€οΈπŸ’“πŸ’•πŸ’–πŸ’— πŸ’™πŸ’šπŸ’›πŸ’œπŸ–€ πŸ’πŸ’žπŸ’Ÿβ£οΈ" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = "πŸ‘¨β€πŸ‘©β€πŸ‘§β€πŸ‘§ _underline_πŸ‘©β€πŸ‘©β€πŸ‘¦β€πŸ‘§" - formatted_body = "πŸ‘¨β€πŸ‘©β€πŸ‘§β€πŸ‘§ underlineπŸ‘©β€πŸ‘©β€πŸ‘¦β€πŸ‘§" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = "\u202eRight to left" - formatted_body = "\u202eRight to left" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = ">\u202eRight to left quote?" - formatted_body = "
\u202eRight to left quote?
" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = "_Invisible\u200bseparator_" - formatted_body = "Invisible\u200bseparator" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - - test = "~\u200b~" - formatted_body = "\u200b" - assert(format_body(test, MATRIX_FORMATS) == formatted_body) - -LIMITED_FORMATS = { - "_": ("", ""), - "~": ("", ""), - "`": ("", "") -} - -def test_limited(): - test = "_underline_ *bold* ~strikethrough~ >not quote ||spoiler||\n>quote\nnothing\nnothing\n>>>>another quote with ||~_*```four```*_~||" - formatted_body = "underline *bold* strikethrough >not quote ||spoiler||\n>quote\nnothing\nnothing\n>>>>another quote with ||*```four```*||" - assert(format_body(test, LIMITED_FORMATS) == formatted_body) diff --git a/tests/test_telegram.py b/tests/test_telegram.py new file mode 100644 index 0000000..164a697 --- /dev/null +++ b/tests/test_telegram.py @@ -0,0 +1,268 @@ +from slidge_style_parser import parse_for_telegram + +def test_basic(): + test = "_underline_" + formatted_body = "underline" + styles = [('italics', 1, 8, '')] + assert(parse_for_telegram(test) == (formatted_body, styles)) + + test = "*bold*" + formatted_body = "bold" + styles = [('bold', 1, 3, '')] + assert(parse_for_telegram(test) == (formatted_body, styles)) + + test = "~strikethrough~" + formatted_body = "strikethrough" + styles = [('strikethrough', 1, 12, '')] + assert(parse_for_telegram(test) == (formatted_body, styles)) + + test = "`code span`" + formatted_body = "code span" + styles = [('code', 1, 8, '')] + assert(parse_for_telegram(test) == (formatted_body, styles)) + + test = """ +```python + def test_basic(): + test = "_underline_" + formatted_body = "underline" + assert(parse_for_telegram(test)[0] == formatted_body) +``` +""" + formatted_body = '\n def test_basic():\n test = "_underline_"\n formatted_body = "underline"\n assert(parse_for_telegram(test)[0] == formatted_body)\n' + styles = [('pre', 2, 148, 'python')] + assert(parse_for_telegram(test) == (formatted_body, styles)) + + test = "```\ncode block\n```" + formatted_body = "code block" + styles = [('pre', 1, 9, '')] + assert(parse_for_telegram(test) == (formatted_body, styles)) + + test = "||this message contains a spoiler||" + formatted_body = "this message contains a spoiler" + styles = [('spoiler', 1, 30, '')] + assert(parse_for_telegram(test) == (formatted_body, styles)) + +def test_quotes(): + test = ">single" + formatted_body = ">single" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = ">single arrow ->" + formatted_body = ">single arrow ->" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = ">single\n>grouped" + formatted_body = ">single\n>grouped" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = ">>double" + formatted_body = ">>double" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = ">>double\n>>double" + formatted_body = ">>double\n>>double" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = ">>double\n&>not quote" + formatted_body = ">>double\n&>not quote" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = ">>double\n>grouped single" + formatted_body = ">>double\n>grouped single" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = ">>>tripple\n>single\n>>double" + formatted_body = ">>>tripple\n>single\n>>double" + assert(parse_for_telegram(test)[0] == formatted_body) + +def test_code_blocks(): + test = "```\nhacker\ncode\n```" + formatted_body = "hacker\ncode" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "```python\nhacker code\n```" + formatted_body = "hacker code" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "```pythonaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\nhacker code\n```" + formatted_body = "hacker code" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "```python\nhacker code\n```\nnormal text" + formatted_body = "hacker code\nnormal text" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "```python\nhacker code\n```\nnormal text\n```java\npublic static void main(String [])\n```" + formatted_body = "hacker code\nnormal text\npublic static void main(String [])" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = ">```java\n>why are you quoting a code block\n>```" + formatted_body = ">why are you quoting a code block" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = ">>```\n>>double quote code block\n>single quote not in code block\nnormal text" + formatted_body = ">>double quote code block\n>single quote not in code block\nnormal text" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = ">```\n>please stop trying to break my parser ;-;" + formatted_body = ">please stop trying to break my parser ;-;" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = ">>```\n>>>>double quote code block\n>single quote not in code block\nnormal text" + formatted_body = ">>>>double quote code block\n>single quote not in code block\nnormal text" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "_```_ignored\ninvalid code block\n```" + formatted_body = "```ignored\ninvalid code block\n```" + assert(parse_for_telegram(test)[0] == formatted_body) + + +def test_escaped(): + test = "\\_no underline_" + formatted_body = "_no underline_" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "\\\\_no underline_" + formatted_body = "\\_no underline_" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = ">>>tripple\n\\>none\n>>double" + formatted_body = ">>>tripple\n>none\n>>double" + assert(parse_for_telegram(test)[0] == formatted_body) + +def test_nested(): + test = "`*~_code span_~*`" + formatted_body = "*~_code span_~*" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "*_~`code span`~_*" + formatted_body = "code span" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = ">*_~`code span`~_*" + formatted_body = ">code span" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "*bold star >*< star bold*" + formatted_body = "bold star >*< star bold" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "*_bold*_" + formatted_body = "_bold_" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "__underlined__" + formatted_body = "underlined" + assert(parse_for_telegram(test)[0] == formatted_body) + +def test_no_changes(): + test = "" + formatted_body = "" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "~~ empty `````` styles **" + formatted_body = "~~ empty `````` styles **" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "this is not an empty string" + formatted_body = "this is not an empty string" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "arrow ->" + formatted_body = "arrow ->" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = " > no quote" + formatted_body = " > no quote" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "_not underlined" + formatted_body = "_not underlined" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "|not a spoiler|" + formatted_body = "|not a spoiler|" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "||\nalso\nnot\na\nspoiler||" + formatted_body = "||\nalso\nnot\na\nspoiler||" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "`no code\nblock here`" + formatted_body = "`no code\nblock here`" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "invalid ```\ncode block\n```" + formatted_body = "invalid ```\ncode block\n```" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "```\ncode block\ninvalid```" + formatted_body = "```\ncode block\ninvalid```" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "```\ncode block\n```invalid" + formatted_body = "```\ncode block\n```invalid" + assert(parse_for_telegram(test)[0] == formatted_body) + +def test_assorted(): + test = "\n" + formatted_body = "\n" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "at the ||end||" + formatted_body = "at the end" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "in the ~middle~ here" + formatted_body = "in the middle here" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "_underline_ *bold* ~strikethrough~ >not quote ||spoiler||\n>quote\nnothing\nnothing\n>>>>another quote with ||~_*```four```*_~||" + formatted_body = "underline bold strikethrough >not quote spoiler\n>quote\nnothing\nnothing\n>>>>another quote with ```four```" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = ">```\n>do be do be dooo ba do be do be do ba\n>>>" + formatted_body = ">do be do be dooo ba do be do be do ba\n>>" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "\n\n>```\n>do be do be dooo ba do be do be do ba\na\n\n\naoeu\n" + formatted_body = "\n\n>do be do be dooo ba do be do be do ba\na\n\n\naoeu\n" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = ">```\n>do be do be dooo ba do be do be do ba\n>\n>\n>aoeu" + formatted_body = ">do be do be dooo ba do be do be do ba\n\n\naoeu" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = ">```\n>code block\n>```invalid end\n" + formatted_body = ">code block\n```invalid end\n" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "invalid ```\ncode block\n*bold*\n```" + formatted_body = "invalid ```\ncode block\nbold\n```" + assert(parse_for_telegram(test)[0] == formatted_body) + +def test_weird_utf8(): + test = "β€οΈπŸ’“πŸ’•πŸ’–πŸ’— ||πŸ’™πŸ’šπŸ’›πŸ’œπŸ–€|| πŸ’πŸ’žπŸ’Ÿβ£οΈ" + formatted_body = "β€οΈπŸ’“πŸ’•πŸ’–πŸ’— πŸ’™πŸ’šπŸ’›πŸ’œπŸ–€ πŸ’πŸ’žπŸ’Ÿβ£οΈ" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "πŸ‘¨β€πŸ‘©β€πŸ‘§β€πŸ‘§ _underline_πŸ‘©β€πŸ‘©β€πŸ‘¦β€πŸ‘§" + formatted_body = "πŸ‘¨β€πŸ‘©β€πŸ‘§β€πŸ‘§ underlineπŸ‘©β€πŸ‘©β€πŸ‘¦β€πŸ‘§" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "\u202eRight to left" + formatted_body = "\u202eRight to left" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = ">\u202eRight to left quote?" + formatted_body = ">\u202eRight to left quote?" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "_Invisible\u200bseparator_" + formatted_body = "Invisible\u200bseparator" + assert(parse_for_telegram(test)[0] == formatted_body) + + test = "~\u200b~" + formatted_body = "\u200b" + assert(parse_for_telegram(test)[0] == formatted_body) -- cgit v1.2.3