summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/parser.rs1
-rw-r--r--src/telegram.rs17
-rw-r--r--tests/test_matrix.py (renamed from tests/test_matrix_style_parser.py)20
-rw-r--r--tests/test_telegram.py268
4 files changed, 289 insertions, 17 deletions
diff --git a/src/parser.rs b/src/parser.rs
index 53c38f6..4f5ae58 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -38,7 +38,6 @@ pub fn parse_with_limits(chars: &Vec<char>, start: usize, end: usize, depth: usi
}
match seek_end_block(chars, c, end_of_line, end, depth) {
Some(to) => {
- println!("to {}", to);
if to != index + 3 && is_quote_start(chars, index, depth) {
let keyword = if end_of_line == index + 3 {
"```".to_string()
diff --git a/src/telegram.rs b/src/telegram.rs
index 74473c6..0774143 100644
--- a/src/telegram.rs
+++ b/src/telegram.rs
@@ -32,12 +32,7 @@ pub fn parse_for_telegram(body: String) -> PyResult<(String, Vec<(String, usize,
}
}
- remove_tags.sort_by(|a, b| b.0.cmp(&a.0));
-
- for (index, end) in remove_tags {
- chars = [chars[..index].to_vec(), chars[end..].to_vec()].concat();
- }
-
+ // is_start (*<--- start, end -->*), index of all_indexes, format, index of tag, language of codeblock
let mut message_entities: Vec<(bool, usize, String, usize, String)> = Vec::with_capacity(styles.len() * 2);
let mut all_indexes: Vec<Vec<usize>> = Vec::with_capacity(styles.len());
for (keyword, start, remove_start, end, remove_end) in &styles {
@@ -49,7 +44,7 @@ pub fn parse_for_telegram(body: String) -> PyResult<(String, Vec<(String, usize,
} else {
"".to_string()
};
- all_indexes.push(vec![*start, *remove_start, *end, *remove_end]);
+ all_indexes.push(vec![*start, *remove_start - *start, *end, *remove_end - *end]);
let last_index = all_indexes.len() - 1;
message_entities.push((true, last_index, TELEGRAM_STYLES.iter().find(|&&(k, _)| k == keyword).unwrap().1.to_string(), *start, language));
message_entities.push((false, last_index, "".to_string(), *end, "".to_string()));
@@ -60,6 +55,12 @@ pub fn parse_for_telegram(body: String) -> PyResult<(String, Vec<(String, usize,
}
message_entities.sort_by(sort_message_entities);
+ remove_tags.sort_by(|a, b| b.0.cmp(&a.0));
+
+ for (index, end) in remove_tags {
+ chars = [chars[..index].to_vec(), chars[end..].to_vec()].concat();
+ }
+
let formatted_text = chars.into_iter().collect::<String>();
let utf16_lengths: Vec<usize> = utf8_to_utf16_length(&formatted_text);
@@ -78,7 +79,7 @@ pub fn parse_for_telegram(body: String) -> PyResult<(String, Vec<(String, usize,
formatted_text,
message_entities.into_iter()
.filter(|(is_start, _, _, _, _)| { *is_start } )
- .map(|(_, index, format, _, language)| { (format, utf16_lengths[all_indexes[index][0]], utf16_lengths[all_indexes[index][2]], language) })
+ .map(|(_, index, format, _, language)| { (format, utf16_lengths[all_indexes[index][0]], utf16_lengths[all_indexes[index][2] - 1] - utf16_lengths[all_indexes[index][0]], language) })
.collect()
))
}
diff --git a/tests/test_matrix_style_parser.py b/tests/test_matrix.py
index b8ed112..0bc6f1a 100644
--- a/tests/test_matrix_style_parser.py
+++ b/tests/test_matrix.py
@@ -30,14 +30,14 @@ def test_basic():
assert(format_body(test, MATRIX_FORMATS) == formatted_body)
test = """
- ```python
- def test_basic():
- test = "_underline_"
- formatted_body = "<em>underline</em>"
- assert(format_body(test, MATRIX_FORMATS) == formatted_body)
- ```
- """
- formatted_body = test = """<pre><code class="language-python">def test_basic():<br> test = "_underline_"<br> formatted_body = "<em>underline</em>"<br> assert(format_body(test, MATRIX_FORMATS) == (test, formatted_body))</pre></code><br>"""
+```python
+ def test_basic():
+ test = "_underline_"
+ formatted_body = "<em>underline</em>"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+```
+"""
+ formatted_body = '<br><pre><code class="language-python"> def test_basic():<br> test = "_underline_"<br> formatted_body = "<em>underline</em>"<br> assert(format_body(test, MATRIX_FORMATS) == formatted_body)</code></pre><br>'
assert(format_body(test, MATRIX_FORMATS) == formatted_body)
test = "```\ncode block\n```"
@@ -90,6 +90,10 @@ def test_code_blocks():
formatted_body = "<pre><code class=\"language-python\">hacker code</code></pre>"
assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+ test = "```pythonaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\nhacker code\n```"
+ formatted_body = "<pre><code class=\"language-pythonaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\">hacker code</code></pre>"
+ assert(format_body(test, MATRIX_FORMATS) == formatted_body)
+
test = "```python\nhacker code\n```\nnormal text"
formatted_body = "<pre><code class=\"language-python\">hacker code</code></pre><br>normal text"
assert(format_body(test, MATRIX_FORMATS) == formatted_body)
diff --git a/tests/test_telegram.py b/tests/test_telegram.py
new file mode 100644
index 0000000..164a697
--- /dev/null
+++ b/tests/test_telegram.py
@@ -0,0 +1,268 @@
+from slidge_style_parser import parse_for_telegram
+
+def test_basic():
+ test = "_underline_"
+ formatted_body = "underline"
+ styles = [('italics', 1, 8, '')]
+ assert(parse_for_telegram(test) == (formatted_body, styles))
+
+ test = "*bold*"
+ formatted_body = "bold"
+ styles = [('bold', 1, 3, '')]
+ assert(parse_for_telegram(test) == (formatted_body, styles))
+
+ test = "~strikethrough~"
+ formatted_body = "strikethrough"
+ styles = [('strikethrough', 1, 12, '')]
+ assert(parse_for_telegram(test) == (formatted_body, styles))
+
+ test = "`code span`"
+ formatted_body = "code span"
+ styles = [('code', 1, 8, '')]
+ assert(parse_for_telegram(test) == (formatted_body, styles))
+
+ test = """
+```python
+ def test_basic():
+ test = "_underline_"
+ formatted_body = "underline"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+```
+"""
+ formatted_body = '\n def test_basic():\n test = "_underline_"\n formatted_body = "underline"\n assert(parse_for_telegram(test)[0] == formatted_body)\n'
+ styles = [('pre', 2, 148, 'python')]
+ assert(parse_for_telegram(test) == (formatted_body, styles))
+
+ test = "```\ncode block\n```"
+ formatted_body = "code block"
+ styles = [('pre', 1, 9, '')]
+ assert(parse_for_telegram(test) == (formatted_body, styles))
+
+ test = "||this message contains a spoiler||"
+ formatted_body = "this message contains a spoiler"
+ styles = [('spoiler', 1, 30, '')]
+ assert(parse_for_telegram(test) == (formatted_body, styles))
+
+def test_quotes():
+ test = ">single"
+ formatted_body = ">single"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = ">single arrow ->"
+ formatted_body = ">single arrow ->"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = ">single\n>grouped"
+ formatted_body = ">single\n>grouped"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = ">>double"
+ formatted_body = ">>double"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = ">>double\n>>double"
+ formatted_body = ">>double\n>>double"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = ">>double\n&>not quote"
+ formatted_body = ">>double\n&>not quote"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = ">>double\n>grouped single"
+ formatted_body = ">>double\n>grouped single"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = ">>>tripple\n>single\n>>double"
+ formatted_body = ">>>tripple\n>single\n>>double"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+def test_code_blocks():
+ test = "```\nhacker\ncode\n```"
+ formatted_body = "hacker\ncode"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = "```python\nhacker code\n```"
+ formatted_body = "hacker code"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = "```pythonaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\nhacker code\n```"
+ formatted_body = "hacker code"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = "```python\nhacker code\n```\nnormal text"
+ formatted_body = "hacker code\nnormal text"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = "```python\nhacker code\n```\nnormal text\n```java\npublic static void main(String [])\n```"
+ formatted_body = "hacker code\nnormal text\npublic static void main(String [])"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = ">```java\n>why are you quoting a code block\n>```"
+ formatted_body = ">why are you quoting a code block"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = ">>```\n>>double quote code block\n>single quote not in code block\nnormal text"
+ formatted_body = ">>double quote code block\n>single quote not in code block\nnormal text"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = ">```\n>please stop trying to break my parser ;-;"
+ formatted_body = ">please stop trying to break my parser ;-;"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = ">>```\n>>>>double quote code block\n>single quote not in code block\nnormal text"
+ formatted_body = ">>>>double quote code block\n>single quote not in code block\nnormal text"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = "_```_ignored\ninvalid code block\n```"
+ formatted_body = "```ignored\ninvalid code block\n```"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+
+def test_escaped():
+ test = "\\_no underline_"
+ formatted_body = "_no underline_"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = "\\\\_no underline_"
+ formatted_body = "\\_no underline_"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = ">>>tripple\n\\>none\n>>double"
+ formatted_body = ">>>tripple\n>none\n>>double"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+def test_nested():
+ test = "`*~_code span_~*`"
+ formatted_body = "*~_code span_~*"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = "*_~`code span`~_*"
+ formatted_body = "code span"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = ">*_~`code span`~_*"
+ formatted_body = ">code span"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = "*bold star >*< star bold*"
+ formatted_body = "bold star >*< star bold"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = "*_bold*_"
+ formatted_body = "_bold_"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = "__underlined__"
+ formatted_body = "underlined"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+def test_no_changes():
+ test = ""
+ formatted_body = ""
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = "~~ empty `````` styles **"
+ formatted_body = "~~ empty `````` styles **"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = "this is not an empty string"
+ formatted_body = "this is not an empty string"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = "arrow ->"
+ formatted_body = "arrow ->"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = " > no quote"
+ formatted_body = " > no quote"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = "_not underlined"
+ formatted_body = "_not underlined"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = "|not a spoiler|"
+ formatted_body = "|not a spoiler|"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = "||\nalso\nnot\na\nspoiler||"
+ formatted_body = "||\nalso\nnot\na\nspoiler||"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = "`no code\nblock here`"
+ formatted_body = "`no code\nblock here`"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = "invalid ```\ncode block\n```"
+ formatted_body = "invalid ```\ncode block\n```"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = "```\ncode block\ninvalid```"
+ formatted_body = "```\ncode block\ninvalid```"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = "```\ncode block\n```invalid"
+ formatted_body = "```\ncode block\n```invalid"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+def test_assorted():
+ test = "\n"
+ formatted_body = "\n"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = "at the ||end||"
+ formatted_body = "at the end"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = "in the ~middle~ here"
+ formatted_body = "in the middle here"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = "_underline_ *bold* ~strikethrough~ >not quote ||spoiler||\n>quote\nnothing\nnothing\n>>>>another quote with ||~_*```four```*_~||"
+ formatted_body = "underline bold strikethrough >not quote spoiler\n>quote\nnothing\nnothing\n>>>>another quote with ```four```"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = ">```\n>do be do be dooo ba do be do be do ba\n>>>"
+ formatted_body = ">do be do be dooo ba do be do be do ba\n>>"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = "\n\n>```\n>do be do be dooo ba do be do be do ba\na\n\n\naoeu\n"
+ formatted_body = "\n\n>do be do be dooo ba do be do be do ba\na\n\n\naoeu\n"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = ">```\n>do be do be dooo ba do be do be do ba\n>\n>\n>aoeu"
+ formatted_body = ">do be do be dooo ba do be do be do ba\n\n\naoeu"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = ">```\n>code block\n>```invalid end\n"
+ formatted_body = ">code block\n```invalid end\n"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = "invalid ```\ncode block\n*bold*\n```"
+ formatted_body = "invalid ```\ncode block\nbold\n```"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+def test_weird_utf8():
+ test = "❤️💓💕💖💗 ||💙💚💛💜🖤|| 💝💞💟❣️"
+ formatted_body = "❤️💓💕💖💗 💙💚💛💜🖤 💝💞💟❣️"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = "👨‍👩‍👧‍👧 _underline_👩‍👩‍👦‍👧"
+ formatted_body = "👨‍👩‍👧‍👧 underline👩‍👩‍👦‍👧"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = "\u202eRight to left"
+ formatted_body = "\u202eRight to left"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = ">\u202eRight to left quote?"
+ formatted_body = ">\u202eRight to left quote?"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = "_Invisible\u200bseparator_"
+ formatted_body = "Invisible\u200bseparator"
+ assert(parse_for_telegram(test)[0] == formatted_body)
+
+ test = "~\u200b~"
+ formatted_body = "\u200b"
+ assert(parse_for_telegram(test)[0] == formatted_body)