summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSavagePeanut <sourcehut@lazytapir.com>2023-07-28 16:00:22 -0500
committerSavagePeanut <sourcehut@lazytapir.com>2023-07-30 15:23:11 -0500
commitf290f20011d6ccafe8a4f0adaaab0c157a2c0107 (patch)
treef07c68ebbd8bbb21bf19d92523d1c58308f850aa
style outgoing messages
-rw-r--r--.github/workflows/CI.yml120
-rw-r--r--.gitignore73
-rw-r--r--Cargo.lock273
-rw-r--r--Cargo.toml12
-rw-r--r--README.md10
-rw-r--r--pyproject.toml16
-rw-r--r--src/lib.rs217
-rw-r--r--tests/test_style_parser.py72
8 files changed, 793 insertions, 0 deletions
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
new file mode 100644
index 0000000..49ebda6
--- /dev/null
+++ b/.github/workflows/CI.yml
@@ -0,0 +1,120 @@
+# This file is autogenerated by maturin v0.14.17
+# To update, run
+#
+# maturin generate-ci github
+#
+name: CI
+
+on:
+ push:
+ branches:
+ - main
+ - master
+ tags:
+ - '*'
+ pull_request:
+ workflow_dispatch:
+
+permissions:
+ contents: read
+
+jobs:
+ linux:
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ target: [x86_64, x86, aarch64, armv7, s390x, ppc64le]
+ steps:
+ - uses: actions/checkout@v3
+ - uses: actions/setup-python@v4
+ with:
+ python-version: '3.10'
+ - name: Build wheels
+ uses: PyO3/maturin-action@v1
+ with:
+ target: ${{ matrix.target }}
+ args: --release --out dist --find-interpreter
+ sccache: 'true'
+ manylinux: auto
+ - name: Upload wheels
+ uses: actions/upload-artifact@v3
+ with:
+ name: wheels
+ path: dist
+
+ windows:
+ runs-on: windows-latest
+ strategy:
+ matrix:
+ target: [x64, x86]
+ steps:
+ - uses: actions/checkout@v3
+ - uses: actions/setup-python@v4
+ with:
+ python-version: '3.10'
+ architecture: ${{ matrix.target }}
+ - name: Build wheels
+ uses: PyO3/maturin-action@v1
+ with:
+ target: ${{ matrix.target }}
+ args: --release --out dist --find-interpreter
+ sccache: 'true'
+ - name: Upload wheels
+ uses: actions/upload-artifact@v3
+ with:
+ name: wheels
+ path: dist
+
+ macos:
+ runs-on: macos-latest
+ strategy:
+ matrix:
+ target: [x86_64, aarch64]
+ steps:
+ - uses: actions/checkout@v3
+ - uses: actions/setup-python@v4
+ with:
+ python-version: '3.10'
+ - name: Build wheels
+ uses: PyO3/maturin-action@v1
+ with:
+ target: ${{ matrix.target }}
+ args: --release --out dist --find-interpreter
+ sccache: 'true'
+ - name: Upload wheels
+ uses: actions/upload-artifact@v3
+ with:
+ name: wheels
+ path: dist
+
+ sdist:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v3
+ - name: Build sdist
+ uses: PyO3/maturin-action@v1
+ with:
+ command: sdist
+ args: --out dist
+ - name: Upload sdist
+ uses: actions/upload-artifact@v3
+ with:
+ name: wheels
+ path: dist
+
+ release:
+ name: Release
+ runs-on: ubuntu-latest
+ if: "startsWith(github.ref, 'refs/tags/')"
+ needs: [linux, windows, macos, sdist]
+ steps:
+ - uses: actions/download-artifact@v3
+ with:
+ name: wheels
+ - name: Publish to PyPI
+ uses: PyO3/maturin-action@v1
+ env:
+ MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
+ with:
+ command: upload
+ args: --skip-existing *
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..f52d727
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,73 @@
+/target
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+.pytest_cache/
+*.py[cod]
+src/target/
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+.venv/
+env/
+bin/
+build/
+develop-eggs/
+dist/
+eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+include/
+man/
+venv/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+pip-selfcheck.json
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.cache
+nosetests.xml
+coverage.xml
+
+# Translations
+*.mo
+
+# Mr Developer
+.mr.developer.cfg
+.project
+.pydevproject
+
+# Rope
+.ropeproject
+
+# Django stuff:
+*.log
+*.pot
+
+.DS_Store
+
+# Sphinx documentation
+docs/_build/
+
+# PyCharm
+.idea/
+
+# VSCode
+.vscode/
+
+# Pyenv
+.python-version \ No newline at end of file
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..48915a3
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,273 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "autocfg"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
+
+[[package]]
+name = "bitflags"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
+
+[[package]]
+name = "cfg-if"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
+
+[[package]]
+name = "indoc"
+version = "1.0.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bfa799dd5ed20a7e349f3b4639aa80d74549c81716d9ec4f994c9b5815598306"
+
+[[package]]
+name = "libc"
+version = "0.2.147"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
+
+[[package]]
+name = "lock_api"
+version = "0.4.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c1cc9717a20b1bb222f333e6a92fd32f7d8a18ddc5a3191a11af45dcbf4dcd16"
+dependencies = [
+ "autocfg",
+ "scopeguard",
+]
+
+[[package]]
+name = "memoffset"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d61c719bcfbcf5d62b3a09efa6088de8c54bc0bfcd3ea7ae39fcc186108b8de1"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "once_cell"
+version = "1.18.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
+
+[[package]]
+name = "parking_lot"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
+dependencies = [
+ "lock_api",
+ "parking_lot_core",
+]
+
+[[package]]
+name = "parking_lot_core"
+version = "0.9.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "redox_syscall",
+ "smallvec",
+ "windows-targets",
+]
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.66"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "pyo3"
+version = "0.18.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e3b1ac5b3731ba34fdaa9785f8d74d17448cd18f30cf19e0c7e7b1fdb5272109"
+dependencies = [
+ "cfg-if",
+ "indoc",
+ "libc",
+ "memoffset",
+ "parking_lot",
+ "pyo3-build-config",
+ "pyo3-ffi",
+ "pyo3-macros",
+ "unindent",
+]
+
+[[package]]
+name = "pyo3-build-config"
+version = "0.18.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9cb946f5ac61bb61a5014924910d936ebd2b23b705f7a4a3c40b05c720b079a3"
+dependencies = [
+ "once_cell",
+ "target-lexicon",
+]
+
+[[package]]
+name = "pyo3-ffi"
+version = "0.18.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fd4d7c5337821916ea2a1d21d1092e8443cf34879e53a0ac653fbb98f44ff65c"
+dependencies = [
+ "libc",
+ "pyo3-build-config",
+]
+
+[[package]]
+name = "pyo3-macros"
+version = "0.18.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a9d39c55dab3fc5a4b25bbd1ac10a2da452c4aca13bb450f22818a002e29648d"
+dependencies = [
+ "proc-macro2",
+ "pyo3-macros-backend",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "pyo3-macros-backend"
+version = "0.18.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "97daff08a4c48320587b5224cc98d609e3c27b6d437315bd40b605c98eeb5918"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "50f3b39ccfb720540debaa0164757101c08ecb8d326b15358ce76a62c7e85965"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "redox_syscall"
+version = "0.3.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29"
+dependencies = [
+ "bitflags",
+]
+
+[[package]]
+name = "scopeguard"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
+
+[[package]]
+name = "smallvec"
+version = "1.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9"
+
+[[package]]
+name = "style-parser"
+version = "0.1.0"
+dependencies = [
+ "pyo3",
+]
+
+[[package]]
+name = "syn"
+version = "1.0.109"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "target-lexicon"
+version = "0.12.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1d2faeef5759ab89935255b1a4cd98e0baf99d1085e37d36599c625dac49ae8e"
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c"
+
+[[package]]
+name = "unindent"
+version = "0.1.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e1766d682d402817b5ac4490b3c3002d91dfa0d22812f341609f97b08757359c"
+
+[[package]]
+name = "windows-targets"
+version = "0.48.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "05d4b17490f70499f20b9e791dcf6a299785ce8af4d709018206dc5b4953e95f"
+dependencies = [
+ "windows_aarch64_gnullvm",
+ "windows_aarch64_msvc",
+ "windows_i686_gnu",
+ "windows_i686_msvc",
+ "windows_x86_64_gnu",
+ "windows_x86_64_gnullvm",
+ "windows_x86_64_msvc",
+]
+
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.48.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc"
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.48.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.48.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.48.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.48.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.48.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.48.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..bed67eb
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,12 @@
+[package]
+name = "style-parser"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+[lib]
+name = "style_parser"
+crate-type = ["cdylib"]
+
+[dependencies]
+pyo3 = "0.18.1"
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..8ea0dff
--- /dev/null
+++ b/README.md
@@ -0,0 +1,10 @@
+A parsing library for Slidge. Supports parsing these attributes:
+
+"\_underline_"
+"\*bold*"
+"~strikethrough~"
+"\`code span`"
+"\```code block```"
+">quote"
+"||spoiler||"
+"\\\_escape style_"
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..7e6eee5
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,16 @@
+[build-system]
+requires = ["maturin>=0.14,<0.15"]
+build-backend = "maturin"
+
+[project]
+name = "style-parser"
+requires-python = ">=3.7"
+classifiers = [
+ "Programming Language :: Rust",
+ "Programming Language :: Python :: Implementation :: CPython",
+ "Programming Language :: Python :: Implementation :: PyPy",
+]
+
+
+[tool.maturin]
+features = ["pyo3/extension-module"]
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..f728b7c
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,217 @@
+use std::collections::HashMap;
+
+use pyo3::prelude::*;
+
+// Characters that may open an inline style span.
+const KEYWORDS: [char; 5] = ['*', '_', '~', '`', '|'];
+// Styles whose contents are not scanned for nested styles.
+const NO_SUB_PARSING_KEYWORDS: [char; 1] = ['`'];
+// Characters treated as quote markers.
+const QUOTE_KEYWORDS: [char; 1] = ['>'];
+// (keyword char, block indicator size); the keyword reported for a block repeats the char size + 1 times.
+const BLOCK_KEYWORDS: [(char, usize); 2] = [('`', 2), ('|', 1)];
+// Ten consecutive U+200B (zero-width space) characters.
+const PLACEHOLDER: &str = "\u{200B}\u{200B}\u{200B}\u{200B}\u{200B}\u{200B}\u{200B}\u{200B}\u{200B}\u{200B}";
+
+/// Replaces the style markers found in `body` with the strings given in `new_tags`.
+///
+/// `new_tags` maps a style keyword (for example `"*"`, `"||"` or `">"`) to an
+/// `(opening, closing)` pair of replacement strings. Styles whose keyword is
+/// not in the map are left untouched. Once all replacements are made, escaping
+/// backslashes are removed from the result. An empty `body` is returned as is.
+#[pyfunction]
+fn format_body(body: String, new_tags: HashMap<String, (String, String)>) -> PyResult<String> {
+    let mut chars: Vec<char> = body.chars().collect();
+    if chars.is_empty() {
+        return Ok(body);
+    }
+    let styles = parse_with_limits(&chars, 0, chars.len() - 1, 0);
+    let parse_quotes = new_tags.contains_key(">");
+
+    // Each entry: (position, keyword, replacement text, closes a quote block).
+    let mut tags: Vec<(usize, String, String, bool)> = Vec::new();
+    for (keyword, start, end) in styles {
+        if let Some((opening, closing)) = new_tags.get(&keyword) {
+            let closes_quote = keyword
+                .chars()
+                .next()
+                .map_or(false, |first| QUOTE_KEYWORDS.contains(&first));
+            tags.push((start, keyword.clone(), opening.clone(), false));
+            tags.push((end, keyword, closing.clone(), closes_quote));
+        } else if keyword == ">>" && parse_quotes {
+            // Continuation marker inside a quote block: removed, nothing inserted.
+            tags.push((start, keyword, String::new(), false));
+        }
+    }
+
+    // Apply replacements from the back so earlier positions stay valid.
+    // The sort is stable, so tags sharing a position keep their push order.
+    tags.sort_by(|a, b| b.0.cmp(&a.0));
+
+    for (index, keyword, tag, is_end_quote_block) in tags {
+        let end = if is_end_quote_block {
+            // The closing quote tag is inserted without consuming any character.
+            index
+        } else if keyword == ">>" {
+            index + 1
+        } else {
+            index + keyword.len()
+        };
+        // Replace the marker in place instead of rebuilding the whole vector.
+        chars.splice(index..end, tag.chars());
+    }
+
+    Ok(remove_non_escaped_backslashes(chars.into_iter().collect()))
+}
+
+/// Strips escaping backslashes from `text`.
+///
+/// Backslashes are consumed left to right: a pair of backslashes collapses to
+/// one literal backslash, and any other backslash is dropped. This is done in
+/// a single pass so that no sentinel string has to be injected into the text;
+/// input that happens to contain such a sentinel is therefore left intact.
+fn remove_non_escaped_backslashes(text: String) -> String {
+    let mut result = String::with_capacity(text.len());
+    let mut chars = text.chars().peekable();
+    while let Some(current) = chars.next() {
+        if current != '\\' {
+            result.push(current);
+        } else if chars.peek() == Some(&'\\') {
+            // Escaped backslash: keep one and consume its partner.
+            chars.next();
+            result.push('\\');
+        }
+        // A lone backslash is only an escape marker and is dropped.
+    }
+    result
+}
+
+/// Scans `chars[start..=end]` and returns every style found as
+/// `(keyword, opening index, closing index)`.
+///
+/// `end` is clamped to the last valid index of `chars`, so `chars` must not be
+/// empty. `depth` is the quote nesting level of the region being scanned; it
+/// is increased by one when recursing into a quote. The function performs no
+/// I/O.
+fn parse_with_limits(chars: &Vec<char>, start: usize, end: usize, depth: usize) -> Vec<(String, usize, usize)> {
+    let mut styles = Vec::new();
+    let mut index = start;
+    let end = end.min(chars.len() - 1);
+
+    while index <= end {
+        // An escaped character never starts a style.
+        if preceeded_by_backslash(chars, index, start) {
+            index += 1;
+            continue;
+        }
+
+        let c = chars[index];
+        if c == '|' && !is_char_repeating(chars, c, index, end) {
+            index += 1;
+            continue;
+        }
+
+        if QUOTE_KEYWORDS.contains(&c) {
+            if is_quote_start(chars, index, depth) {
+                let to = seek_end_of_quote(chars, index, end, depth);
+                styles.push((">".to_string(), index, to));
+                styles.append(&mut parse_with_limits(chars, index + 1, to, depth + 1));
+                index = to;
+                continue;
+            }
+            if depth > 0 {
+                // Quote marker on a continuation line of an enclosing quote.
+                styles.push((">>".to_string(), index, index + 1));
+            }
+            index += 1;
+            continue;
+        }
+
+        // An opening marker must follow whitespace (or the region start) and
+        // must not itself be followed by whitespace (or the region end).
+        if !preceeded_by_whitespace(chars, index, start) || followed_by_whitespace(chars, index, end) {
+            index += 1;
+            continue;
+        }
+
+        if !KEYWORDS.contains(&c) {
+            index += 1;
+            continue;
+        }
+
+        if BLOCK_KEYWORDS.iter().any(|&(k, _)| k == c) && is_char_repeating(chars, c, index, end) {
+            let block_indicator_size = get_block_indicator_size(c);
+            if let Some(to) = seek_end_block(chars, c, index + block_indicator_size + 1, end) {
+                // Skip blocks whose closing marker directly follows the opening one.
+                if to != index + block_indicator_size * 2 - 1 {
+                    let keyword = c.to_string().repeat(block_indicator_size + 1);
+                    styles.push((keyword, index, to));
+                    if !NO_SUB_PARSING_KEYWORDS.contains(&c) {
+                        styles.append(&mut parse_with_limits(chars, index + block_indicator_size + 1, to - 1, depth));
+                    }
+                }
+                index = to + block_indicator_size;
+                continue;
+            }
+            // No closing block marker: fall through and try a single-char span.
+        }
+
+        if let Some(to) = seek_end(chars, c, index + 1, end) {
+            // Skip spans whose closing marker directly follows the opening one.
+            if to != index + 1 {
+                styles.push((c.to_string(), index, to));
+                if !NO_SUB_PARSING_KEYWORDS.contains(&c) {
+                    styles.append(&mut parse_with_limits(chars, index + 1, to - 1, depth));
+                }
+            }
+            index = to;
+        }
+        index += 1;
+    }
+    styles
+}
+
+/// Returns `true` when `keyword` occupies `get_block_indicator_size(keyword)`
+/// consecutive positions starting at `index`, none of them past `end`.
+fn is_char_repeating(chars: &Vec<char>, keyword: char, index: usize, end: usize) -> bool {
+    let run_length = get_block_indicator_size(keyword);
+    for offset in 0..run_length {
+        let position = index + offset;
+        if position > end || chars[position] != keyword {
+            return false;
+        }
+    }
+    true
+}
+
+/// Returns `true` when `index` is the region start or the character just
+/// before it is whitespace.
+fn preceeded_by_whitespace(chars: &Vec<char>, index: usize, start: usize) -> bool {
+    if index == start {
+        return true;
+    }
+    let previous = chars[index - 1];
+    previous.is_whitespace()
+}
+
+/// Returns `true` when `index` is at or past the region end, or the character
+/// just after it is whitespace.
+fn followed_by_whitespace(chars: &Vec<char>, index: usize, end: usize) -> bool {
+    if index >= end {
+        return true;
+    }
+    let next = chars[index + 1];
+    next.is_whitespace()
+}
+
+/// Looks in `chars[start..=end]` for the closing `keyword` of an inline span.
+///
+/// The search gives up with `None` at the first newline. A match must not be
+/// directly preceded by whitespace and must not be escaped by a backslash.
+fn seek_end(chars: &Vec<char>, keyword: char, start: usize, end: usize) -> Option<usize> {
+    for position in start..=end {
+        match chars[position] {
+            '\n' => return None,
+            current
+                if current == keyword
+                    && !chars[position - 1].is_whitespace()
+                    && !preceeded_by_backslash(chars, position, start) =>
+            {
+                return Some(position);
+            }
+            _ => {}
+        }
+    }
+    None
+}
+
+/// Returns the index at which the quote starting at `start` ends.
+///
+/// That is the first newline in `chars[start..=end]` that is either too close
+/// to the end of `chars` to be followed by `depth + 1` characters, or whose
+/// following `depth + 1` characters are not all quote markers. When no such
+/// newline exists the result is `end + 1`.
+fn seek_end_of_quote(chars: &Vec<char>, start: usize, end: usize, depth: usize) -> usize {
+    for newline_at in (start..=end).filter(|&i| chars[i] == '\n') {
+        let next_line_too_short = newline_at + 2 + depth > chars.len();
+        if next_line_too_short {
+            return newline_at;
+        }
+        let markers = &chars[newline_at + 1..=newline_at + 1 + depth];
+        let continues_quote = markers.iter().all(|c| QUOTE_KEYWORDS.contains(c));
+        if !continues_quote {
+            return newline_at;
+        }
+    }
+    end + 1
+}
+
+/// Finds the first index in `chars[start..=end]` that holds `keyword`, is
+/// followed by a repeated run of `keyword` (per `is_char_repeating`), and is
+/// not escaped by a backslash. Returns `None` when there is no such index.
+fn seek_end_block(chars: &Vec<char>, keyword: char, start: usize, end: usize) -> Option<usize> {
+    (start..=end).find(|&candidate| {
+        chars[candidate] == keyword
+            && is_char_repeating(chars, keyword, candidate + 1, end)
+            && !preceeded_by_backslash(chars, candidate, start)
+    })
+}
+
+/// Returns `true` when the position `depth` characters before `index` is the
+/// very start of `chars` or directly follows a newline.
+fn is_quote_start(chars: &Vec<char>, index: usize, depth: usize) -> bool {
+    let line_offset = index - depth;
+    if line_offset == 0 {
+        return true;
+    }
+    chars[line_offset - 1] == '\n'
+}
+
+/// Returns `true` when the character at `index` is escaped, i.e. an odd number
+/// of backslashes sits immediately before it. The position equal to `start`
+/// is never considered escaped.
+fn preceeded_by_backslash(chars: &Vec<char>, index: usize, start: usize) -> bool {
+    if index == start {
+        return false;
+    }
+    // Count the unbroken run of backslashes ending right before `index`.
+    let run = chars[..index]
+        .iter()
+        .rev()
+        .take_while(|&&c| c == '\\')
+        .count();
+    run % 2 != 0
+}
+
+/// Looks up the block indicator size registered for `keyword` in
+/// `BLOCK_KEYWORDS`, falling back to 1 for characters that are not listed.
+fn get_block_indicator_size(keyword: char) -> usize {
+    BLOCK_KEYWORDS
+        .iter()
+        .find(|&&(candidate, _)| candidate == keyword)
+        .map(|&(_, size)| size)
+        .unwrap_or(1) // shouldn't ever happen
+}
+
+// Python module definition: registers `format_body` as the module's only function.
+#[pymodule]
+fn style_parser(_py: Python, m: &PyModule) -> PyResult<()> {
+ m.add_function(wrap_pyfunction!(format_body, m)?)?;
+ Ok(())
+}
diff --git a/tests/test_style_parser.py b/tests/test_style_parser.py
new file mode 100644
index 0000000..49f838b
--- /dev/null
+++ b/tests/test_style_parser.py
@@ -0,0 +1,72 @@
+from style_parser import format_body
+
+# Maps every style keyword to its (opening tag, closing tag) pair used by the tests below.
+MATRIX_FORMATS = {
+ "_": ("<em>", "</em>"),
+ "*": ("<strong>", "</strong>"),
+ "~": ("<strike>", "</strike>"),
+ "`": ("<code>", "</code>"),
+ "```": ("<pre><code>", "</code></pre>"),
+ ">": ("<blockquote>", "</blockquote>"),
+ "||": ("<span data-mx-spoiler>", "</span>")
+}
+
+def test_basic():
+ assert(format_body("_underline_", MATRIX_FORMATS) == "<em>underline</em>")
+ assert(format_body("*bold*", MATRIX_FORMATS) == "<strong>bold</strong>")
+ assert(format_body("~strikethrough~", MATRIX_FORMATS) == "<strike>strikethrough</strike>")
+ assert(format_body("`code span`", MATRIX_FORMATS) == "<code>code span</code>")
+ assert(format_body("```code\nblock```", MATRIX_FORMATS) == "<pre><code>code\nblock</code></pre>")
+ assert(format_body("||spoiler||", MATRIX_FORMATS) == "<span data-mx-spoiler>spoiler</span>")
+
+def test_quotes():
+ assert(format_body(">single", MATRIX_FORMATS) == "<blockquote>single</blockquote>")
+ assert(format_body(">single\n>grouped", MATRIX_FORMATS) == "<blockquote>single\ngrouped</blockquote>")
+ assert(format_body(">>double", MATRIX_FORMATS) == "<blockquote><blockquote>double</blockquote></blockquote>")
+ assert(format_body(">>double\n>grouped single", MATRIX_FORMATS) == "<blockquote><blockquote>double</blockquote>\ngrouped single</blockquote>")
+ assert(format_body(">>>tripple\n>single\n>>double", MATRIX_FORMATS) == "<blockquote><blockquote><blockquote>tripple</blockquote></blockquote>\nsingle\n<blockquote>double</blockquote></blockquote>")
+
+def test_escaped():
+ assert(format_body("\\_no underline_", MATRIX_FORMATS) == "_no underline_")
+ assert(format_body("\\\\_no underline_", MATRIX_FORMATS) == "\\_no underline_")
+ assert(format_body(">>>tripple\n\\>none\n>>double", MATRIX_FORMATS) == "<blockquote><blockquote><blockquote>tripple</blockquote></blockquote></blockquote>\n>none\n<blockquote><blockquote>double</blockquote></blockquote>")
+
+def test_nested():
+ assert(format_body("`*~_code span_~*`", MATRIX_FORMATS) == "<code>*~_code span_~*</code>")
+ assert(format_body("*_~`code span`~_*", MATRIX_FORMATS) == "<strong><em><strike><code>code span</code></strike></em></strong>")
+ assert(format_body(">*_~`code span`~_*", MATRIX_FORMATS) == "<blockquote><strong><em><strike><code>code span</code></strike></em></strong></blockquote>")
+ assert(format_body("*bold*not bold*", MATRIX_FORMATS) == "<strong>bold</strong>not bold*")
+ assert(format_body("*_bold*_", MATRIX_FORMATS) == "<strong>_bold</strong>_")
+
+def test_no_changes():
+ assert(format_body("", MATRIX_FORMATS) == "")
+ assert(format_body("~~ empty `````` styles **", MATRIX_FORMATS) == "~~ empty `````` styles **")
+ assert(format_body("this is not an empty string", MATRIX_FORMATS) == "this is not an empty string")
+ assert(format_body("arrow ->", MATRIX_FORMATS) == "arrow ->")
+ assert(format_body(" > no quote", MATRIX_FORMATS) == " > no quote")
+ assert(format_body("_not underlined", MATRIX_FORMATS) == "_not underlined")
+ assert(format_body("|not a spoiler|", MATRIX_FORMATS) == "|not a spoiler|")
+ assert(format_body("__not underlined__", MATRIX_FORMATS) == "__not underlined__")
+ assert(format_body("`no code\nblock here`", MATRIX_FORMATS) == "`no code\nblock here`")
+
+def test_assorted():
+ assert(format_body("at the ```end```", MATRIX_FORMATS) == "at the <pre><code>end</code></pre>")
+ assert(format_body("in the ~middle~ here", MATRIX_FORMATS) == "in the <strike>middle</strike> here")
+ assert(format_body("_underline_ *bold* ~strikethrough~ >not quote ||spoiler||\n>quote\nnothing\nnothing\n>>>>another quote with ||~_*```four```*_~||", MATRIX_FORMATS) == "<em>underline</em> <strong>bold</strong> <strike>strikethrough</strike> >not quote <span data-mx-spoiler>spoiler</span>\n<blockquote>quote</blockquote>\nnothing\nnothing\n<blockquote><blockquote><blockquote><blockquote>another quote with <span data-mx-spoiler><strike><em><strong><pre><code>four</code></pre></strong></em></strike></span></blockquote></blockquote></blockquote></blockquote>")
+
+def test_weird_utf8():
+ assert(format_body("โค๏ธ๐Ÿ’“๐Ÿ’•๐Ÿ’–๐Ÿ’— ```๐Ÿ’™๐Ÿ’š๐Ÿ’›๐Ÿ’œ๐Ÿ–ค``` ๐Ÿ’๐Ÿ’ž๐Ÿ’Ÿโฃ๏ธ", MATRIX_FORMATS) == "โค๏ธ๐Ÿ’“๐Ÿ’•๐Ÿ’–๐Ÿ’— <pre><code>๐Ÿ’™๐Ÿ’š๐Ÿ’›๐Ÿ’œ๐Ÿ–ค</code></pre> ๐Ÿ’๐Ÿ’ž๐Ÿ’Ÿโฃ๏ธ")
+ assert(format_body("๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ง _underline_๐Ÿ‘ฉโ€๐Ÿ‘ฉโ€๐Ÿ‘ฆโ€๐Ÿ‘ง", MATRIX_FORMATS) == "๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ง <em>underline</em>๐Ÿ‘ฉโ€๐Ÿ‘ฉโ€๐Ÿ‘ฆโ€๐Ÿ‘ง")
+ assert(format_body("\u202eRight to left", MATRIX_FORMATS) == "\u202eRight to left")
+ assert(format_body(">\u202eRight to left quote?", MATRIX_FORMATS) == "<blockquote>\u202eRight to left quote?</blockquote>")
+ assert(format_body("_Invisible\u200bseparator_", MATRIX_FORMATS) == "<em>Invisible\u200bseparator</em>")
+ assert(format_body("~\u200b~", MATRIX_FORMATS) == "<strike>\u200b</strike>")
+
+# Reduced tag map without "*", "```" and ">" entries, used by test_limited.
+LIMITED_FORMATS = {
+ "_": ("<em>", "</em>"),
+ "~": ("<strike>", "</strike>"),
+ "`": ("<code>", "</code>"),
+ "||": ("<span data-mx-spoiler>", "</span>")
+}
+
+def test_limited():
+ assert(format_body("_underline_ *bold* ~strikethrough~ >not quote ||spoiler||\n>quote\nnothing\nnothing\n>>>>another quote with ||~_*```four```*_~||", LIMITED_FORMATS) == "<em>underline</em> *bold* <strike>strikethrough</strike> >not quote <span data-mx-spoiler>spoiler</span>\n>quote\nnothing\nnothing\n>>>>another quote with <span data-mx-spoiler><strike><em>*```four```*</em></strike></span>")