summary refs log tree commit diff
path: root/src/parser.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/parser.rs')
-rw-r--r-- src/parser.rs 274
1 files changed, 274 insertions, 0 deletions
diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 0000000..8e96a69
--- /dev/null
+++ b/src/parser.rs
@@ -0,0 +1,274 @@
+// Single-character delimiters that open and close an inline style span.
+const KEYWORDS: [char; 4] = ['*', '_', '~', '`'];
+// Delimiters whose enclosed text is not parsed again for nested styles.
+const NO_SUB_PARSING_KEYWORDS: [char; 1] = ['`'];
+// Characters treated as quote markers.
+const QUOTE_KEYWORDS: [char; 1] = ['>'];
+
+/// Scans `chars[start..=end]` for style markers and returns one tuple per match.
+///
+/// `end` is clamped to the last valid index of `chars`. `depth` is the quote nesting level
+/// of the range being scanned; the body of a quote is parsed recursively with `depth + 1`.
+///
+/// Every returned tuple is `(keyword, i0, i1, i2, i3)`. For paired delimiters `i0..i1`
+/// covers the opening marker and `i2..i3` the closing one; quote entries use an empty
+/// `i2..i3`. The keywords emitted are `">"`, `">>"`, the two code-fence keywords (with and
+/// without the `language` suffix), `"||"` and the single characters listed in `KEYWORDS`.
+///
+/// An empty `chars` yields an empty result.
+pub fn parse_with_limits(chars: &Vec<char>, start: usize, end: usize, depth: usize) -> Vec<(String, usize, usize, usize, usize)> {
+    let mut styles = Vec::new();
+    // Nothing to scan; this also keeps `chars.len() - 1` below from underflowing.
+    if chars.is_empty() {
+        return styles;
+    }
+    let mut index = start;
+    let end = end.min(chars.len() - 1);
+
+    while index <= end {
+        // A character escaped by a backslash never starts a marker.
+        if preceeded_by_backslash(chars, index, start) {
+            index += 1;
+            continue;
+        }
+
+        let c = chars[index];
+        if QUOTE_KEYWORDS.contains(&c) {
+            if is_quote_start(chars, index, depth) {
+                let to = seek_end_of_quote(chars, index, end, depth);
+                styles.push((">".to_string(), index, index + 1, to, to));
+                // The quote body is parsed one nesting level deeper.
+                styles.extend(parse_with_limits(chars, index + 1, to, depth + 1));
+                index = to;
+                continue;
+            }
+            if is_nested_quote(chars, index, depth) {
+                styles.push((">>".to_string(), index, index + 1, index + 1, index + 1));
+            }
+            index += 1;
+            continue;
+        }
+
+        // Three backticks in a row: candidate code block.
+        if c == '`' && is_char_repeating(chars, c, 2, index + 1, end) {
+            let end_of_line = seek_end_of_line(chars, index + 1, end);
+            if end_of_line == end {
+                index += 3;
+                continue;
+            }
+            if let Some(to) = seek_end_block(chars, c, end_of_line, end, depth) {
+                if to != index + 3 && is_quote_start(chars, index, depth) {
+                    // Text between the fence and the end of its line selects the keyword.
+                    let keyword = if end_of_line == index + 3 {
+                        "```".to_string()
+                    } else {
+                        "```language".to_string()
+                    };
+                    let remove_end = if depth > 0 && (to == end || to == chars.len()) {
+                        to
+                    } else {
+                        to + 4 + depth
+                    };
+                    styles.push((keyword, index, end_of_line + 1, to, remove_end));
+                    styles.extend(parse_quotes_in_code_block(chars, index + 3, to, depth));
+                    index = to;
+                }
+            }
+            index += 3;
+            continue;
+        }
+
+        // Remaining markers must follow whitespace (or the range start) and must not be
+        // followed by whitespace.
+        if !preceeded_by_whitespace(chars, index, start) || followed_by_whitespace(chars, index, end) {
+            index += 1;
+            continue;
+        }
+
+        if c == '|' && is_char_repeating(chars, c, 1, index + 1, end) {
+            if let Some(to) = seek_end(chars, c, index + 2, 1, end) {
+                if to != index + 2 {
+                    let keyword = "||".to_string();
+                    styles.push((keyword, index, index + 2, to, to + 2));
+                    styles.extend(parse_with_limits(chars, index + 2, to - 1, depth));
+                }
+                index = to + 2;
+                continue;
+            }
+            index += 2;
+            continue;
+        }
+
+        if !KEYWORDS.contains(&c) {
+            index += 1;
+            continue;
+        }
+
+        if let Some(to) = seek_end(chars, c, index + 1, 0, end) {
+            if to != index + 1 {
+                styles.push((c.to_string(), index, index + 1, to, to + 1));
+                if !NO_SUB_PARSING_KEYWORDS.contains(&c) {
+                    styles.extend(parse_with_limits(chars, index + 1, to - 1, depth));
+                }
+            }
+            index = to;
+        }
+        index += 1;
+    }
+    styles
+}
+
+/// Collects the quote markers found inside a code block that sits within a quote.
+///
+/// Scans `chars[start..=end]` (with `end` clamped to the last valid index) and emits a
+/// "```>" entry for every quote character that `is_nested_quote` accepts at `depth`.
+/// Returns an empty vector when `depth` is 0 or `chars` is empty.
+fn parse_quotes_in_code_block(chars: &Vec<char>, start: usize, end: usize, depth: usize) -> Vec<(String, usize, usize, usize, usize)> {
+    // Check the cheap exits first so `chars.len() - 1` is never evaluated on empty input.
+    if depth < 1 || chars.is_empty() {
+        return Vec::new();
+    }
+    let end = end.min(chars.len() - 1);
+
+    (start..=end)
+        .filter(|&index| QUOTE_KEYWORDS.contains(&chars[index]) && is_nested_quote(chars, index, depth))
+        .map(|index| ("```>".to_string(), index, index + 1, index + 1, index + 1))
+        .collect()
+}
+
+/// Walks backwards from `start` towards the beginning of `chars`.
+///
+/// Returns `true` when a newline is reached, or when the walk gets down to index 0, after
+/// passing over at most `depth` quote characters. Returns `false` as soon as a non-quote
+/// character is seen or more than `depth` quote characters have been counted. Index 0
+/// itself is never inspected.
+fn is_nested_quote(chars: &Vec<char>, start: usize, depth: usize) -> bool {
+    for (seen, pos) in (1..=start).rev().enumerate() {
+        let current = chars[pos];
+        if current == '\n' {
+            return true;
+        }
+        if !QUOTE_KEYWORDS.contains(&current) {
+            return false;
+        }
+        // `seen` is zero-based, so `seen + 1` quote characters have been counted so far.
+        if seen + 1 > depth {
+            return false;
+        }
+    }
+    true
+}
+
+/// Returns `true` when the `repetitions` characters starting at `index` all equal `keyword`
+/// and all lie at or before `end`. Zero repetitions is trivially `true`.
+fn is_char_repeating(chars: &Vec<char>, keyword: char, repetitions: usize, index: usize, end: usize) -> bool {
+    for offset in 0..repetitions {
+        let pos = index + offset;
+        // The bound is checked first so positions past `end` are never read.
+        if pos > end || chars[pos] != keyword {
+            return false;
+        }
+    }
+    true
+}
+
+/// Returns `true` when `index` is the first position of the range (`start`) or the
+/// character just before it is whitespace.
+fn preceeded_by_whitespace(chars: &Vec<char>, index: usize, start: usize) -> bool {
+    if index == start {
+        return true;
+    }
+    chars[index - 1].is_whitespace()
+}
+
+/// Returns `true` when `index` is at or past `end`, or the character just after it is
+/// whitespace.
+fn followed_by_whitespace(chars: &Vec<char>, index: usize, end: usize) -> bool {
+    if index >= end {
+        return true;
+    }
+    chars[index + 1].is_whitespace()
+}
+
+/// Looks for the closing delimiter of a span whose content begins at `start`.
+///
+/// Scans `start..=end` and stops with `None` at the first newline. A position closes the
+/// span when it holds `keyword`, is not preceded by whitespace or an escaping backslash,
+/// and is followed by `repetitions` more `keyword` characters. When such a position is
+/// found, a later candidate reported by `seek_higher_order_end` takes precedence over it.
+fn seek_end(chars: &Vec<char>, keyword: char, start: usize, repetitions: usize, end: usize) -> Option<usize> {
+    for pos in start..=end {
+        let current = chars[pos];
+        if current == '\n' {
+            return None;
+        }
+        let closes = current == keyword
+            && !chars[pos - 1].is_whitespace()
+            && !preceeded_by_backslash(chars, pos, start)
+            && is_char_repeating(chars, keyword, repetitions, pos + 1, end);
+        if closes {
+            // Fall back to this position when no later candidate is reported.
+            return seek_higher_order_end(chars, current, pos + 1, repetitions, end).or(Some(pos));
+        }
+    }
+    None
+}
+
+/// Searches `start..=end` for a closing delimiter that lies beyond one already found.
+///
+/// Returns `None` at the first newline, or when a delimiter that looks like the opening of
+/// a new span is met first (preceded by whitespace, not followed by whitespace). Returns
+/// `Some(i)` for the first unescaped delimiter that is not preceded by whitespace and is
+/// followed by whitespace or the end of the range. In both cases the delimiter must be
+/// followed by `repetitions` more `keyword` characters.
+fn seek_higher_order_end(chars: &Vec<char>, keyword: char, start: usize, repetitions: usize, end: usize) -> Option<usize> {
+    for pos in start..=end {
+        let current = chars[pos];
+        if current == '\n' {
+            return None;
+        }
+        if current != keyword {
+            continue;
+        }
+        let repeats = || is_char_repeating(chars, keyword, repetitions, pos + 1, end);
+        let after_space = chars[pos - 1].is_whitespace();
+        let before_space = followed_by_whitespace(chars, pos, end);
+        match (after_space, before_space) {
+            // "*bold* *<--- beginning of new bold>*"
+            (true, false) if repeats() => return None,
+            (false, true) if !preceeded_by_backslash(chars, pos, start) && repeats() => {
+                return Some(pos);
+            }
+            _ => {}
+        }
+    }
+    None
+}
+
+/// Returns the index of the first newline in `chars[start..=end]`, or `end + 1` when the
+/// range contains none.
+fn seek_end_of_line(chars: &Vec<char>, start: usize, end: usize) -> usize {
+    let line = &chars[start..=end];
+    match line.iter().position(|&c| c == '\n') {
+        // `position` is relative to the slice, so shift it back to an index into `chars`.
+        Some(offset) => start + offset,
+        None => end + 1,
+    }
+}
+
+/// Finds where a quote that is being scanned from `start` stops.
+///
+/// Returns the index of the first newline in `start..=end` whose following `depth + 1`
+/// characters are not all quote markers, or which is too close to the end of `chars` to
+/// hold that many. Returns `end + 1` when no such newline exists.
+fn seek_end_of_quote(chars: &Vec<char>, start: usize, end: usize, depth: usize) -> usize {
+    let markers_needed = depth + 1;
+    for pos in start..=end {
+        if chars[pos] != '\n' {
+            continue;
+        }
+        let next_line = pos + 1;
+        // Not enough characters left for the next line to continue the quote.
+        if next_line + markers_needed > chars.len() {
+            return pos;
+        }
+        let continues = chars[next_line..next_line + markers_needed]
+            .iter()
+            .all(|c| QUOTE_KEYWORDS.contains(c));
+        if !continues {
+            return pos;
+        }
+    }
+    end + 1
+}
+
+/// Finds the newline that ends the content of a fenced block.
+///
+/// Scans `start..=end` for a newline whose next line consists of `depth` quote markers
+/// followed by exactly three `keyword` characters, and returns that newline's index.
+/// A newline too close to `end` to be followed by a fence is returned as the block end,
+/// except when it is followed only by `depth` quote markers that run up to `end`.
+///
+/// When no newline qualifies: returns `Some(end)` if `end` is not the last index of
+/// `chars`; otherwise `None` at depth 0 and `Some(chars.len())` at greater depths.
+fn seek_end_block(chars: &Vec<char>, keyword: char, start: usize, end: usize, depth: usize) -> Option<usize> {
+    for i in start..=end {
+        if chars[i] != '\n' {
+            continue;
+        }
+        // Index of the first character after the quote markers on the next line.
+        let fence = i + 1 + depth;
+        if i + depth == end && chars[i + 1..fence].iter().all(|c| QUOTE_KEYWORDS.contains(c)) {
+            continue;
+        }
+        if fence > end {
+            return Some(i);
+        }
+        // `chars[fence]` is the first fence character, so the two repetitions must be
+        // checked from `fence + 1`; starting at `fence` would verify only two characters.
+        if seek_end_of_line(chars, i + 1, end) == fence + 3
+            && chars[i + 1..fence].iter().all(|c| QUOTE_KEYWORDS.contains(c))
+            && chars[fence] == keyword
+            && is_char_repeating(chars, keyword, 2, fence + 1, end)
+        {
+            return Some(i);
+        }
+    }
+    if end == chars.len() - 1 {
+        if depth == 0 {
+            return None;
+        }
+        return Some(chars.len());
+    }
+    Some(end)
+}
+
+/// Returns `true` when the position `depth` characters before `index` is the start of a
+/// line: either index 0 of `chars` or directly after a newline.
+///
+/// Returns `false` when `index` is smaller than `depth`, since no such position exists.
+fn is_quote_start(chars: &Vec<char>, index: usize, depth: usize) -> bool {
+    match index.checked_sub(depth) {
+        // Fewer than `depth` characters precede `index`.
+        None => false,
+        Some(0) => true,
+        Some(line_start) => chars[line_start - 1] == '\n',
+    }
+}
+
+/// Returns `true` when the character at `index` is escaped, i.e. it is directly preceded
+/// by an odd number of consecutive backslashes. Always `false` when `index == start`.
+fn preceeded_by_backslash(chars: &Vec<char>, index: usize, start: usize) -> bool {
+    if index == start {
+        return false;
+    }
+    // Count the unbroken run of backslashes that ends right before `index`.
+    let run = chars[..index]
+        .iter()
+        .rev()
+        .take_while(|&&c| c == '\\')
+        .count();
+    run % 2 == 1
+}