// Tholp's bespoke website generator
1use core::fmt;
2use std::{ascii::escape_default, error, fmt::Arguments, ops::Index, process::exit, thread::sleep};
3
4use super::DELIMITERS;
5use crate::types::Token;
6
7pub fn collect_arguments(tokens: &[Token]) -> (Vec<String>, usize) {
8 // Arguments vec and number of tokens consumed
9 //let mut output = Vec::new();
10 let mut split_tokens = Vec::new();
11 for tok in tokens {
12 for s in split_keep_delimiters(tok.contents.clone()) {
13 split_tokens.push(s);
14 }
15 }
16
17 let mut quoted: bool = false;
18 let mut entered: bool = false;
19 let mut arg = "".to_string();
20 let mut args: Vec<String> = Vec::new();
21
22 let mut in_token_count = 0;
23
24 for tok in split_tokens {
25 in_token_count += 1; // This could be a problem if it something got split above..
26 if tok.starts_with([' ', '\t']) && !quoted {
27 continue;
28 }
29
30 if !entered && tok.starts_with('(') {
31 entered = true;
32 continue;
33 }
34
35 if !entered {
36 continue;
37 }
38
39 if !quoted && tok.starts_with(')') {
40 break;
41 }
42
43 for c in tok.chars() {
44 if c == '\"' {
45 quoted = !quoted;
46 continue;
47 }
48
49 arg.push(c);
50 }
51
52 if !quoted {
53 args.push(arg.clone());
54 arg.clear();
55 }
56 }
57
58 return (args, in_token_count);
59}
60
61pub fn collect_block(tokens: &[Token]) -> Option<(Vec<Token>, usize)> {
62 let mut entered = false;
63 let mut tokens_consumed: usize = 0;
64 let mut entering_bracket_count = 0;
65 let mut exiting_bracket_count = 0;
66 let mut scope_count = 0; //incremented by '{{{', decremented by '}}}'
67 let mut escaped = false;
68
69 let mut block: Vec<Token> = Vec::new();
70
71 // We dont really care about doing anything that in the block right now
72 // maybe have the Token struct contain scope level later?
73 let mut escaped_tok: Token = Token::new("\\".into(), 0, 0);
74 for tok in tokens {
75 tokens_consumed += 1;
76 if !entered {
77 if tok.contents.is_only_whitespace() {
78 continue;
79 }
80 if tok.contents != "{"
81 // Expected block start, got garbage
82 {
83 // println!("Expected block start, got {}",tok.contents);
84 // for t in &block
85 // {
86 // print!("{} ", t.contents);
87 // }
88 // exit(1);
89 return None;
90 }
91 }
92
93 let mut escaped_used = false;
94
95 // Scope Start
96 if tok.contents == "{" && !escaped {
97 entering_bracket_count += 1;
98
99 if entering_bracket_count == 3 {
100 scope_count += 1;
101 entering_bracket_count = 0;
102 if !entered {
103 entered = true;
104 }
105 }
106 } else {
107 entering_bracket_count = 0;
108 if escaped {
109 escaped_used = true;
110 }
111 }
112 // Scope End
113 if tok.contents == "}" && !escaped {
114 exiting_bracket_count += 1;
115 if exiting_bracket_count == 3 {
116 scope_count -= 1;
117 entering_bracket_count = 0;
118 }
119 if scope_count == 0 {
120 break;
121 }
122 } else {
123 exiting_bracket_count = 0;
124 if escaped {
125 escaped_used = true;
126 }
127 }
128
129 if escaped_used {
130 escaped = false;
131 block.push(escaped_tok.clone());
132 }
133
134 if tok.contents == "\\" {
135 escaped = true;
136 escaped_tok = tok.clone();
137 } else {
138 block.push(tok.clone());
139 }
140 }
141
142 if scope_count != 0 {
143 return None;
144 }
145
146 // if block.len() == 6
147 // // things get ugly if its empty
148 // {
149 // let mut emptyblock = Vec::new();
150 // emptyblock.push(Token::new(
151 // "".into(),
152 // tokens[0].origin_file,
153 // tokens[0].line_number,
154 // ));
155 // return (emptyblock, tokens_consumed);
156 // }
157 // pop brackets, bad and ugly but idgaf
158 block.drain(..3);
159 block.drain(block.len() - 3..);
160 return Some((block, tokens_consumed));
161}
162
163// Theres no std function to have the delimiters be their own element in the out vector
164// clean it up a bit here
165pub fn split_keep_delimiters(instr: String) -> Vec<String> {
166 let split: Vec<&str> = instr.split_inclusive(DELIMITERS).collect();
167 let mut output = Vec::new();
168
169 for s in split {
170 if s.ends_with(DELIMITERS) {
171 let (token, ending) = s.split_at(s.len() - 1);
172 if token.len() > 0 {
173 output.push(token.to_string());
174 }
175 output.push(ending.to_string());
176 //println!("({}, {})", token.to_string(), ending.to_string())
177 } else {
178 output.push(s.to_string());
179 }
180 }
181 return output;
182}
183
184pub fn strings_to_tokens(in_strings: Vec<String>, origin_file: usize) -> Vec<Token> {
185 let mut tokens = Vec::new();
186 let mut line_count = 1;
187
188 for str in in_strings {
189 if str.len() == 0 {
190 continue;
191 }
192
193 let current_line = line_count;
194 for char in str.chars() {
195 if char == '\n' {
196 line_count += 1;
197 }
198 }
199 let token: Token = Token::new(str, origin_file, current_line);
200 tokens.push(token);
201 }
202
203 return tokens;
204}
205
206// Need to do some special case stuff so you can macros without spaces between
207// (something like "stuff!insert(..)" is split to ["stuff","!insert(..)"] so it can be acted on later)
208pub fn split_to_tokens(instr: String, origin_file: usize) -> Vec<Token> {
209 let split = split_keep_delimiters(instr);
210 let mut new_split: Vec<String> = Vec::new();
211 for s in split {
212 let prefix_offset = s.find(&['!', '&']);
213 if prefix_offset.is_some() {
214 let (first, second) = s.split_at(prefix_offset.unwrap());
215 //println!("\"{}\", \"{}\"", first, second);
216 if first.len() > 0 {
217 new_split.push(first.to_string());
218 }
219 if second.len() > 0 {
220 new_split.push(second.to_string());
221 }
222 } else {
223 if s.len() > 0 {
224 new_split.push(s);
225 }
226 }
227 //sleep(std::time::Duration::from_millis(10));
228 }
229 return strings_to_tokens(new_split, origin_file);
230}
231
232pub fn next_nonwhitespace_token(tokens: &Vec<Token>, index: usize) -> (bool, usize) {
233 while index < tokens.len() {
234 if tokens[index].contents.is_only_whitespace() {
235 continue;
236 }
237 return (true, index);
238 }
239 return (false, 0);
240}
241
242//trim whitespace from the ends
243pub fn trim_whitespace_tokens(tokens: &[Token]) -> &[Token] {
244 let mut start: usize = 0;
245 let mut end: usize = tokens.len();
246 for tok in tokens {
247 if !tok.contents.is_only_whitespace() {
248 break;
249 }
250 start = start + 1;
251 }
252
253 for tok in tokens.iter().rev() {
254 if !tok.contents.is_only_whitespace() {
255 break;
256 }
257 end = end - 1;
258 }
259
260 return &tokens[start..end];
261}
262
263pub fn find_pattern(tokens: &[Token], pat: String) -> Option<(usize, usize)> {
264 // (startpoint, length)
265 let split_pattern = split_to_tokens(pat, 0);
266 let mut pattern_index: usize = 0;
267 let mut token_index: usize = 0;
268 let mut working_pattern_index: usize = 0;
269
270 for t in tokens {
271 if t.contents == split_pattern[pattern_index].contents {
272 pattern_index += 1;
273 } else {
274 pattern_index = 0;
275 working_pattern_index = token_index + 1;
276 }
277
278 if pattern_index == split_pattern.len() {
279 return Some((working_pattern_index, split_pattern.len()));
280 }
281
282 token_index += 1;
283 }
284
285 None
286}
287
/// Whitespace predicates for owned strings.
pub trait WhitespaceChecks {
    /// True when every character is whitespace (vacuously true for "").
    fn is_only_whitespace(&self) -> bool;
    /// True when at least one character is whitespace.
    fn contains_whitespace(&self) -> bool;
}

impl WhitespaceChecks for String {
    fn is_only_whitespace(&self) -> bool {
        self.chars().all(char::is_whitespace)
    }

    fn contains_whitespace(&self) -> bool {
        self.chars().any(char::is_whitespace)
    }
}