Mirror: The magical sticky regex-based parser generator 🧙
/**
 * Parses the static strings and interpolated values of a tagged template into
 * a tree of sequence, group, and expression nodes.
 *
 * Syntax recognised inside the static strings:
 * - whitespace (space, tab, CR, LF) is ignored;
 * - `|` starts an alternation of the current sequence (only after at least
 *   one item);
 * - `(` opens a group and `)` closes it (only after at least one item);
 * - `?:`, `?=`, and `?!` directly after `(` mark the group as non-capturing,
 *   positive lookahead, or negative lookahead respectively;
 * - `?`, `+`, and `*` set the quantifier of the preceding item to
 *   `'optional'`, `'repeating'`, or `'multiple'`.
 * Every other character is rejected.
 *
 * @param {ArrayLike<string>} quasis - Static template parts. An entry must
 *   exist after every expression, as it is read right after the expression.
 * @param {Array<*>} expressions - Interpolated values; each is stored
 *   untouched on an `expression` node.
 * @returns {{type: 'sequence', sequence: Array<Object>, alternation: (Object|null)}}
 *   The root sequence. Alternatives are chained through `alternation`.
 * @throws {SyntaxError} On an unexpected character, on a group prefix that is
 *   cut off by the end of a static part, or when the input ends while a group
 *   is still open.
 */
export const parse = (quasis, expressions) => {
  const createSequence = () => ({
    type: 'sequence',
    sequence: [],
    alternation: null,
  });

  const quantifiers = new Map([
    ['?', 'optional'],
    ['+', 'repeating'],
    ['*', 'multiple'],
  ]);

  // Sequences that are suspended while one of their groups is being parsed.
  const sequenceStack = [];
  const rootSequence = createSequence();

  let currentGroup = null;
  let currentSequence = rootSequence;

  // Quasis and expressions interleave: even positions are quasis, odd
  // positions are expressions, and `position >> 1` indexes into either array.
  const partCount = quasis.length + expressions.length;
  let stackIndex = 0;

  while (stackIndex < partCount) {
    if (stackIndex % 2 !== 0) {
      currentSequence.sequence.push({
        type: 'expression',
        expression: expressions[stackIndex >> 1],
        quantifier: null,
      });
      stackIndex++;
    }

    const quasi = quasis[stackIndex >> 1];
    let quasiIndex = 0;

    while (quasiIndex < quasi.length) {
      const char = quasi[quasiIndex++];

      if (char === ' ' || char === '\t' || char === '\r' || char === '\n') {
        continue;
      }

      if (char === '|' && currentSequence.sequence.length > 0) {
        const alternative = createSequence();
        currentSequence.alternation = alternative;
        currentSequence = alternative;
        continue;
      }

      if (char === ')' && currentSequence.sequence.length > 0) {
        if (sequenceStack.length === 0) {
          // A closing parenthesis without a matching opening one.
          throw new SyntaxError(`Unexpected token ${char}`);
        }

        currentGroup = null;
        currentSequence = sequenceStack.pop();
        continue;
      }

      if (char === '(') {
        currentGroup = {
          type: 'group',
          sequence: createSequence(),
          capturing: true,
          lookahead: null,
          quantifier: null,
        };

        sequenceStack.push(currentSequence);
        currentSequence.sequence.push(currentGroup);
        currentSequence = currentGroup.sequence;
        continue;
      }

      if (
        char === '?' &&
        currentGroup &&
        currentSequence.sequence.length === 0
      ) {
        // A `?` before any item of a group introduces a group prefix.
        const nextChar = quasi[quasiIndex++];
        if (!nextChar) {
          throw new SyntaxError('Unexpected end of input');
        }

        switch (nextChar) {
          case ':':
            currentGroup.capturing = false;
            break;
          case '=':
            currentGroup.capturing = false;
            currentGroup.lookahead = 'positive';
            break;
          case '!':
            currentGroup.capturing = false;
            currentGroup.lookahead = 'negative';
            break;
          default:
            // Reports the `?`, matching the historical error message.
            throw new SyntaxError(`Unexpected token ${char}`);
        }

        continue;
      }

      const quantifier = quantifiers.get(char);
      if (quantifier !== undefined) {
        const items = currentSequence.sequence;
        const lastMatch = items[items.length - 1];
        // A quantifier needs a preceding item; otherwise it is a stray token.
        if (lastMatch) {
          lastMatch.quantifier = quantifier;
          continue;
        }
      }

      throw new SyntaxError(`Unexpected token ${char}`);
    }

    stackIndex++;
  }

  if (sequenceStack.length > 0) {
    // Input ended while at least one `(` was still waiting for its `)`.
    throw new SyntaxError('Unexpected end of input');
  }

  return rootSequence;
};