Mirror: The magical sticky regex-based parser generator
/**
 * Parses the interleaved string chunks and interpolated values of a
 * template (e.g. the arguments a tagged template receives) into a tree.
 *
 * Node shapes produced:
 * - sequence:   `{ type: 'sequence', sequence: Node[], alternation: sequence | null }`
 * - group:      `{ type: 'group', sequence, capture: ':' | '=' | '!' | null, quantifier }`
 * - expression: `{ type: 'expression', expression, quantifier }`
 *
 * `quantifier` is `'?'`, `'+'`, `'*'` or `null`. Interpolated values are
 * stored as-is and never inspected.
 *
 * Recognised syntax in the string chunks: whitespace (ignored), `|`
 * (alternation), `(` / `)` (grouping), the group modifiers `?:`, `?=`, `?!`
 * directly after `(`, and the quantifiers `?`, `+`, `*`.
 *
 * @param {ArrayLike<string>} quasis - String chunks; expected to hold exactly
 *   one more entry than `expressions`, as with tagged templates.
 * @param {Array<*>} expressions - Values interpolated between the chunks.
 * @returns {object} The root sequence node.
 * @throws {SyntaxError} On an unexpected character, a chunk that ends right
 *   after a group's `?`, an unmatched `)`, or a group that is never closed.
 */
export const parse = (quasis, expressions) => {
  const createSequence = () => ({
    type: 'sequence',
    sequence: [],
    alternation: null,
  });

  let quasiIndex = 0;
  let stackIndex = 0;

  // Sequences that were active when a group was opened; restored on `)`.
  const sequenceStack = [];
  const rootSequence = createSequence();

  // The group whose `(` was most recently read and is still open; reset on `)`.
  let currentGroup = null;
  let currentSequence = rootSequence;

  // `stackIndex` walks the interleaving quasi, expression, quasi, ...:
  // even positions are string chunks, odd positions are interpolated values.
  while (stackIndex < quasis.length + expressions.length) {
    if (stackIndex % 2 !== 0) {
      const expression = expressions[stackIndex++ >> 1];

      currentSequence.sequence.push({
        type: 'expression',
        expression,
        quantifier: null,
      });
    }

    const quasi = quasis[stackIndex >> 1];
    while (quasiIndex < quasi.length) {
      const char = quasi[quasiIndex++];

      if (char === ' ' || char === '\t' || char === '\r' || char === '\n') {
        continue;
      }

      if (char === '|' && currentSequence.sequence.length > 0) {
        // Start a new alternative branch chained off the current one.
        currentSequence = currentSequence.alternation = createSequence();
        continue;
      }

      if (char === ')' && currentSequence.sequence.length > 0) {
        currentGroup = null;
        currentSequence = sequenceStack.pop();
        if (currentSequence) {
          continue;
        }

        // Nothing to pop: this `)` has no matching `(`.
        throw new SyntaxError(`Unexpected token ${char}`);
      }

      if (char === '(') {
        currentGroup = {
          type: 'group',
          sequence: createSequence(),
          capture: null,
          quantifier: null,
        };

        sequenceStack.push(currentSequence);
        currentSequence.sequence.push(currentGroup);
        currentSequence = currentGroup.sequence;
        continue;
      }

      // A group modifier is only valid directly after the opening `(`:
      // the group's own (first) sequence must still be empty and no modifier
      // may have been set yet. Checking sequence identity rejects a modifier
      // at the start of a later `|` branch, which would otherwise silently
      // rewrite the enclosing group's `capture`.
      const isGroupStart =
        currentGroup !== null &&
        currentSequence === currentGroup.sequence &&
        currentSequence.sequence.length === 0 &&
        currentGroup.capture === null;

      if (char === '?' && isGroupStart) {
        const nextChar = quasi[quasiIndex++];
        if (!nextChar) {
          throw new SyntaxError('Unexpected end of input');
        }

        if (nextChar === ':' || nextChar === '=' || nextChar === '!') {
          currentGroup.capture = nextChar;
          continue;
        }

        throw new SyntaxError(`Unexpected token ${char}`);
      }

      if (char === '?' || char === '+' || char === '*') {
        // A quantifier applies to the node just before it, if there is one.
        const lastMatch =
          currentSequence.sequence[currentSequence.sequence.length - 1];
        if (lastMatch) {
          lastMatch.quantifier = char;
          continue;
        }
      }

      throw new SyntaxError(`Unexpected token ${char}`);
    }

    stackIndex++;
    quasiIndex = 0;
  }

  // Any sequence still on the stack belongs to a `(` that was never closed.
  if (sequenceStack.length > 0) {
    throw new SyntaxError('Unexpected end of input');
  }

  return rootSequence;
};