Mirror: The magical sticky regex-based parser generator 🧙

Merge pull request #13 from kitten/feat/lookahead-shorthand

Add support for lookahead/non-capturing shorthand syntax

+2 -1
src/codegen.js
···
`;
const astExpression = (ast, depth, opts) => {
+
const capture = !!opts.capture && !ast.capture;
const restoreLength =
(opts.length && opts.abort && js`${_node}.length = ln${opts.length};`) ||
'';
const expression = `${ast.expression.id}(${_state})`;
return js`
if ((${_match} = ${ast.expression.id}(${_state})) != null) {
-
${opts.capture ? js`${_node}.push(${_match})` : ''}
+
${capture ? js`${_node}.push(${_match})` : ''}
} else {
${opts.onAbort}
${restoreIndex(opts.index)}
+50
src/core.test.js
···
);
});
+
// Shorthand form of a non-capturing matcher: a `:` placed directly before an
// interpolated expression. As the table shows, /2/ still has to match at least
// once (input '1' is rejected) and advances lastIndex, but its matches are left
// out of the result.
describe('non-capturing shorthand', () => {
  const parser = match('node')`${/1/} :${/2/}+`;

  const eachCase = it.each`
    input    | result       | lastIndex
    ${'12'}  | ${['1']}     | ${2}
    ${'122'} | ${['1']}     | ${3}
    ${'13'}  | ${undefined} | ${0}
    ${'1'}   | ${undefined} | ${0}
    ${'_'}   | ${undefined} | ${0}
  `;

  eachCase('should return $result when $input is passed', (row) => {
    const { input, result, lastIndex } = row;
    expectToParse(parser, input, result, lastIndex);
  });
});
+
describe('non-capturing group with plus matcher, then required matcher', () => {
const node = match('node')`(?: ${/1/}+) ${/2/}`;
it.each`
···
);
});
+
// Shorthand form of a positive lookahead: a `=` placed directly before an
// interpolated expression. /1/ must match at the current position but does not
// consume input: the single-character input '1' still parses, with the
// following /\d/ supplying the one captured value.
describe('positive lookahead shorthand', () => {
  const parser = match('node')`=${/1/} ${/\d/}`;

  const eachCase = it.each`
    input   | result       | lastIndex
    ${'1'}  | ${['1']}     | ${1}
    ${'13'} | ${['1']}     | ${1}
    ${'2'}  | ${undefined} | ${0}
    ${'_'}  | ${undefined} | ${0}
  `;

  eachCase('should return $result when $input is passed', (row) => {
    const { input, result, lastIndex } = row;
    expectToParse(parser, input, result, lastIndex);
  });
});
+
describe('positive lookahead group with plus matcher', () => {
const node = match('node')`(?= ${/1/}+) ${/\d/}`;
it.each`
···
// Regex-like negative lookahead group: `(?! ...)` rejects the input when /1/
// matches at the current position; otherwise the following /\d/ supplies the
// captured value.
describe('negative lookahead group', () => {
  const node = match('node')`(?! ${/1/}) ${/\d/}`;

  // The original table listed the '1' row twice. The duplicate is replaced by
  // '12', which mirrors the accepted '23' case: a rejected first character
  // followed by trailing input. The lookahead fails on the leading '1' exactly
  // as it does for input '1', so nothing is consumed.
  it.each`
    input   | result       | lastIndex
    ${'2'}  | ${['2']}     | ${1}
    ${'23'} | ${['2']}     | ${1}
    ${'1'}  | ${undefined} | ${0}
    ${'12'} | ${undefined} | ${0}
    ${'_'}  | ${undefined} | ${0}
  `(
    'should return $result when $input is passed',
    ({ input, result, lastIndex }) => {
      expectToParse(node, input, result, lastIndex);
    }
  );
});
+
+
describe('negative lookahead shorthand', () => {
+
const node = match('node')`!${/1/} ${/\d/}`;
it.each`
input | result | lastIndex
${'2'} | ${['2']} | ${1}
+14 -7
src/parser.js
···
let currentGroup = null;
let lastMatch;
let currentSequence = rootSequence;
+
let capture;
for (
let quasiIndex = 0, stackIndex = 0;
···
stackIndex++
) {
if (stackIndex % 2 !== 0) {
-
currentSequence.push({
-
expression: expressions[stackIndex++ >> 1],
-
});
+
const expression = expressions[stackIndex++ >> 1];
+
currentSequence.push({ expression, capture });
+
capture = undefined;
}
const quasi = quasis[stackIndex >> 1];
···
if (!currentSequence) syntaxError(char);
} else if (char === '(') {
sequenceStack.push(currentSequence);
-
currentSequence.push((currentGroup = { sequence: [] }));
+
currentSequence.push((currentGroup = { sequence: [], capture }));
currentSequence = currentGroup.sequence;
+
capture = undefined;
+
} else if (char === ':' || char === '=' || char === '!') {
+
capture = char;
+
const nextChar = quasi[quasiIndex];
+
if (quasi[quasiIndex] && quasi[quasiIndex] !== '(') syntaxError(char);
} else if (char === '?' && !currentSequence.length && currentGroup) {
-
const nextChar = quasi[quasiIndex++];
-
if (nextChar === ':' || nextChar === '=' || nextChar === '!') {
-
currentGroup.capture = nextChar;
+
capture = quasi[quasiIndex++];
+
if (capture === ':' || capture === '=' || capture === '!') {
+
currentGroup.capture = capture;
+
capture = undefined;
} else {
syntaxError(char);
}
+62 -15
src/parser.test.js
···
expect(ast).toHaveProperty('0.sequence.0.quantifier', undefined);
});
-
it('supports non-capturing groups', () => {
-
const ast = parseTag`(?: ${1})`;
-
expect(ast).toHaveProperty('length', 1);
-
expect(ast).toHaveProperty('0.capture', ':');
-
expect(ast).toHaveProperty('0.sequence.length', 1);
+
// Parser-level checks for the non-capturing marker (`capture: ':'`), covering
// the regex-like `(?: ...)` group form and the `:` prefix shorthand.
describe('non-capturing syntax', () => {
  it('supports regex-like syntax', () => {
    const groupAst = parseTag`(?: ${1})`;
    expect(groupAst).toHaveProperty('length', 1);
    expect(groupAst).toHaveProperty('0.capture', ':');
    expect(groupAst).toHaveProperty('0.sequence.length', 1);
  });

  it('supports shorthand', () => {
    // Prefix applied directly to an interpolated expression.
    const expressionAst = parseTag`:${1}`;
    expect(expressionAst).toHaveProperty('length', 1);
    expect(expressionAst).toHaveProperty('0.capture', ':');
    expect(expressionAst).toHaveProperty('0.expression', 1);

    // Prefix applied to a parenthesised group.
    const groupAst = parseTag`:(${1})`;
    expect(groupAst).toHaveProperty('length', 1);
    expect(groupAst).toHaveProperty('0.capture', ':');
    expect(groupAst).toHaveProperty('0.sequence.length', 1);
  });

  it('fails on invalid usage', () => {
    // A prefix followed by whitespace, or by `|`, is expected to throw.
    const prefixBeforeSpace = () => parseTag`${1} : ${2}`;
    expect(prefixBeforeSpace).toThrow();

    const prefixBeforeAlternation = () => parseTag`${1} :|${2}`;
    expect(prefixBeforeAlternation).toThrow();
  });
});
-
it('supports positive lookahead groups', () => {
-
const ast = parseTag`(?= ${1})`;
-
expect(ast).toHaveProperty('length', 1);
-
expect(ast).toHaveProperty('0.capture', '=');
-
expect(ast).toHaveProperty('0.sequence.length', 1);
+
// Parser-level checks for the positive lookahead marker (`capture: '='`),
// covering the regex-like `(?= ...)` group form and the `=` prefix shorthand.
describe('positive lookaheads syntax', () => {
  it('supports regex-like syntax', () => {
    const groupAst = parseTag`(?= ${1})`;
    expect(groupAst).toHaveProperty('length', 1);
    expect(groupAst).toHaveProperty('0.capture', '=');
    expect(groupAst).toHaveProperty('0.sequence.length', 1);
  });

  it('supports shorthand', () => {
    // Prefix applied directly to an interpolated expression.
    const expressionAst = parseTag`=${1}`;
    expect(expressionAst).toHaveProperty('length', 1);
    expect(expressionAst).toHaveProperty('0.capture', '=');
    expect(expressionAst).toHaveProperty('0.expression', 1);

    // Prefix applied to a parenthesised group.
    const groupAst = parseTag`=(${1})`;
    expect(groupAst).toHaveProperty('length', 1);
    expect(groupAst).toHaveProperty('0.capture', '=');
    expect(groupAst).toHaveProperty('0.sequence.length', 1);
  });
});
-
it('supports negative lookahead groups', () => {
-
const ast = parseTag`(?! ${1})`;
-
expect(ast).toHaveProperty('length', 1);
-
expect(ast).toHaveProperty('0.capture', '!');
-
expect(ast).toHaveProperty('0.sequence.length', 1);
+
// Parser-level checks for the negative lookahead marker (`capture: '!'`),
// covering the regex-like `(?! ...)` group form and the `!` prefix shorthand.
describe('negative lookaheads syntax', () => {
  it('supports regex-like syntax', () => {
    const groupAst = parseTag`(?! ${1})`;
    expect(groupAst).toHaveProperty('length', 1);
    expect(groupAst).toHaveProperty('0.capture', '!');
    expect(groupAst).toHaveProperty('0.sequence.length', 1);
  });

  it('supports shorthand', () => {
    // Prefix applied directly to an interpolated expression.
    const expressionAst = parseTag`!${1}`;
    expect(expressionAst).toHaveProperty('length', 1);
    expect(expressionAst).toHaveProperty('0.capture', '!');
    expect(expressionAst).toHaveProperty('0.expression', 1);

    // Prefix applied to a parenthesised group.
    const groupAst = parseTag`!(${1})`;
    expect(groupAst).toHaveProperty('length', 1);
    expect(groupAst).toHaveProperty('0.capture', '!');
    expect(groupAst).toHaveProperty('0.sequence.length', 1);
  });
});
it('supports groups with alternates', () => {
expect(parseTag`(${1} | ${2}) ${3}`).toMatchInlineSnapshot(`
Array [
Object {
+
"capture": undefined,
"sequence": Array [
Object {
+
"capture": undefined,
"expression": 1,
},
],
},
Object {
+
"capture": undefined,
"expression": 3,
},
]