/**
 * Advances the position counters on `scope` by one consumed character.
 *
 * The lexer grows the current token text one character per call, so the
 * character that was just consumed is always the LAST character of `t`.
 * Looking at the last character (rather than comparing the whole text with
 * "\n") keeps the line counter correct for newlines that occur inside a
 * multi-character token such as a string literal.
 *
 * @param {object} scope - Lexer state created by `createScope`.
 * @param {string} t - Text of the token accumulated so far.
 */
function moveNext(scope, t) {
  if (t.endsWith('\n')) {
    scope.currentLine += 1;
    scope.currentChar = 1;
  } else {
    scope.currentChar += 1;
  }
}

/**
 * Lookahead test: could `t` still be (the beginning of) a token of
 * definition `d`?
 *
 * Regex definitions are tested against the whole text (they are anchored in
 * `createScope`); literal definitions accept any prefix of the literal.
 *
 * @param {object} d - Compiled definition from `scope.defs`.
 * @param {string} t - Candidate text.
 * @returns {boolean} True when `t` matches or prefixes the definition.
 */
function match(d, t) {
  return d.regex ? d.t.test(t) : d.t.startsWith(t);
}

/**
 * Acceptance test: is `t` a complete token of definition `d`?
 *
 * Unlike `match`, a literal must be equal to `t`; a mere prefix of a literal
 * (for example "=" for "==") is not a token of that definition.
 *
 * @param {object} d - Compiled definition from `scope.defs`.
 * @param {string} t - Finished token text.
 * @returns {boolean} True when `t` is a complete match.
 */
function isCompleteMatch(d, t) {
  return d.regex ? d.t.test(t) : d.t === t;
}

/**
 * Records a recognised token in the token tree.
 *
 * The token is stamped with the current position. `char` is the counter
 * value after the token's last character was consumed, i.e. the column just
 * past the token. Filtered definitions are dropped, `startScope` opens a
 * nested array (carrying `parent` and `scope` properties), and a matching
 * `endScope` closes it again after the token has been stored.
 *
 * @param {object} scope - Lexer state.
 * @param {{def: object, value: string}} m - The matched token (mutated).
 */
function hasMatch(scope, m) {
  const { def } = m;
  m.line = scope.currentLine;
  m.char = scope.currentChar;
  if (def.filter) {
    return;
  }

  if (def.startScope) {
    const parent = scope.current;
    const child = [];
    child.parent = parent;
    child.scope = def.startScope;
    child.push(m);
    parent.push(child);
    scope.current = child;
    return;
  }

  scope.current.push(m);
  // Only leave the nested list when this token closes the scope we are in;
  // a stray closing token is kept as an ordinary token.
  const closesCurrent = def.endScope && scope.current.scope === def.endScope;
  if (closesCurrent && scope.current.parent) {
    scope.current = scope.current.parent;
  }
}

/**
 * Records text that no definition recognises as a token of type 'UNKNOWN'.
 *
 * @param {object} scope - Lexer state.
 * @param {string} t - The unrecognised text.
 */
function noMatch(scope, t) {
  scope.current.push({
    def: { type: 'UNKNOWN' },
    value: t,
    line: scope.currentLine,
    char: scope.currentChar,
  });
}

/**
 * Processes one lexer step.
 *
 * If the text extended by the next character could still be a token of ANY
 * definition, the token is not finished and the function returns without
 * setting `scope.break`. Otherwise the text is emitted using the first
 * definition it completely matches (or as 'UNKNOWN') and `scope.break` is set
 * so the caller starts a new token.
 *
 * Checking the lookahead against all definitions before accepting makes the
 * result independent of definition order except for tie-breaking.
 *
 * @param {object} scope - Lexer state.
 * @param {string} t - Current token text.
 * @param {string} c - Next input character, '' at end of input.
 * @param {string} tc - `t + c`.
 */
function parseToken(scope, t, c, tc) {
  moveNext(scope, t);

  if (c !== '' && scope.defs.some((d) => match(d, tc))) {
    return;
  }

  const def = scope.defs.find((d) => isCompleteMatch(d, t));
  if (def) {
    hasMatch(scope, { def, value: t });
  } else {
    noMatch(scope, t);
  }
  scope.break = true;
}

/**
 * Builds the lexer state for a grammar.
 *
 * Every entry of `gram.defs` is expanded into one compiled definition per
 * pattern (`t` may be a single pattern or an array). Entries without `t` are
 * skipped. Regex patterns are anchored as `^(?:pattern)$` so that top-level
 * alternation inside the pattern stays inside the anchors.
 *
 * Because the lexer extends a token one character at a time, a regex pattern
 * has to match every prefix of the tokens it describes.
 *
 * @param {{defs: object[]}} gram - Grammar description.
 * @returns {object} Fresh lexer state with `defs`, `root`, `current`,
 *   `break`, `currentLine` and `currentChar`.
 */
function createScope(gram) {
  const scope = {
    defs: [],
    root: [],
    break: false,
    currentLine: 1,
    currentChar: 1,
  };
  scope.current = scope.root;

  for (const def of gram.defs) {
    if (!def.t) {
      continue;
    }
    const patterns = Array.isArray(def.t) ? def.t : [def.t];
    for (const pattern of patterns) {
      scope.defs.push({
        type: def.type,
        regex: def.regex,
        t: def.regex ? new RegExp(`^(?:${pattern})$`) : pattern,
        filter: def.filter,
        startScope: def.startScope,
        endScope: def.endScope,
      });
    }
  }
  return scope;
}

/**
 * Tokenizes `input` into the token tree of `scope`.
 *
 * A window [l, r) slides over the input; it grows by one character per step
 * and restarts after `parseToken` reports a finished token via `scope.break`.
 *
 * @param {object} scope - Lexer state from `createScope` (mutated).
 * @param {string} input - Text to tokenize.
 * @returns {Array} `scope.root`: tokens, with nested arrays for scopes.
 */
function lex(scope, input) {
  let l = 0;
  let r = 1;
  while (r <= input.length) {
    const t = input.slice(l, r);
    const c = input.slice(r, r + 1);
    parseToken(scope, t, c, t + c);
    if (scope.break) {
      scope.break = false;
      l = r;
    }
    r += 1;
  }
  return scope.root;
}

const grammar = {
  name: 'mangoGrammar',
  defs: [
    { type: 'brace-start', t: '(', startScope: 'brace' },
    { type: 'brace-end', t: ')', endScope: 'brace' },
    { type: 'name', t: '[_a-zA-Z][_a-zA-Z0-9]*', regex: true },
    { type: 'compare-operator', t: ['==', '!='] },
    { type: 'combine-operator', t: ['&&', '||'] },
    { type: 'number', t: '[0-9]+(\\.?[0-9]*)', regex: true },
    // Horizontal whitespace only; matched but not emitted.
    { type: 'whitespace', t: '[^\\S\\r\\n]+', regex: true, filter: true },
    {
      type: 'string',
      // Allows backslash escapes. The closing quote is optional and a lone
      // trailing backslash is allowed so that every prefix of a string
      // literal matches while the token is still being extended.
      t: '"([^"\\\\]|\\\\.|\\\\)*"?',
      regex: true
    }
  ]
};

const input = "(category == 1.456123 && name = \"a b \\\"c\\\" \\r\\n d\\\"\" && bookNumber != 1) || bookNumber == 66";

const scope = createScope(grammar);
const result = lex(scope, input);

// Flattens a token (or nested scope) to [line, char, value, type].
const mapVal = (m) => {
  if (Array.isArray(m)) {
    return m.map(mapVal);
  }
  return [m.line, m.char, m.value, m.def.type];
};

console.log(input);
console.log(result.map(mapVal));
// [ [ [ 1, 2, '(', 'brace-start' ],
//     [ 1, 10, 'category', 'name' ],
//     [ 1, 13, '==', 'compare-operator' ],
//     [ 1, 22, '1.456123', 'number' ],
//     [ 1, 25, '&&', 'combine-operator' ],
//     [ 1, 30, 'name', 'name' ],
//     [ 1, 32, '=', 'UNKNOWN' ],
//     [ 1, 53, '"a b \\"c\\" \\r\\n d\\""', 'string' ],
//     [ 1, 56, '&&', 'combine-operator' ],
//     [ 1, 67, 'bookNumber', 'name' ],
//     [ 1, 70, '!=', 'compare-operator' ],
//     [ 1, 72, '1', 'number' ],
//     [ 1, 73, ')', 'brace-end' ] ],
//   [ 1, 76, '||', 'combine-operator' ],
//   [ 1, 87, 'bookNumber', 'name' ],
//   [ 1, 90, '==', 'compare-operator' ],
//   [ 1, 93, '66', 'number' ] ]
// Simple javascript parser