/**
 * Advances the lexer's position counters for one consumed character.
 *
 * @param {{currentLine: number, currentChar: number}} scope - Lexer state;
 *   mutated in place.
 * @param {string} t - Text accumulated so far for the current token; its last
 *   character is the one that was just consumed.
 */
function moveNext(scope, t) {
  // Inspect only the last character: `t` grows while a token is being
  // accumulated, so comparing the whole text to "\n" would miss a newline
  // consumed inside a multi-character token (e.g. a string spanning lines).
  if (t.endsWith('\n')) {
    scope.currentLine++;
    scope.currentChar = 1;
  } else {
    scope.currentChar++;
  }
}
/**
 * Tests a piece of text against one token definition.
 *
 * @param {{regex: *, t: (RegExp|string)}} d - Token definition.
 * @param {string} t - Text to test.
 * @returns {boolean} For a regex definition, the result of `d.t.test(t)`;
 *   otherwise whether the literal `d.t` starts with `t`.
 */
function match(d, t) {
  if (d.regex) {
    return d.t.test(t);
  }
  // Literal definitions match on prefix, so partial text such as "=" matches "==".
  return d.t.startsWith(t);
}
/**
 * Records a matched token in the token tree.
 *
 * The token is stamped with the current line/char counters. Tokens whose
 * definition has `filter` set are stamped but not stored. A `startScope`
 * definition opens a nested array (tagged with `parent` and `scope`) that
 * becomes the current container; an `endScope` definition matching the current
 * container's `scope` is stored and then the parent becomes current again.
 * Anything else is appended to the current container.
 *
 * @param {object} scope - Lexer state; reads the counters, reads/writes `current`.
 * @param {{def: object, value: string}} m - Matched token; gains `line` and `char`.
 */
function hasMatch(scope, m) {
  const { def } = m;
  m.line = scope.currentLine;
  m.char = scope.currentChar;

  if (def.filter) {
    return;
  }

  const container = scope.current;

  if (def.startScope) {
    // Open a child container whose first element is the opening token.
    const child = [m];
    child.parent = container;
    child.scope = def.startScope;
    container.push(child);
    scope.current = child;
    return;
  }

  container.push(m);

  // A closing token only pops when it matches the scope that is currently open.
  const closesCurrent = def.endScope && container.scope === def.endScope;
  if (closesCurrent && container.parent) {
    scope.current = container.parent;
  }
}
/**
 * Appends text that matched no definition to the current container as an
 * 'UNKNOWN' token, stamped with the current line/char counters.
 *
 * @param {object} scope - Lexer state; reads `current`, `currentLine`, `currentChar`.
 * @param {string} t - The unmatched text.
 */
function noMatch(scope, t) {
  const { currentLine: line, currentChar: char } = scope;
  const unknownToken = { def: { type: 'UNKNOWN' }, value: t, line, char };
  scope.current.push(unknownToken);
}
/**
 * Runs one lexing step for the text accumulated so far.
 *
 * Advances the position counters, then either returns without emitting (the
 * text extended by the next character still matches some definition, so the
 * token may keep growing) or emits `t` as a token and sets `scope.break` so
 * the caller starts a new token.
 *
 * @param {object} scope - Lexer state; reads `defs`, sets `break`.
 * @param {string} t - Current text.
 * @param {string} c - Next character, or '' at end of input.
 * @param {string} tc - Current text plus the next character.
 */
function parseToken(scope, t, c, tc) {
  moveNext(scope, t);

  // Longest match: consult EVERY definition for the extended text before
  // accepting the current one. Interleaving the two checks per definition
  // would let an earlier definition that matches `t` cut the token short even
  // though a later definition matches `tc` (e.g. '=' listed before '==').
  if (c !== '' && scope.defs.some((d) => match(d, tc))) {
    return;
  }

  // The token is complete: the first definition matching it wins.
  const def = scope.defs.find((d) => match(d, t));
  if (def) {
    hasMatch(scope, { def, value: t });
  } else {
    noMatch(scope, t);
  }
  scope.break = true;
}
/**
 * Builds the initial lexer state from a grammar.
 *
 * Every grammar definition is expanded into one entry per pattern (a
 * definition may list several). Definitions without a `t` are skipped. Regex
 * definitions are compiled so that they must match the whole text.
 *
 * @param {{defs: Array<object>}} gram - Grammar whose `defs` carry `type`, `t`
 *   and optionally `regex`, `filter`, `startScope`, `endScope`.
 * @returns {object} State with `defs`, `root`, `current` (initially `root`),
 *   `break`, `currentLine` and `currentChar`.
 */
function createScope(gram) {
  const scope = {
    defs: [],
    root: [],
    break: false,
    currentLine: 1,
    currentChar: 1,
  };
  scope.current = scope.root;

  for (const def of gram.defs) {
    if (!def.t) {
      continue;
    }
    const sources = Array.isArray(def.t) ? def.t : [def.t];
    for (const source of sources) {
      scope.defs.push({
        type: def.type,
        regex: def.regex,
        // Group the source before anchoring: without the group a top-level
        // alternation such as "a|b" would compile to /^a|b$/, anchoring each
        // alternative on one side only.
        t: def.regex ? new RegExp(`^(?:${source})$`) : source,
        filter: def.filter,
        startScope: def.startScope,
        endScope: def.endScope,
      });
    }
  }
  return scope;
}
/**
 * Lexes `input` by growing a window one character at a time and handing each
 * window, together with the character that follows it, to `parseToken`.
 * Whenever `parseToken` raises `scope.break`, the flag is cleared and the
 * window restarts at the current end position.
 *
 * @param {object} scope - Lexer state as produced by `createScope`.
 * @param {string} input - Text to lex.
 * @returns {Array} `scope.root`, the token tree.
 */
function lex(scope, input) {
  let start = 0;
  for (let end = 1; end <= input.length; end += 1) {
    const text = input.slice(start, end);
    const next = input.slice(end, end + 1); // '' once the end of input is reached
    parseToken(scope, text, next, text + next);
    if (scope.break) {
      scope.break = false;
      start = end;
    }
  }
  return scope.root;
}
const grammar = {
name: 'mangoGrammar',
defs: [
{
type: 'brace-start',
t: '(',
startScope: 'brace'
},
{
type: 'brace-end',
t: ')',
endScope: 'brace'
},
{
type: 'name',
t: '[_a-zA-Z][_a-zA-Z0-9]*',
regex: true
},
{
type: 'compare-operator',
t: ['==', '!=']
},
{
type: 'combine-operator',
t: ['&&', '||']
},
{
type: 'number',
t: '[0-9]+(\\.?[0-9]*)',
regex: true
},
{
type: 'whitespace',
t: '[^\\S\\r\\n]+',
regex: true,
filter: true
},
{
type: 'string',
//t: '"([^"]*)"?',
t: '"([^"\\\\]|\\\\.|\\\\)*"?',
regex: true
}
]
};
// Demo: lex a sample expression and print the input followed by its token tree.
const input = "(category == 1.456123 && name = \"a b \\\"c\\\" \\r\\n d\\\"\" && bookNumber != 1) || bookNumber == 66";
const scope = createScope(grammar);
const result = lex(scope, input);

/**
 * Flattens one node of the token tree for printing: a nested scope (array) is
 * mapped recursively, a token becomes `[line, char, value, type]`.
 */
const mapVal = (m) => (
  Array.isArray(m)
    ? m.map(mapVal)
    : [m.line, m.char, m.value, m.def.type]
);

console.log(input);
console.log(result.map(mapVal));
// Expected token tree for the input above. Each token is
// [line, char, value, type]; `char` is the position counter after the token's
// last character was consumed (the counter starts at 1 and is advanced before
// matching, so the first character is reported at 2).
// [ [ [ 1, 2, '(', 'brace-start' ],
//     [ 1, 10, 'category', 'name' ],
//     [ 1, 13, '==', 'compare-operator' ],
//     [ 1, 22, '1.456123', 'number' ],
//     [ 1, 25, '&&', 'combine-operator' ],
//     [ 1, 30, 'name', 'name' ],
//     [ 1, 32, '=', 'compare-operator' ],
//     [ 1, 53, '"a b \\"c\\" \\r\\n d\\""', 'string' ],
//     [ 1, 56, '&&', 'combine-operator' ],
//     [ 1, 67, 'bookNumber', 'name' ],
//     [ 1, 70, '!=', 'compare-operator' ],
//     [ 1, 72, '1', 'number' ],
//     [ 1, 73, ')', 'brace-end' ] ],
//   [ 1, 76, '||', 'combine-operator' ],
//   [ 1, 87, 'bookNumber', 'name' ],
//   [ 1, 90, '==', 'compare-operator' ],
//   [ 1, 93, '66', 'number' ] ]
// Simple JavaScript parser