190 lines
4.5 KiB
JavaScript
190 lines
4.5 KiB
JavaScript
|
'use strict';
|
||
|
|
||
|
var Lexer = exports.Lexer = function() {
|
||
|
this.pos = 0;
|
||
|
this.buf = null;
|
||
|
this.buflen = 0;
|
||
|
|
||
|
// Operator table, mapping operator -> token name
|
||
|
this.optable = {
|
||
|
'+': 'PLUS',
|
||
|
'-': 'MINUS',
|
||
|
'*': 'MULTIPLY',
|
||
|
'.': 'PERIOD',
|
||
|
'\\': 'BACKSLASH',
|
||
|
':': 'COLON',
|
||
|
'%': 'PERCENT',
|
||
|
'|': 'PIPE',
|
||
|
'!': 'EXCLAMATION',
|
||
|
'?': 'QUESTION',
|
||
|
'#': 'POUND',
|
||
|
'&': 'AMPERSAND',
|
||
|
';': 'SEMI',
|
||
|
',': 'COMMA',
|
||
|
'(': 'L_PAREN',
|
||
|
')': 'R_PAREN',
|
||
|
'<': 'L_ANG',
|
||
|
'>': 'R_ANG',
|
||
|
'{': 'L_BRACE',
|
||
|
'}': 'R_BRACE',
|
||
|
'[': 'L_BRACKET',
|
||
|
']': 'R_BRACKET',
|
||
|
'=': 'EQUALS'
|
||
|
};
|
||
|
}
|
||
|
|
||
|
// Initialize the Lexer's buffer. This resets the lexer's internal
|
||
|
// state and subsequent tokens will be returned starting with the
|
||
|
// beginning of the new buffer.
|
||
|
Lexer.prototype.input = function(buf) {
|
||
|
this.pos = 0;
|
||
|
this.buf = buf;
|
||
|
this.buflen = buf.length;
|
||
|
}
|
||
|
|
||
|
// Get the next token from the current buffer. A token is an object with
|
||
|
// the following properties:
|
||
|
// - name: name of the pattern that this token matched (taken from rules).
|
||
|
// - value: actual string value of the token.
|
||
|
// - pos: offset in the current buffer where the token starts.
|
||
|
//
|
||
|
// If there are no more tokens in the buffer, returns null. In case of
|
||
|
// an error throws Error.
|
||
|
Lexer.prototype.token = function() {
|
||
|
this._skipnontokens();
|
||
|
if (this.pos >= this.buflen) {
|
||
|
return null;
|
||
|
}
|
||
|
|
||
|
// The char at this.pos is part of a real token. Figure out which.
|
||
|
var c = this.buf.charAt(this.pos);
|
||
|
|
||
|
// '/' is treated specially, because it starts a comment if followed by
|
||
|
// another '/'. If not followed by another '/', it's the DIVIDE
|
||
|
// operator.
|
||
|
if (c === '/') {
|
||
|
var next_c = this.buf.charAt(this.pos + 1);
|
||
|
if (next_c === '/') {
|
||
|
return this._process_comment();
|
||
|
} else {
|
||
|
return {name: 'DIVIDE', value: '/', pos: this.pos++};
|
||
|
}
|
||
|
} else {
|
||
|
// Look it up in the table of operators
|
||
|
var op = this.optable[c];
|
||
|
if (op !== undefined) {
|
||
|
return {name: op, value: c, pos: this.pos++};
|
||
|
} else {
|
||
|
// Not an operator - so it's the beginning of another token.
|
||
|
if (Lexer._isalpha(c)) {
|
||
|
return this._process_identifier();
|
||
|
} else if (Lexer._isdigit(c)) {
|
||
|
return this._process_number();
|
||
|
} else if (c === '"') {
|
||
|
return this._process_quote();
|
||
|
} else {
|
||
|
throw Error('Token error at ' + this.pos);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
Lexer._isnewline = function(c) {
|
||
|
return c === '\r' || c === '\n';
|
||
|
}
|
||
|
|
||
|
Lexer._isdigit = function(c) {
|
||
|
return c >= '0' && c <= '9';
|
||
|
}
|
||
|
|
||
|
Lexer._isalpha = function(c) {
|
||
|
return (c >= 'a' && c <= 'z') ||
|
||
|
(c >= 'A' && c <= 'Z') ||
|
||
|
c === '_' || c === '$';
|
||
|
}
|
||
|
|
||
|
Lexer._isalphanum = function(c) {
|
||
|
return (c >= 'a' && c <= 'z') ||
|
||
|
(c >= 'A' && c <= 'Z') ||
|
||
|
(c >= '0' && c <= '9') ||
|
||
|
c === '_' || c === '$';
|
||
|
}
|
||
|
|
||
|
Lexer.prototype._process_number = function() {
|
||
|
var endpos = this.pos + 1;
|
||
|
while (endpos < this.buflen &&
|
||
|
Lexer._isdigit(this.buf.charAt(endpos))) {
|
||
|
endpos++;
|
||
|
}
|
||
|
|
||
|
var tok = {
|
||
|
name: 'NUMBER',
|
||
|
value: this.buf.substring(this.pos, endpos),
|
||
|
pos: this.pos
|
||
|
};
|
||
|
this.pos = endpos;
|
||
|
return tok;
|
||
|
}
|
||
|
|
||
|
Lexer.prototype._process_comment = function() {
|
||
|
var endpos = this.pos + 2;
|
||
|
// Skip until the end of the line
|
||
|
var c = this.buf.charAt(this.pos + 2);
|
||
|
while (endpos < this.buflen &&
|
||
|
!Lexer._isnewline(this.buf.charAt(endpos))) {
|
||
|
endpos++;
|
||
|
}
|
||
|
|
||
|
var tok = {
|
||
|
name: 'COMMENT',
|
||
|
value: this.buf.substring(this.pos, endpos),
|
||
|
pos: this.pos
|
||
|
};
|
||
|
this.pos = endpos + 1;
|
||
|
return tok;
|
||
|
}
|
||
|
|
||
|
Lexer.prototype._process_identifier = function() {
|
||
|
var endpos = this.pos + 1;
|
||
|
while (endpos < this.buflen &&
|
||
|
Lexer._isalphanum(this.buf.charAt(endpos))) {
|
||
|
endpos++;
|
||
|
}
|
||
|
|
||
|
var tok = {
|
||
|
name: 'IDENTIFIER',
|
||
|
value: this.buf.substring(this.pos, endpos),
|
||
|
pos: this.pos
|
||
|
};
|
||
|
this.pos = endpos;
|
||
|
return tok;
|
||
|
}
|
||
|
|
||
|
Lexer.prototype._process_quote = function() {
|
||
|
// this.pos points at the opening quote. Find the ending quote.
|
||
|
var end_index = this.buf.indexOf('"', this.pos + 1);
|
||
|
|
||
|
if (end_index === -1) {
|
||
|
throw Error('Unterminated quote at ' + this.pos);
|
||
|
} else {
|
||
|
var tok = {
|
||
|
name: 'QUOTE',
|
||
|
value: this.buf.substring(this.pos, end_index + 1),
|
||
|
pos: this.pos
|
||
|
};
|
||
|
this.pos = end_index + 1;
|
||
|
return tok;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
Lexer.prototype._skipnontokens = function() {
|
||
|
while (this.pos < this.buflen) {
|
||
|
var c = this.buf.charAt(this.pos);
|
||
|
if (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
|
||
|
this.pos++;
|
||
|
} else {
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|