Converted the lexer from a constructor function with prototype methods (`Lexer`) to a closure-based function (`lex`)

This commit is contained in:
Misko Hevery 2010-08-18 17:50:21 -07:00
parent 9632c99b0a
commit 625f32b7eb
2 changed files with 124 additions and 154 deletions

View file

@ -1,11 +1,3 @@
// Lexer constructor: records the input text and resets the tokenizer state
// (empty token list, read position 0).
//   text        - the source text to tokenize
//   parsStrings - when truthy, dateParseLength is set to 20; otherwise -1
function Lexer(text, parsStrings){
var dateLength = -1;
if (parsStrings) {
// UTC dates have 20 characters, we send them through parser
dateLength = 20;
}
this.text = text;
this.dateParseLength = dateLength;
this.tokens = [];
this.index = 0;
}
OPERATORS = {
'null':function(self){return _null;},
'true':function(self){return true;},
@ -33,143 +25,133 @@ OPERATORS = {
};
// Escape table for string literals: the key is the character that follows a
// backslash, the value is the character it stands for. The string reader looks
// up the escaped character here.
ESCAPE = {"n":"\n", "f":"\f", "r":"\r", "t":"\t", "v":"\v", "'":"'", '"':'"'};
Lexer.prototype = {
peek: function() {
if (this.index + 1 < this.text.length) {
return this.text.charAt(this.index + 1);
} else {
return false;
}
},
function lex(text, parseStrings){
var dateParseLength = parseStrings ? 20 : -1,
tokens = [],
index = 0,
canStartRegExp = true;
parse: function() {
var tokens = this.tokens;
var canStartRegExp = true;
while (this.index < this.text.length) {
var ch = this.text.charAt(this.index);
if (ch == '"' || ch == "'") {
this.readString(ch);
canStartRegExp = true;
} else if (ch == '(' || ch == '[') {
tokens.push({index:this.index, text:ch});
this.index++;
} else if (ch == '{' ) {
var peekCh = this.peek();
if (peekCh == ':' || peekCh == '(') {
tokens.push({index:this.index, text:ch + peekCh});
this.index++;
} else {
tokens.push({index:this.index, text:ch});
}
this.index++;
canStartRegExp = true;
} else if (ch == ')' || ch == ']' || ch == '}' ) {
tokens.push({index:this.index, text:ch});
this.index++;
canStartRegExp = false;
} else if ( ch == ':' || ch == '.' || ch == ',' || ch == ';') {
tokens.push({index:this.index, text:ch});
this.index++;
canStartRegExp = true;
} else if ( canStartRegExp && ch == '/' ) {
this.readRegexp();
canStartRegExp = false;
} else if ( this.isNumber(ch) ) {
this.readNumber();
canStartRegExp = false;
} else if (this.isIdent(ch)) {
this.readIdent();
canStartRegExp = false;
} else if (this.isWhitespace(ch)) {
this.index++;
while (index < text.length) {
var ch = text.charAt(index);
if (ch == '"' || ch == "'") {
readString(ch);
canStartRegExp = true;
} else if (ch == '(' || ch == '[') {
tokens.push({index:index, text:ch});
index++;
} else if (ch == '{' ) {
var peekCh = peek();
if (peekCh == ':' || peekCh == '(') {
tokens.push({index:index, text:ch + peekCh});
index++;
} else {
var ch2 = ch + this.peek();
var fn = OPERATORS[ch];
var fn2 = OPERATORS[ch2];
if (fn2) {
tokens.push({index:this.index, text:ch2, fn:fn2});
this.index += 2;
} else if (fn) {
tokens.push({index:this.index, text:ch, fn:fn});
this.index += 1;
} else {
throw "Lexer Error: Unexpected next character [" +
this.text.substring(this.index) +
"] in expression '" + this.text +
"' at column '" + (this.index+1) + "'.";
}
canStartRegExp = true;
tokens.push({index:index, text:ch});
}
index++;
canStartRegExp = true;
} else if (ch == ')' || ch == ']' || ch == '}' ) {
tokens.push({index:index, text:ch});
index++;
canStartRegExp = false;
} else if ( ch == ':' || ch == '.' || ch == ',' || ch == ';') {
tokens.push({index:index, text:ch});
index++;
canStartRegExp = true;
} else if ( canStartRegExp && ch == '/' ) {
readRegexp();
canStartRegExp = false;
} else if ( isNumber(ch) ) {
readNumber();
canStartRegExp = false;
} else if (isIdent(ch)) {
readIdent();
canStartRegExp = false;
} else if (isWhitespace(ch)) {
index++;
} else {
var ch2 = ch + peek(),
fn = OPERATORS[ch],
fn2 = OPERATORS[ch2];
if (fn2) {
tokens.push({index:index, text:ch2, fn:fn2});
index += 2;
} else if (fn) {
tokens.push({index:index, text:ch, fn:fn});
index += 1;
} else {
throw "Lexer Error: Unexpected next character [" +
text.substring(index) +
"] in expression '" + text +
"' at column '" + (index+1) + "'.";
}
canStartRegExp = true;
}
return tokens;
},
}
return tokens;
isNumber: function(ch) {
// Looks one character ahead of the enclosing `index` in `text` without
// advancing. Returns that character, or false when there is none.
function peek() {
var next = index + 1;
if (next < text.length) {
return text.charAt(next);
}
return false;
}
function isNumber(ch) {
return '0' <= ch && ch <= '9';
},
isWhitespace: function(ch) {
}
function isWhitespace(ch) {
return ch == ' ' || ch == '\r' || ch == '\t' ||
ch == '\n' || ch == '\v';
},
isIdent: function(ch) {
}
function isIdent(ch) {
return 'a' <= ch && ch <= 'z' ||
'A' <= ch && ch <= 'Z' ||
'_' == ch || ch == '$';
},
readNumber: function() {
}
function readNumber() {
var number = "";
var start = this.index;
while (this.index < this.text.length) {
var ch = this.text.charAt(this.index);
if (ch == '.' || this.isNumber(ch)) {
var start = index;
while (index < text.length) {
var ch = text.charAt(index);
if (ch == '.' || isNumber(ch)) {
number += ch;
} else {
break;
}
this.index++;
index++;
}
number = 1 * number;
this.tokens.push({index:start, text:number,
tokens.push({index:start, text:number,
fn:function(){return number;}});
},
readIdent: function() {
}
function readIdent() {
var ident = "";
var start = this.index;
while (this.index < this.text.length) {
var ch = this.text.charAt(this.index);
if (ch == '.' || this.isIdent(ch) || this.isNumber(ch)) {
var start = index;
while (index < text.length) {
var ch = text.charAt(index);
if (ch == '.' || isIdent(ch) || isNumber(ch)) {
ident += ch;
} else {
break;
}
this.index++;
index++;
}
var fn = OPERATORS[ident];
if (!fn) {
fn = getterFn(ident);
fn.isAssignable = ident;
}
this.tokens.push({index:start, text:ident, fn:fn});
},
readString: function(quote) {
var start = this.index;
var dateParseLength = this.dateParseLength;
this.index++;
tokens.push({index:start, text:ident, fn:fn});
}
function readString(quote) {
var start = index;
index++;
var string = "";
var rawString = quote;
var escape = false;
while (this.index < this.text.length) {
var ch = this.text.charAt(this.index);
while (index < text.length) {
var ch = text.charAt(index);
rawString += ch;
if (escape) {
if (ch == 'u') {
var hex = this.text.substring(this.index + 1, this.index + 5);
this.index += 4;
var hex = text.substring(index + 1, index + 5);
index += 4;
string += String.fromCharCode(parseInt(hex, 16));
} else {
var rep = ESCAPE[ch];
@ -183,8 +165,8 @@ Lexer.prototype = {
} else if (ch == '\\') {
escape = true;
} else if (ch == quote) {
this.index++;
this.tokens.push({index:start, text:rawString, string:string,
index++;
tokens.push({index:start, text:rawString, string:string,
fn:function(){
return (string.length == dateParseLength) ?
angular['String']['toDate'](string) : string;
@ -193,20 +175,19 @@ Lexer.prototype = {
} else {
string += ch;
}
this.index++;
index++;
}
throw "Lexer Error: Unterminated quote [" +
this.text.substring(start) + "] starting at column '" +
(start+1) + "' in expression '" + this.text + "'.";
},
readRegexp: function(quote) {
var start = this.index;
this.index++;
text.substring(start) + "] starting at column '" +
(start+1) + "' in expression '" + text + "'.";
}
function readRegexp(quote) {
var start = index;
index++;
var regexp = "";
var escape = false;
while (this.index < this.text.length) {
var ch = this.text.charAt(this.index);
while (index < text.length) {
var ch = text.charAt(index);
if (escape) {
regexp += ch;
escape = false;
@ -214,32 +195,32 @@ Lexer.prototype = {
regexp += ch;
escape = true;
} else if (ch === '/') {
this.index++;
index++;
var flags = "";
if (this.isIdent(this.text.charAt(this.index))) {
this.readIdent();
flags = this.tokens.pop().text;
if (isIdent(text.charAt(index))) {
readIdent();
flags = tokens.pop().text;
}
var compiledRegexp = new RegExp(regexp, flags);
this.tokens.push({index:start, text:regexp, flags:flags,
tokens.push({index:start, text:regexp, flags:flags,
fn:function(){return compiledRegexp;}});
return;
} else {
regexp += ch;
}
this.index++;
index++;
}
throw "Lexer Error: Unterminated RegExp [" +
this.text.substring(start) + "] starting at column '" +
(start+1) + "' in expression '" + this.text + "'.";
text.substring(start) + "] starting at column '" +
(start+1) + "' in expression '" + text + "'.";
}
};
}
/////////////////////////////////////////
// Parser constructor: stores the expression text, tokenizes it, and sets
// `index` to 0.
//   text         - the expression source
//   parseStrings - passed through to the lexer unchanged
// NOTE(review): the two `this.tokens` assignments below look like the
// before/after sides of this diff (old `new Lexer(...).parse()` vs. new
// `lex(...)`); confirm which one applies in the resulting file.
function Parser(text, parseStrings){
this.text = text;
this.tokens = new Lexer(text, parseStrings).parse();
this.tokens = lex(text, parseStrings);
this.index = 0;
}

View file

@ -1,8 +1,7 @@
LexerTest = TestCase('LexerTest');
LexerTest.prototype.testTokenizeAString = function(){
var lexer = new Lexer("a.bc[22]+1.3|f:'a\\\'c':\"d\\\"e\"");
var tokens = lexer.parse();
var tokens = lex("a.bc[22]+1.3|f:'a\\\'c':\"d\\\"e\"");
var i = 0;
assertEquals(tokens[i].index, 0);
assertEquals(tokens[i].text, 'a.bc');
@ -53,8 +52,7 @@ LexerTest.prototype.testTokenizeAString = function(){
};
LexerTest.prototype.testTokenizeUndefined = function(){
var lexer = new Lexer("undefined");
var tokens = lexer.parse();
var tokens = lex("undefined");
var i = 0;
assertEquals(tokens[i].index, 0);
assertEquals(tokens[i].text, 'undefined');
@ -64,8 +62,7 @@ LexerTest.prototype.testTokenizeUndefined = function(){
LexerTest.prototype.testTokenizeRegExp = function(){
var lexer = new Lexer("/r 1/");
var tokens = lexer.parse();
var tokens = lex("/r 1/");
var i = 0;
assertEquals(tokens[i].index, 0);
assertEquals(tokens[i].text, 'r 1');
@ -74,8 +71,7 @@ LexerTest.prototype.testTokenizeRegExp = function(){
LexerTest.prototype.testQuotedString = function(){
var str = "['\\'', \"\\\"\"]";
var lexer = new Lexer(str);
var tokens = lexer.parse();
var tokens = lex(str);
assertEquals(1, tokens[1].index);
assertEquals("'", tokens[1].string);
@ -87,22 +83,19 @@ LexerTest.prototype.testQuotedString = function(){
// Checks that the backslash escapes inside a double-quoted string (escaped
// quote, n, f, r, t, v and a four-digit unicode escape) are decoded into the
// first token's `string` property.
LexerTest.prototype.testQuotedStringEscape = function(){
var str = '"\\"\\n\\f\\r\\t\\v\\u00A0"';
// NOTE(review): the next three lines look like the before/after sides of
// this diff (old Lexer object vs. new lex() call); confirm which applies.
var lexer = new Lexer(str);
var tokens = lexer.parse();
var tokens = lex(str);
assertEquals('"\n\f\r\t\v\u00A0', tokens[0].string);
};
// Checks that a quoted string holding a single four-digit unicode escape
// yields exactly one token whose `string` value is the decoded character.
LexerTest.prototype.testTokenizeUnicode = function(){
// NOTE(review): the next three lines look like the before/after sides of
// this diff (old Lexer object vs. new lex() call); confirm which applies.
var lexer = new Lexer('"\\u00A0"');
var tokens = lexer.parse();
var tokens = lex('"\\u00A0"');
assertEquals(1, tokens.length);
assertEquals('\u00a0', tokens[0].string);
};
LexerTest.prototype.testTokenizeRegExpWithOptions = function(){
var lexer = new Lexer("/r/g");
var tokens = lexer.parse();
var tokens = lex("/r/g");
var i = 0;
assertEquals(tokens[i].index, 0);
assertEquals(tokens[i].text, 'r');
@ -111,8 +104,7 @@ LexerTest.prototype.testTokenizeRegExpWithOptions = function(){
};
LexerTest.prototype.testTokenizeRegExpWithEscape = function(){
var lexer = new Lexer("/\\/\\d/");
var tokens = lexer.parse();
var tokens = lex("/\\/\\d/");
var i = 0;
assertEquals(tokens[i].index, 0);
assertEquals(tokens[i].text, '\\/\\d');
@ -120,15 +112,13 @@ LexerTest.prototype.testTokenizeRegExpWithEscape = function(){
};
// Checks that spaces, a tab, a newline and a carriage return between two
// identifiers produce no tokens of their own: the first two tokens are 'a'
// and 'b'.
LexerTest.prototype.testIgnoreWhitespace = function(){
// NOTE(review): the next three lines look like the before/after sides of
// this diff (old Lexer object vs. new lex() call); confirm which applies.
var lexer = new Lexer("a \t \n \r b");
var tokens = lexer.parse();
var tokens = lex("a \t \n \r b");
assertEquals(tokens[0].text, 'a');
assertEquals(tokens[1].text, 'b');
};
LexerTest.prototype.testRelation = function(){
var lexer = new Lexer("! == != < > <= >=");
var tokens = lexer.parse();
var tokens = lex("! == != < > <= >=");
assertEquals(tokens[0].text, '!');
assertEquals(tokens[1].text, '==');
assertEquals(tokens[2].text, '!=');
@ -139,8 +129,7 @@ LexerTest.prototype.testRelation = function(){
};
LexerTest.prototype.testStatements = function(){
var lexer = new Lexer("a;b;");
var tokens = lexer.parse();
var tokens = lex("a;b;");
assertEquals(tokens[0].text, 'a');
assertEquals(tokens[1].text, ';');
assertEquals(tokens[2].text, 'b');
@ -148,7 +137,7 @@ LexerTest.prototype.testStatements = function(){
};
// Checks that the input "0.5" is lexed into a first token whose `text`
// equals the number 0.5.
LexerTest.prototype.testNumber = function(){
// NOTE(review): the next two lines look like the before/after sides of
// this diff (old Lexer object vs. new lex() call); confirm which applies.
var tokens = new Lexer("0.5").parse();
var tokens = lex("0.5");
expect(tokens[0].text).toEqual(0.5);
};