00001
00002
00003
00004 #include "sqpcheader.h"
00005 #include <ctype.h>
00006 #include <stdlib.h>
00007 #include "sqtable.h"
00008 #include "sqstring.h"
00009 #include "sqcompiler.h"
00010 #include "sqlexer.h"
00011
// Helper macros used by the SQLexer member functions below. They expand to
// code that refers to SQLexer members, so they are only meaningful inside
// SQLexer methods. The statement-like macros are plain brace blocks (not
// do/while(0)) because several call sites invoke them without a trailing ';'.

// The character currently under the read cursor.
#define CUR_CHAR (_currdata)

// Shifts the current token into _prevtoken, records t as current, returns t.
#define RETURN_TOKEN(t) { \
	_prevtoken = _curtoken; \
	_curtoken = t; \
	return t; \
}

// True when the current character compares <= the end-of-buffer marker.
#define IS_EOB() (CUR_CHAR <= SQUIRREL_EOB)

// Reads the next input character and bumps the column counter.
#define NEXT() { \
	Next(); \
	_currentcolumn++; \
}

// Scratch buffer (_longstr) management for string/number/identifier text.
#define INIT_TEMP_STRING() { \
	_longstr.resize(0); \
}
#define APPEND_CHAR(c) { \
	_longstr.push_back(c); \
}
#define TERMINATE_BUFFER() { \
	_longstr.push_back(_SC('\0')); \
}

// Registers keyword `key` (stringified) with token id `id` in _keywords.
// Expects a variable named `ss` (the shared state) at the point of use.
#define ADD_KEYWORD(key,id) \
	_keywords->NewSlot( SQString::Create(ss, _SC(#key)) ,SQInteger(id))
00020
00021 SQLexer::SQLexer(){}
00022 SQLexer::~SQLexer()
00023 {
00024 _keywords->Release();
00025 }
00026
00027 void SQLexer::Init(SQSharedState *ss, SQLEXREADFUNC rg, SQUserPointer up,CompilerErrorFunc efunc,void *ed)
00028 {
00029 _errfunc = efunc;
00030 _errtarget = ed;
00031 _sharedstate = ss;
00032 _keywords = SQTable::Create(ss, 26);
00033 ADD_KEYWORD(while, TK_WHILE);
00034 ADD_KEYWORD(do, TK_DO);
00035 ADD_KEYWORD(if, TK_IF);
00036 ADD_KEYWORD(else, TK_ELSE);
00037 ADD_KEYWORD(break, TK_BREAK);
00038 ADD_KEYWORD(continue, TK_CONTINUE);
00039 ADD_KEYWORD(return, TK_RETURN);
00040 ADD_KEYWORD(null, TK_NULL);
00041 ADD_KEYWORD(function, TK_FUNCTION);
00042 ADD_KEYWORD(local, TK_LOCAL);
00043 ADD_KEYWORD(for, TK_FOR);
00044 ADD_KEYWORD(foreach, TK_FOREACH);
00045 ADD_KEYWORD(in, TK_IN);
00046 ADD_KEYWORD(typeof, TK_TYPEOF);
00047 ADD_KEYWORD(delegate, TK_DELEGATE);
00048 ADD_KEYWORD(delete, TK_DELETE);
00049 ADD_KEYWORD(try, TK_TRY);
00050 ADD_KEYWORD(catch, TK_CATCH);
00051 ADD_KEYWORD(throw, TK_THROW);
00052 ADD_KEYWORD(clone, TK_CLONE);
00053 ADD_KEYWORD(yield, TK_YIELD);
00054 ADD_KEYWORD(resume, TK_RESUME);
00055 ADD_KEYWORD(switch, TK_SWITCH);
00056 ADD_KEYWORD(case, TK_CASE);
00057 ADD_KEYWORD(default, TK_DEFAULT);
00058 ADD_KEYWORD(this, TK_THIS);
00059 ADD_KEYWORD(parent,TK_PARENT);
00060 ADD_KEYWORD(class,TK_CLASS);
00061 ADD_KEYWORD(extends,TK_EXTENDS);
00062 ADD_KEYWORD(constructor,TK_CONSTRUCTOR);
00063 ADD_KEYWORD(instanceof,TK_INSTANCEOF);
00064 ADD_KEYWORD(vargc,TK_VARGC);
00065 ADD_KEYWORD(vargv,TK_VARGV);
00066 ADD_KEYWORD(true,TK_TRUE);
00067 ADD_KEYWORD(false,TK_FALSE);
00068 ADD_KEYWORD(static,TK_STATIC);
00069 ADD_KEYWORD(enum,TK_ENUM);
00070 ADD_KEYWORD(const,TK_CONST);
00071
00072 _readf = rg;
00073 _up = up;
00074 _lasttokenline = _currentline = 1;
00075 _currentcolumn = 0;
00076 _prevtoken = -1;
00077 Next();
00078 }
00079
00080 void SQLexer::Error(const SQChar *err)
00081 {
00082 _errfunc(_errtarget,err);
00083 }
00084
00085 void SQLexer::Next()
00086 {
00087 SQInteger t = _readf(_up);
00088 if(t > MAX_CHAR) Error(_SC("Invalid character"));
00089 if(t != 0) {
00090 _currdata = (LexChar)t;
00091 return;
00092 }
00093 _currdata = SQUIRREL_EOB;
00094 }
00095
00096 const SQChar *SQLexer::Tok2Str(SQInteger tok)
00097 {
00098 SQObjectPtr itr, key, val;
00099 SQInteger nitr;
00100 while((nitr = _keywords->Next(false,itr, key, val)) != -1) {
00101 itr = (SQInteger)nitr;
00102 if(((SQInteger)_integer(val)) == tok)
00103 return _stringval(key);
00104 }
00105 return NULL;
00106 }
00107
00108 void SQLexer::LexBlockComment()
00109 {
00110 bool done = false;
00111 while(!done) {
00112 switch(CUR_CHAR) {
00113 case _SC('*'): { NEXT(); if(CUR_CHAR == _SC('/')) { done = true; NEXT(); }}; continue;
00114 case _SC('\n'): _currentline++; NEXT(); continue;
00115 case SQUIRREL_EOB: Error(_SC("missing \"*/\" in comment"));
00116 default: NEXT();
00117 }
00118 }
00119 }
00120
00121 SQInteger SQLexer::Lex()
00122 {
00123 _lasttokenline = _currentline;
00124 while(CUR_CHAR != SQUIRREL_EOB) {
00125 switch(CUR_CHAR){
00126 case _SC('\t'): case _SC('\r'): case _SC(' '): NEXT(); continue;
00127 case _SC('\n'):
00128 _currentline++;
00129 _prevtoken=_curtoken;
00130 _curtoken=_SC('\n');
00131 NEXT();
00132 _currentcolumn=1;
00133 continue;
00134 case _SC('/'):
00135 NEXT();
00136 switch(CUR_CHAR){
00137 case _SC('*'):
00138 NEXT();
00139 LexBlockComment();
00140 continue;
00141 case _SC('/'):
00142 do { NEXT(); } while (CUR_CHAR != _SC('\n') && (!IS_EOB()));
00143 continue;
00144 case _SC('='):
00145 NEXT();
00146 RETURN_TOKEN(TK_DIVEQ);
00147 continue;
00148 case _SC('>'):
00149 NEXT();
00150 RETURN_TOKEN(TK_ATTR_CLOSE);
00151 continue;
00152 default:
00153 RETURN_TOKEN('/');
00154 }
00155 case _SC('='):
00156 NEXT();
00157 if (CUR_CHAR != _SC('=')){ RETURN_TOKEN('=') }
00158 else { NEXT(); RETURN_TOKEN(TK_EQ); }
00159 case _SC('<'):
00160 NEXT();
00161 if ( CUR_CHAR == _SC('=') ) { NEXT(); RETURN_TOKEN(TK_LE) }
00162 else if ( CUR_CHAR == _SC('-') ) { NEXT(); RETURN_TOKEN(TK_NEWSLOT); }
00163 else if ( CUR_CHAR == _SC('<') ) { NEXT(); RETURN_TOKEN(TK_SHIFTL); }
00164 else if ( CUR_CHAR == _SC('/') ) { NEXT(); RETURN_TOKEN(TK_ATTR_OPEN); }
00165
00166 else { RETURN_TOKEN('<') }
00167 case _SC('>'):
00168 NEXT();
00169 if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_GE);}
00170 else if(CUR_CHAR == _SC('>')){
00171 NEXT();
00172 if(CUR_CHAR == _SC('>')){
00173 NEXT();
00174 RETURN_TOKEN(TK_USHIFTR);
00175 }
00176 RETURN_TOKEN(TK_SHIFTR);
00177 }
00178 else { RETURN_TOKEN('>') }
00179 case _SC('!'):
00180 NEXT();
00181 if (CUR_CHAR != _SC('=')){ RETURN_TOKEN('!')}
00182 else { NEXT(); RETURN_TOKEN(TK_NE); }
00183 case _SC('@'): {
00184 SQInteger stype;
00185 NEXT();
00186 if(CUR_CHAR != _SC('"'))
00187 Error(_SC("string expected"));
00188 if((stype=ReadString(_SC('"'),true))!=-1) {
00189 RETURN_TOKEN(stype);
00190 }
00191 Error(_SC("error parsing the string"));
00192 }
00193 case _SC('"'):
00194 case _SC('\''): {
00195 SQInteger stype;
00196 if((stype=ReadString(CUR_CHAR,false))!=-1){
00197 RETURN_TOKEN(stype);
00198 }
00199 Error(_SC("error parsing the string"));
00200 }
00201 case _SC('{'): case _SC('}'): case _SC('('): case _SC(')'): case _SC('['): case _SC(']'):
00202 case _SC(';'): case _SC(','): case _SC('?'): case _SC('^'): case _SC('~'):
00203 {SQInteger ret = CUR_CHAR;
00204 NEXT(); RETURN_TOKEN(ret); }
00205 case _SC('.'):
00206 NEXT();
00207 if (CUR_CHAR != _SC('.')){ RETURN_TOKEN('.') }
00208 NEXT();
00209 if (CUR_CHAR != _SC('.')){ Error(_SC("invalid token '..'")); }
00210 NEXT();
00211 RETURN_TOKEN(TK_VARPARAMS);
00212 case _SC('&'):
00213 NEXT();
00214 if (CUR_CHAR != _SC('&')){ RETURN_TOKEN('&') }
00215 else { NEXT(); RETURN_TOKEN(TK_AND); }
00216 case _SC('|'):
00217 NEXT();
00218 if (CUR_CHAR != _SC('|')){ RETURN_TOKEN('|') }
00219 else { NEXT(); RETURN_TOKEN(TK_OR); }
00220 case _SC(':'):
00221 NEXT();
00222 if (CUR_CHAR != _SC(':')){ RETURN_TOKEN(':') }
00223 else { NEXT(); RETURN_TOKEN(TK_DOUBLE_COLON); }
00224 case _SC('*'):
00225 NEXT();
00226 if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MULEQ);}
00227 else RETURN_TOKEN('*');
00228 case _SC('%'):
00229 NEXT();
00230 if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MODEQ);}
00231 else RETURN_TOKEN('%');
00232 case _SC('-'):
00233 NEXT();
00234 if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MINUSEQ);}
00235 else if (CUR_CHAR == _SC('-')){ NEXT(); RETURN_TOKEN(TK_MINUSMINUS);}
00236 else RETURN_TOKEN('-');
00237 case _SC('+'):
00238 NEXT();
00239 if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_PLUSEQ);}
00240 else if (CUR_CHAR == _SC('+')){ NEXT(); RETURN_TOKEN(TK_PLUSPLUS);}
00241 else RETURN_TOKEN('+');
00242 case SQUIRREL_EOB:
00243 return 0;
00244 default:{
00245 if (scisdigit(CUR_CHAR)) {
00246 SQInteger ret = ReadNumber();
00247 RETURN_TOKEN(ret);
00248 }
00249 else if (scisalpha(CUR_CHAR) || CUR_CHAR == _SC('_')) {
00250 SQInteger t = ReadID();
00251 RETURN_TOKEN(t);
00252 }
00253 else {
00254 SQInteger c = CUR_CHAR;
00255 if (sciscntrl((int)c)) Error(_SC("unexpected character(control)"));
00256 NEXT();
00257 RETURN_TOKEN(c);
00258 }
00259 RETURN_TOKEN(0);
00260 }
00261 }
00262 }
00263 return 0;
00264 }
00265
00266 SQInteger SQLexer::GetIDType(SQChar *s)
00267 {
00268 SQObjectPtr t;
00269 if(_keywords->Get(SQString::Create(_sharedstate, s), t)) {
00270 return SQInteger(_integer(t));
00271 }
00272 return TK_IDENTIFIER;
00273 }
00274
00275
00276 SQInteger SQLexer::ReadString(SQChar ndelim,bool verbatim)
00277 {
00278 INIT_TEMP_STRING();
00279 NEXT();
00280 if(IS_EOB()) return -1;
00281 for(;;) {
00282 while(CUR_CHAR != ndelim) {
00283 switch(CUR_CHAR) {
00284 case SQUIRREL_EOB:
00285 Error(_SC("unfinished string"));
00286 return -1;
00287 case _SC('\n'):
00288 if(!verbatim) Error(_SC("newline in a constant"));
00289 APPEND_CHAR(CUR_CHAR); NEXT();
00290 _currentline++;
00291 break;
00292 case _SC('\\'):
00293 if(verbatim) {
00294 APPEND_CHAR('\\'); NEXT();
00295 }
00296 else {
00297 NEXT();
00298 switch(CUR_CHAR) {
00299 case _SC('x'): NEXT(); {
00300 if(!isxdigit(CUR_CHAR)) Error(_SC("hexadecimal number expected"));
00301 const SQInteger maxdigits = 4;
00302 SQChar temp[maxdigits+1];
00303 SQInteger n = 0;
00304 while(isxdigit(CUR_CHAR) && n < maxdigits) {
00305 temp[n] = CUR_CHAR;
00306 n++;
00307 NEXT();
00308 }
00309 temp[n] = 0;
00310 SQChar *sTemp;
00311 APPEND_CHAR((SQChar)scstrtoul(temp,&sTemp,16));
00312 }
00313 break;
00314 case _SC('t'): APPEND_CHAR(_SC('\t')); NEXT(); break;
00315 case _SC('a'): APPEND_CHAR(_SC('\a')); NEXT(); break;
00316 case _SC('b'): APPEND_CHAR(_SC('\b')); NEXT(); break;
00317 case _SC('n'): APPEND_CHAR(_SC('\n')); NEXT(); break;
00318 case _SC('r'): APPEND_CHAR(_SC('\r')); NEXT(); break;
00319 case _SC('v'): APPEND_CHAR(_SC('\v')); NEXT(); break;
00320 case _SC('f'): APPEND_CHAR(_SC('\f')); NEXT(); break;
00321 case _SC('0'): APPEND_CHAR(_SC('\0')); NEXT(); break;
00322 case _SC('\\'): APPEND_CHAR(_SC('\\')); NEXT(); break;
00323 case _SC('"'): APPEND_CHAR(_SC('"')); NEXT(); break;
00324 case _SC('\''): APPEND_CHAR(_SC('\'')); NEXT(); break;
00325 default:
00326 Error(_SC("unrecognised escaper char"));
00327 break;
00328 }
00329 }
00330 break;
00331 default:
00332 APPEND_CHAR(CUR_CHAR);
00333 NEXT();
00334 }
00335 }
00336 NEXT();
00337 if(verbatim && CUR_CHAR == '"') {
00338 APPEND_CHAR(CUR_CHAR);
00339 NEXT();
00340 }
00341 else {
00342 break;
00343 }
00344 }
00345 TERMINATE_BUFFER();
00346 SQInteger len = _longstr.size()-1;
00347 if(ndelim == _SC('\'')) {
00348 if(len == 0) Error(_SC("empty constant"));
00349 if(len > 1) Error(_SC("constant too long"));
00350 _nvalue = _longstr[0];
00351 return TK_INTEGER;
00352 }
00353 _svalue = &_longstr[0];
00354 return TK_STRING_LITERAL;
00355 }
00356
00357 void LexHexadecimal(const SQChar *s,SQUnsignedInteger *res)
00358 {
00359 *res = 0;
00360 while(*s != 0)
00361 {
00362 if(scisdigit(*s)) *res = (*res)*16+((*s++)-'0');
00363 else if(scisxdigit(*s)) *res = (*res)*16+(toupper(*s++)-'A'+10);
00364 else { assert(0); }
00365 }
00366 }
00367
00368 void LexInteger(const SQChar *s,SQUnsignedInteger *res)
00369 {
00370 *res = 0;
00371 while(*s != 0)
00372 {
00373 *res = (*res)*10+((*s++)-'0');
00374 }
00375 }
00376
00377 SQInteger scisodigit(SQChar c) { return c >= _SC('0') && c <= _SC('7'); }
00378
00379 void LexOctal(const SQChar *s,SQUnsignedInteger *res)
00380 {
00381 *res = 0;
00382 while(*s != 0)
00383 {
00384 if(scisodigit(*s)) *res = (*res)*8+((*s++)-'0');
00385 else { assert(0); }
00386 }
00387 }
00388
00389 SQInteger isexponent(SQInteger c) { return c == 'e' || c=='E'; }
00390
00391
00392 #define MAX_HEX_DIGITS (sizeof(SQInteger)*2)
00393 SQInteger SQLexer::ReadNumber()
00394 {
00395 #define TINT 1
00396 #define TFLOAT 2
00397 #define THEX 3
00398 #define TSCIENTIFIC 4
00399 #define TOCTAL 5
00400 SQInteger type = TINT, firstchar = CUR_CHAR;
00401 SQChar *sTemp;
00402 INIT_TEMP_STRING();
00403 NEXT();
00404 if(firstchar == _SC('0') && (toupper(CUR_CHAR) == _SC('X') || scisodigit(CUR_CHAR)) ) {
00405 if(scisodigit(CUR_CHAR)) {
00406 type = TOCTAL;
00407 while(scisodigit(CUR_CHAR)) {
00408 APPEND_CHAR(CUR_CHAR);
00409 NEXT();
00410 }
00411 if(scisdigit(CUR_CHAR)) Error(_SC("invalid octal number"));
00412 }
00413 else {
00414 NEXT();
00415 type = THEX;
00416 while(isxdigit(CUR_CHAR)) {
00417 APPEND_CHAR(CUR_CHAR);
00418 NEXT();
00419 }
00420 if(_longstr.size() > MAX_HEX_DIGITS) Error(_SC("too many digits for an Hex number"));
00421 }
00422 }
00423 else {
00424 APPEND_CHAR((int)firstchar);
00425 while (CUR_CHAR == _SC('.') || scisdigit(CUR_CHAR) || isexponent(CUR_CHAR)) {
00426 if(CUR_CHAR == _SC('.')) type = TFLOAT;
00427 if(isexponent(CUR_CHAR)) {
00428 if(type != TFLOAT) Error(_SC("invalid numeric format"));
00429 type = TSCIENTIFIC;
00430 APPEND_CHAR(CUR_CHAR);
00431 NEXT();
00432 if(CUR_CHAR == '+' || CUR_CHAR == '-'){
00433 APPEND_CHAR(CUR_CHAR);
00434 NEXT();
00435 }
00436 if(!scisdigit(CUR_CHAR)) Error(_SC("exponent expected"));
00437 }
00438
00439 APPEND_CHAR(CUR_CHAR);
00440 NEXT();
00441 }
00442 }
00443 TERMINATE_BUFFER();
00444 switch(type) {
00445 case TSCIENTIFIC:
00446 case TFLOAT:
00447 _fvalue = (SQFloat)scstrtod(&_longstr[0],&sTemp);
00448 return TK_FLOAT;
00449 case TINT:
00450 LexInteger(&_longstr[0],(SQUnsignedInteger *)&_nvalue);
00451 return TK_INTEGER;
00452 case THEX:
00453 LexHexadecimal(&_longstr[0],(SQUnsignedInteger *)&_nvalue);
00454 return TK_INTEGER;
00455 case TOCTAL:
00456 LexOctal(&_longstr[0],(SQUnsignedInteger *)&_nvalue);
00457 return TK_INTEGER;
00458 }
00459 return 0;
00460 }
00461
00462 SQInteger SQLexer::ReadID()
00463 {
00464 SQInteger res;
00465 INIT_TEMP_STRING();
00466 do {
00467 APPEND_CHAR(CUR_CHAR);
00468 NEXT();
00469 } while(scisalnum(CUR_CHAR) || CUR_CHAR == _SC('_'));
00470 TERMINATE_BUFFER();
00471 res = GetIDType(&_longstr[0]);
00472 if(res == TK_IDENTIFIER || res == TK_CONSTRUCTOR) {
00473 _svalue = &_longstr[0];
00474 }
00475 return res;
00476 }