sqlexer.cpp

00001 /*
00002   see copyright notice in squirrel.h
00003 */
00004 #include "sqpcheader.h"
00005 #include <ctype.h>
00006 #include <stdlib.h>
00007 #include "sqtable.h"
00008 #include "sqstring.h"
00009 #include "sqcompiler.h"
00010 #include "sqlexer.h"
00011 
/*
 * Helper macros for the lexer. They expand inside SQLexer member functions
 * and refer to its members (and, for ADD_KEYWORD, to a local named 'ss').
 */

/** The character the lexer is currently looking at. */
#define CUR_CHAR (_currdata)
/** Shift the current token into _prevtoken, record 'tok' as current and return it. 'tok' is expanded twice. */
#define RETURN_TOKEN(tok) { _prevtoken = _curtoken; _curtoken = tok; return tok; }
/** True when the current character is at or below the end-of-buffer marker. */
#define IS_EOB() (CUR_CHAR <= SQUIRREL_EOB)
/** Read the next character and bump the column counter. */
#define NEXT() { Next(); _currentcolumn++; }
/** Empty the scratch buffer used for strings, numbers and identifiers. */
#define INIT_TEMP_STRING() { _longstr.resize(0); }
/** Append one character to the scratch buffer. */
#define APPEND_CHAR(ch) { _longstr.push_back(ch); }
/** Zero-terminate the scratch buffer. */
#define TERMINATE_BUFFER() { _longstr.push_back(_SC('\0')); }
/** Register keyword 'kw' (stringified) with token id 'tokid' in the keyword table. */
#define ADD_KEYWORD(kw, tokid) _keywords->NewSlot(SQString::Create(ss, _SC(#kw)), SQInteger(tokid))
00020 
00021 SQLexer::SQLexer(){}
00022 SQLexer::~SQLexer()
00023 {
00024   _keywords->Release();
00025 }
00026 
00027 void SQLexer::Init(SQSharedState *ss, SQLEXREADFUNC rg, SQUserPointer up,CompilerErrorFunc efunc,void *ed)
00028 {
00029   _errfunc = efunc;
00030   _errtarget = ed;
00031   _sharedstate = ss;
00032   _keywords = SQTable::Create(ss, 26);
00033   ADD_KEYWORD(while, TK_WHILE);
00034   ADD_KEYWORD(do, TK_DO);
00035   ADD_KEYWORD(if, TK_IF);
00036   ADD_KEYWORD(else, TK_ELSE);
00037   ADD_KEYWORD(break, TK_BREAK);
00038   ADD_KEYWORD(continue, TK_CONTINUE);
00039   ADD_KEYWORD(return, TK_RETURN);
00040   ADD_KEYWORD(null, TK_NULL);
00041   ADD_KEYWORD(function, TK_FUNCTION);
00042   ADD_KEYWORD(local, TK_LOCAL);
00043   ADD_KEYWORD(for, TK_FOR);
00044   ADD_KEYWORD(foreach, TK_FOREACH);
00045   ADD_KEYWORD(in, TK_IN);
00046   ADD_KEYWORD(typeof, TK_TYPEOF);
00047   ADD_KEYWORD(delegate, TK_DELEGATE);
00048   ADD_KEYWORD(delete, TK_DELETE);
00049   ADD_KEYWORD(try, TK_TRY);
00050   ADD_KEYWORD(catch, TK_CATCH);
00051   ADD_KEYWORD(throw, TK_THROW);
00052   ADD_KEYWORD(clone, TK_CLONE);
00053   ADD_KEYWORD(yield, TK_YIELD);
00054   ADD_KEYWORD(resume, TK_RESUME);
00055   ADD_KEYWORD(switch, TK_SWITCH);
00056   ADD_KEYWORD(case, TK_CASE);
00057   ADD_KEYWORD(default, TK_DEFAULT);
00058   ADD_KEYWORD(this, TK_THIS);
00059   ADD_KEYWORD(parent,TK_PARENT);
00060   ADD_KEYWORD(class,TK_CLASS);
00061   ADD_KEYWORD(extends,TK_EXTENDS);
00062   ADD_KEYWORD(constructor,TK_CONSTRUCTOR);
00063   ADD_KEYWORD(instanceof,TK_INSTANCEOF);
00064   ADD_KEYWORD(vargc,TK_VARGC);
00065   ADD_KEYWORD(vargv,TK_VARGV);
00066   ADD_KEYWORD(true,TK_TRUE);
00067   ADD_KEYWORD(false,TK_FALSE);
00068   ADD_KEYWORD(static,TK_STATIC);
00069   ADD_KEYWORD(enum,TK_ENUM);
00070   ADD_KEYWORD(const,TK_CONST);
00071 
00072   _readf = rg;
00073   _up = up;
00074   _lasttokenline = _currentline = 1;
00075   _currentcolumn = 0;
00076   _prevtoken = -1;
00077   Next();
00078 }
00079 
00080 void SQLexer::Error(const SQChar *err)
00081 {
00082   _errfunc(_errtarget,err);
00083 }
00084 
00085 void SQLexer::Next()
00086 {
00087   SQInteger t = _readf(_up);
00088   if(t > MAX_CHAR) Error(_SC("Invalid character"));
00089   if(t != 0) {
00090     _currdata = (LexChar)t;
00091     return;
00092   }
00093   _currdata = SQUIRREL_EOB;
00094 }
00095 
00096 const SQChar *SQLexer::Tok2Str(SQInteger tok)
00097 {
00098   SQObjectPtr itr, key, val;
00099   SQInteger nitr;
00100   while((nitr = _keywords->Next(false,itr, key, val)) != -1) {
00101     itr = (SQInteger)nitr;
00102     if(((SQInteger)_integer(val)) == tok)
00103       return _stringval(key);
00104   }
00105   return NULL;
00106 }
00107 
00108 void SQLexer::LexBlockComment()
00109 {
00110   bool done = false;
00111   while(!done) {
00112     switch(CUR_CHAR) {
00113       case _SC('*'): { NEXT(); if(CUR_CHAR == _SC('/')) { done = true; NEXT(); }}; continue;
00114       case _SC('\n'): _currentline++; NEXT(); continue;
00115       case SQUIRREL_EOB: Error(_SC("missing \"*/\" in comment"));
00116       default: NEXT();
00117     }
00118   }
00119 }
00120 
00121 SQInteger SQLexer::Lex()
00122 {
00123   _lasttokenline = _currentline;
00124   while(CUR_CHAR != SQUIRREL_EOB) {
00125     switch(CUR_CHAR){
00126     case _SC('\t'): case _SC('\r'): case _SC(' '): NEXT(); continue;
00127     case _SC('\n'):
00128       _currentline++;
00129       _prevtoken=_curtoken;
00130       _curtoken=_SC('\n');
00131       NEXT();
00132       _currentcolumn=1;
00133       continue;
00134     case _SC('/'):
00135       NEXT();
00136       switch(CUR_CHAR){
00137       case _SC('*'):
00138         NEXT();
00139         LexBlockComment();
00140         continue;
00141       case _SC('/'):
00142         do { NEXT(); } while (CUR_CHAR != _SC('\n') && (!IS_EOB()));
00143         continue;
00144       case _SC('='):
00145         NEXT();
00146         RETURN_TOKEN(TK_DIVEQ);
00147         continue;
00148       case _SC('>'):
00149         NEXT();
00150         RETURN_TOKEN(TK_ATTR_CLOSE);
00151         continue;
00152       default:
00153         RETURN_TOKEN('/');
00154       }
00155     case _SC('='):
00156       NEXT();
00157       if (CUR_CHAR != _SC('=')){ RETURN_TOKEN('=') }
00158       else { NEXT(); RETURN_TOKEN(TK_EQ); }
00159     case _SC('<'):
00160       NEXT();
00161       if ( CUR_CHAR == _SC('=') ) { NEXT(); RETURN_TOKEN(TK_LE) }
00162       else if ( CUR_CHAR == _SC('-') ) { NEXT(); RETURN_TOKEN(TK_NEWSLOT); }
00163       else if ( CUR_CHAR == _SC('<') ) { NEXT(); RETURN_TOKEN(TK_SHIFTL); }
00164       else if ( CUR_CHAR == _SC('/') ) { NEXT(); RETURN_TOKEN(TK_ATTR_OPEN); }
00165       //else if ( CUR_CHAR == _SC('[') ) { NEXT(); ReadMultilineString(); RETURN_TOKEN(TK_STRING_LITERAL); }
00166       else { RETURN_TOKEN('<') }
00167     case _SC('>'):
00168       NEXT();
00169       if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_GE);}
00170       else if(CUR_CHAR == _SC('>')){
00171         NEXT();
00172         if(CUR_CHAR == _SC('>')){
00173           NEXT();
00174           RETURN_TOKEN(TK_USHIFTR);
00175         }
00176         RETURN_TOKEN(TK_SHIFTR);
00177       }
00178       else { RETURN_TOKEN('>') }
00179     case _SC('!'):
00180       NEXT();
00181       if (CUR_CHAR != _SC('=')){ RETURN_TOKEN('!')}
00182       else { NEXT(); RETURN_TOKEN(TK_NE); }
00183     case _SC('@'): {
00184       SQInteger stype;
00185       NEXT();
00186       if(CUR_CHAR != _SC('"'))
00187         Error(_SC("string expected"));
00188       if((stype=ReadString(_SC('"'),true))!=-1) {
00189         RETURN_TOKEN(stype);
00190       }
00191       Error(_SC("error parsing the string"));
00192              }
00193     case _SC('"'):
00194     case _SC('\''): {
00195       SQInteger stype;
00196       if((stype=ReadString(CUR_CHAR,false))!=-1){
00197         RETURN_TOKEN(stype);
00198       }
00199       Error(_SC("error parsing the string"));
00200       }
00201     case _SC('{'): case _SC('}'): case _SC('('): case _SC(')'): case _SC('['): case _SC(']'):
00202     case _SC(';'): case _SC(','): case _SC('?'): case _SC('^'): case _SC('~'):
00203       {SQInteger ret = CUR_CHAR;
00204       NEXT(); RETURN_TOKEN(ret); }
00205     case _SC('.'):
00206       NEXT();
00207       if (CUR_CHAR != _SC('.')){ RETURN_TOKEN('.') }
00208       NEXT();
00209       if (CUR_CHAR != _SC('.')){ Error(_SC("invalid token '..'")); }
00210       NEXT();
00211       RETURN_TOKEN(TK_VARPARAMS);
00212     case _SC('&'):
00213       NEXT();
00214       if (CUR_CHAR != _SC('&')){ RETURN_TOKEN('&') }
00215       else { NEXT(); RETURN_TOKEN(TK_AND); }
00216     case _SC('|'):
00217       NEXT();
00218       if (CUR_CHAR != _SC('|')){ RETURN_TOKEN('|') }
00219       else { NEXT(); RETURN_TOKEN(TK_OR); }
00220     case _SC(':'):
00221       NEXT();
00222       if (CUR_CHAR != _SC(':')){ RETURN_TOKEN(':') }
00223       else { NEXT(); RETURN_TOKEN(TK_DOUBLE_COLON); }
00224     case _SC('*'):
00225       NEXT();
00226       if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MULEQ);}
00227       else RETURN_TOKEN('*');
00228     case _SC('%'):
00229       NEXT();
00230       if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MODEQ);}
00231       else RETURN_TOKEN('%');
00232     case _SC('-'):
00233       NEXT();
00234       if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_MINUSEQ);}
00235       else if  (CUR_CHAR == _SC('-')){ NEXT(); RETURN_TOKEN(TK_MINUSMINUS);}
00236       else RETURN_TOKEN('-');
00237     case _SC('+'):
00238       NEXT();
00239       if (CUR_CHAR == _SC('=')){ NEXT(); RETURN_TOKEN(TK_PLUSEQ);}
00240       else if (CUR_CHAR == _SC('+')){ NEXT(); RETURN_TOKEN(TK_PLUSPLUS);}
00241       else RETURN_TOKEN('+');
00242     case SQUIRREL_EOB:
00243       return 0;
00244     default:{
00245         if (scisdigit(CUR_CHAR)) {
00246           SQInteger ret = ReadNumber();
00247           RETURN_TOKEN(ret);
00248         }
00249         else if (scisalpha(CUR_CHAR) || CUR_CHAR == _SC('_')) {
00250           SQInteger t = ReadID();
00251           RETURN_TOKEN(t);
00252         }
00253         else {
00254           SQInteger c = CUR_CHAR;
00255           if (sciscntrl((int)c)) Error(_SC("unexpected character(control)"));
00256           NEXT();
00257           RETURN_TOKEN(c);
00258         }
00259         RETURN_TOKEN(0);
00260       }
00261     }
00262   }
00263   return 0;
00264 }
00265 
00266 SQInteger SQLexer::GetIDType(SQChar *s)
00267 {
00268   SQObjectPtr t;
00269   if(_keywords->Get(SQString::Create(_sharedstate, s), t)) {
00270     return SQInteger(_integer(t));
00271   }
00272   return TK_IDENTIFIER;
00273 }
00274 
00275 
00276 SQInteger SQLexer::ReadString(SQChar ndelim,bool verbatim)
00277 {
00278   INIT_TEMP_STRING();
00279   NEXT();
00280   if(IS_EOB()) return -1;
00281   for(;;) {
00282     while(CUR_CHAR != ndelim) {
00283       switch(CUR_CHAR) {
00284       case SQUIRREL_EOB:
00285         Error(_SC("unfinished string"));
00286         return -1;
00287       case _SC('\n'):
00288         if(!verbatim) Error(_SC("newline in a constant"));
00289         APPEND_CHAR(CUR_CHAR); NEXT();
00290         _currentline++;
00291         break;
00292       case _SC('\\'):
00293         if(verbatim) {
00294           APPEND_CHAR('\\'); NEXT();
00295         }
00296         else {
00297           NEXT();
00298           switch(CUR_CHAR) {
00299           case _SC('x'): NEXT(); {
00300             if(!isxdigit(CUR_CHAR)) Error(_SC("hexadecimal number expected"));
00301             const SQInteger maxdigits = 4;
00302             SQChar temp[maxdigits+1];
00303             SQInteger n = 0;
00304             while(isxdigit(CUR_CHAR) && n < maxdigits) {
00305               temp[n] = CUR_CHAR;
00306               n++;
00307               NEXT();
00308             }
00309             temp[n] = 0;
00310             SQChar *sTemp;
00311             APPEND_CHAR((SQChar)scstrtoul(temp,&sTemp,16));
00312           }
00313             break;
00314           case _SC('t'): APPEND_CHAR(_SC('\t')); NEXT(); break;
00315           case _SC('a'): APPEND_CHAR(_SC('\a')); NEXT(); break;
00316           case _SC('b'): APPEND_CHAR(_SC('\b')); NEXT(); break;
00317           case _SC('n'): APPEND_CHAR(_SC('\n')); NEXT(); break;
00318           case _SC('r'): APPEND_CHAR(_SC('\r')); NEXT(); break;
00319           case _SC('v'): APPEND_CHAR(_SC('\v')); NEXT(); break;
00320           case _SC('f'): APPEND_CHAR(_SC('\f')); NEXT(); break;
00321           case _SC('0'): APPEND_CHAR(_SC('\0')); NEXT(); break;
00322           case _SC('\\'): APPEND_CHAR(_SC('\\')); NEXT(); break;
00323           case _SC('"'): APPEND_CHAR(_SC('"')); NEXT(); break;
00324           case _SC('\''): APPEND_CHAR(_SC('\'')); NEXT(); break;
00325           default:
00326             Error(_SC("unrecognised escaper char"));
00327           break;
00328           }
00329         }
00330         break;
00331       default:
00332         APPEND_CHAR(CUR_CHAR);
00333         NEXT();
00334       }
00335     }
00336     NEXT();
00337     if(verbatim && CUR_CHAR == '"') { //double quotation
00338       APPEND_CHAR(CUR_CHAR);
00339       NEXT();
00340     }
00341     else {
00342       break;
00343     }
00344   }
00345   TERMINATE_BUFFER();
00346   SQInteger len = _longstr.size()-1;
00347   if(ndelim == _SC('\'')) {
00348     if(len == 0) Error(_SC("empty constant"));
00349     if(len > 1) Error(_SC("constant too long"));
00350     _nvalue = _longstr[0];
00351     return TK_INTEGER;
00352   }
00353   _svalue = &_longstr[0];
00354   return TK_STRING_LITERAL;
00355 }
00356 
00357 void LexHexadecimal(const SQChar *s,SQUnsignedInteger *res)
00358 {
00359   *res = 0;
00360   while(*s != 0)
00361   {
00362     if(scisdigit(*s)) *res = (*res)*16+((*s++)-'0');
00363     else if(scisxdigit(*s)) *res = (*res)*16+(toupper(*s++)-'A'+10);
00364     else { assert(0); }
00365   }
00366 }
00367 
00368 void LexInteger(const SQChar *s,SQUnsignedInteger *res)
00369 {
00370   *res = 0;
00371   while(*s != 0)
00372   {
00373     *res = (*res)*10+((*s++)-'0');
00374   }
00375 }
00376 
00377 SQInteger scisodigit(SQChar c) { return c >= _SC('0') && c <= _SC('7'); }
00378 
00379 void LexOctal(const SQChar *s,SQUnsignedInteger *res)
00380 {
00381   *res = 0;
00382   while(*s != 0)
00383   {
00384     if(scisodigit(*s)) *res = (*res)*8+((*s++)-'0');
00385     else { assert(0); }
00386   }
00387 }
00388 
00389 SQInteger isexponent(SQInteger c) { return c == 'e' || c=='E'; }
00390 
00391 
00392 #define MAX_HEX_DIGITS (sizeof(SQInteger)*2)
00393 SQInteger SQLexer::ReadNumber()
00394 {
00395 #define TINT 1
00396 #define TFLOAT 2
00397 #define THEX 3
00398 #define TSCIENTIFIC 4
00399 #define TOCTAL 5
00400   SQInteger type = TINT, firstchar = CUR_CHAR;
00401   SQChar *sTemp;
00402   INIT_TEMP_STRING();
00403   NEXT();
00404   if(firstchar == _SC('0') && (toupper(CUR_CHAR) == _SC('X') || scisodigit(CUR_CHAR)) ) {
00405     if(scisodigit(CUR_CHAR)) {
00406       type = TOCTAL;
00407       while(scisodigit(CUR_CHAR)) {
00408         APPEND_CHAR(CUR_CHAR);
00409         NEXT();
00410       }
00411       if(scisdigit(CUR_CHAR)) Error(_SC("invalid octal number"));
00412     }
00413     else {
00414       NEXT();
00415       type = THEX;
00416       while(isxdigit(CUR_CHAR)) {
00417         APPEND_CHAR(CUR_CHAR);
00418         NEXT();
00419       }
00420       if(_longstr.size() > MAX_HEX_DIGITS) Error(_SC("too many digits for an Hex number"));
00421     }
00422   }
00423   else {
00424     APPEND_CHAR((int)firstchar);
00425     while (CUR_CHAR == _SC('.') || scisdigit(CUR_CHAR) || isexponent(CUR_CHAR)) {
00426             if(CUR_CHAR == _SC('.')) type = TFLOAT;
00427       if(isexponent(CUR_CHAR)) {
00428         if(type != TFLOAT) Error(_SC("invalid numeric format"));
00429         type = TSCIENTIFIC;
00430         APPEND_CHAR(CUR_CHAR);
00431         NEXT();
00432         if(CUR_CHAR == '+' || CUR_CHAR == '-'){
00433           APPEND_CHAR(CUR_CHAR);
00434           NEXT();
00435         }
00436         if(!scisdigit(CUR_CHAR)) Error(_SC("exponent expected"));
00437       }
00438 
00439       APPEND_CHAR(CUR_CHAR);
00440       NEXT();
00441     }
00442   }
00443   TERMINATE_BUFFER();
00444   switch(type) {
00445   case TSCIENTIFIC:
00446   case TFLOAT:
00447     _fvalue = (SQFloat)scstrtod(&_longstr[0],&sTemp);
00448     return TK_FLOAT;
00449   case TINT:
00450     LexInteger(&_longstr[0],(SQUnsignedInteger *)&_nvalue);
00451     return TK_INTEGER;
00452   case THEX:
00453     LexHexadecimal(&_longstr[0],(SQUnsignedInteger *)&_nvalue);
00454     return TK_INTEGER;
00455   case TOCTAL:
00456     LexOctal(&_longstr[0],(SQUnsignedInteger *)&_nvalue);
00457     return TK_INTEGER;
00458   }
00459   return 0;
00460 }
00461 
00462 SQInteger SQLexer::ReadID()
00463 {
00464   SQInteger res;
00465   INIT_TEMP_STRING();
00466   do {
00467     APPEND_CHAR(CUR_CHAR);
00468     NEXT();
00469   } while(scisalnum(CUR_CHAR) || CUR_CHAR == _SC('_'));
00470   TERMINATE_BUFFER();
00471   res = GetIDType(&_longstr[0]);
00472   if(res == TK_IDENTIFIER || res == TK_CONSTRUCTOR) {
00473     _svalue = &_longstr[0];
00474   }
00475   return res;
00476 }

Generated on Wed Jul 15 20:35:56 2009 for OpenTTD by  doxygen 1.5.6