1: 
   2: // Compiler implementation of the D programming language
   3: // Copyright (c) 1999-2011 by Digital Mars
   4: // All Rights Reserved
   5: // written by Walter Bright
   6: // http://www.digitalmars.com
   7: // License for redistribution is by either the Artistic License
   8: // in artistic.txt, or the GNU General Public License in gnu.txt.
   9: // See the included readme.txt for details.
  10: 
  11: /* Lexical Analyzer */
  12: 
  13: #include <stdio.h>
  14: #include <string.h>
  15: #include <ctype.h>
  16: #include <stdarg.h>
  17: #include <errno.h>
  18: #include <wchar.h>
  19: #include <stdlib.h>
  20: static char __file__[] = __FILE__;      /* for tassert.h                */
  21: #include        "tassert.h"
  22: #include <time.h>       // for time() and ctime()
  23: 
  24: #include "rmem.h"
  25: 
  26: #include "stringtable.h"
  27: 
  28: #include "lexer.h"
  29: #include "utf.h"
  30: #include "identifier.h"
  31: #include "id.h"
  32: #include "module.h"
  33: 
  34: #if _WIN32 && __DMC__
  35: // from \dm\src\include\setlocal.h
  36: extern "C" char * __cdecl __locale_decpoint;
  37: #endif
  38: 
  39: extern int HtmlNamedEntity(unsigned char *p, int length);
  40: 
  41: #define LS 0x2028       // UTF line separator
  42: #define PS 0x2029       // UTF paragraph separator
  43: 
  44: void unittest_lexer();
  45: 
  46: /********************************************
  47:  * Do our own char maps
  48:  */
  49: 
  50: static unsigned char cmtable[256];
  51: 
  52: const int CMoctal =     0x1;
  53: const int CMhex =       0x2;
  54: const int CMidchar =    0x4;
  55: 
  56: inline unsigned char isoctal (unsigned char c) { return cmtable[c] & CMoctal; }
  57: inline unsigned char ishex   (unsigned char c) { return cmtable[c] & CMhex; }
  58: inline unsigned char isidchar(unsigned char c) { return cmtable[c] & CMidchar; }
  59: 
  60: static void cmtable_init()
  61: {
  62:     for (unsigned c = 0; c < sizeof(cmtable) / sizeof(cmtable[0]); c++)
  63:     {
  64:         if ('0' <= c && c <= '7')
  65:             cmtable[c] |= CMoctal;
  66:         if (isdigit(c) || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'))
  67:             cmtable[c] |= CMhex;
  68:         if (isalnum(c) || c == '_')
  69:             cmtable[c] |= CMidchar;
  70:     }
  71: }
  72: 
  73: 
  74: /************************* Token **********************************************/
  75: 
// Spelling of each token kind, indexed by TOK value. Token::toChars(enum TOK)
// reads this table and substitutes "TOK<n>" for any entry that is null.
const char *Token::tochars[TOKMAX];
  77: 
  78: void *Token::operator new(size_t size)
  79: {   Token *t;
  80: 
  81:     if (Lexer::freelist)
  82:     {
  83:         t = Lexer::freelist;
  84:         Lexer::freelist = t->next;
  85:         return t;
  86:     }
  87: 
  88:     return ::operator new(size);
  89: }
  90: 
  91: #ifdef DEBUG
  92: void Token::print()
  93: {
  94:     fprintf(stdmsg, "%s\n", toChars());
  95: }
  96: #endif
  97: 
  98: const char *Token::toChars()
  99: {   const char *p;
 100:     static char buffer[3 + 3 * sizeof(value) + 1];
 101: 
 102:     p = buffer;
 103:     switch (value)
 104:     {
 105:         case TOKint32v:
 106: #if IN_GCC
 107:             sprintf(buffer,"%d",(d_int32)int64value);
 108: #else
 109:             sprintf(buffer,"%d",int32value);
warning C4996: 'sprintf': This function or variable may be unsafe. Consider using sprintf_s instead. To disable deprecation, use _CRT_SECURE_NO_WARNINGS. See online help for details. c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\stdio.h(371) : see declaration of 'sprintf'
110: #endif 111: break; 112: 113: case TOKuns32v: 114: case TOKcharv: 115: case TOKwcharv: 116: case TOKdcharv: 117: #if IN_GCC 118: sprintf(buffer,"%uU",(d_uns32)uns64value); 119: #else 120: sprintf(buffer,"%uU",uns32value);
warning C4996: 'sprintf': This function or variable may be unsafe. Consider using sprintf_s instead. To disable deprecation, use _CRT_SECURE_NO_WARNINGS. See online help for details. c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\stdio.h(371) : see declaration of 'sprintf'
121: #endif 122: break; 123: 124: case TOKint64v: 125: sprintf(buffer,"%jdL",int64value);
warning C4996: 'sprintf': This function or variable may be unsafe. Consider using sprintf_s instead. To disable deprecation, use _CRT_SECURE_NO_WARNINGS. See online help for details. c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\stdio.h(371) : see declaration of 'sprintf'
warning C6271: Extra argument passed to 'sprintf': parameter '3' is not used by the format string
126: break; 127: 128: case TOKuns64v: 129: sprintf(buffer,"%juUL",uns64value);
warning C4996: 'sprintf': This function or variable may be unsafe. Consider using sprintf_s instead. To disable deprecation, use _CRT_SECURE_NO_WARNINGS. See online help for details. c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\stdio.h(371) : see declaration of 'sprintf'
warning C6271: Extra argument passed to 'sprintf': parameter '3' is not used by the format string
130: break; 131: 132: #if IN_GCC 133: case TOKfloat32v: 134: case TOKfloat64v: 135: case TOKfloat80v: 136: float80value.format(buffer, sizeof(buffer)); 137: break; 138: case TOKimaginary32v: 139: case TOKimaginary64v: 140: case TOKimaginary80v: 141: float80value.format(buffer, sizeof(buffer)); 142: // %% buffer 143: strcat(buffer, "i"); 144: break; 145: #else 146: case TOKfloat32v: 147: sprintf(buffer,"%Lgf", float80value);
warning C4996: 'sprintf': This function or variable may be unsafe. Consider using sprintf_s instead. To disable deprecation, use _CRT_SECURE_NO_WARNINGS. See online help for details. c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\stdio.h(371) : see declaration of 'sprintf'
148: break; 149: 150: case TOKfloat64v: 151: sprintf(buffer,"%Lg", float80value);
warning C4996: 'sprintf': This function or variable may be unsafe. Consider using sprintf_s instead. To disable deprecation, use _CRT_SECURE_NO_WARNINGS. See online help for details. c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\stdio.h(371) : see declaration of 'sprintf'
152: break; 153: 154: case TOKfloat80v: 155: sprintf(buffer,"%LgL", float80value);
warning C4996: 'sprintf': This function or variable may be unsafe. Consider using sprintf_s instead. To disable deprecation, use _CRT_SECURE_NO_WARNINGS. See online help for details. c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\stdio.h(371) : see declaration of 'sprintf'
156: break; 157: 158: case TOKimaginary32v: 159: sprintf(buffer,"%Lgfi", float80value);
warning C4996: 'sprintf': This function or variable may be unsafe. Consider using sprintf_s instead. To disable deprecation, use _CRT_SECURE_NO_WARNINGS. See online help for details. c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\stdio.h(371) : see declaration of 'sprintf'
160: break; 161: 162: case TOKimaginary64v: 163: sprintf(buffer,"%Lgi", float80value);
warning C4996: 'sprintf': This function or variable may be unsafe. Consider using sprintf_s instead. To disable deprecation, use _CRT_SECURE_NO_WARNINGS. See online help for details. c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\stdio.h(371) : see declaration of 'sprintf'
164: break; 165: 166: case TOKimaginary80v: 167: sprintf(buffer,"%LgLi", float80value);
warning C4996: 'sprintf': This function or variable may be unsafe. Consider using sprintf_s instead. To disable deprecation, use _CRT_SECURE_NO_WARNINGS. See online help for details. c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\stdio.h(371) : see declaration of 'sprintf'
168: break; 169: #endif 170: 171: case TOKstring: 172: #if CSTRINGS 173: p = string; 174: #else 175: { OutBuffer buf; 176: 177: buf.writeByte('"'); 178: for (size_t i = 0; i < len; ) 179: { unsigned c; 180: 181: utf_decodeChar((unsigned char *)ustring, len, &i, &c); 182: switch (c) 183: { 184: case 0: 185: break; 186: 187: case '"': 188: case '\\': 189: buf.writeByte('\\'); 190: default: 191: if (isprint(c)) 192: buf.writeByte(c); 193: else if (c <= 0x7F) 194: buf.printf("\\x%02x", c); 195: else if (c <= 0xFFFF) 196: buf.printf("\\u%04x", c); 197: else 198: buf.printf("\\U%08x", c); 199: continue; 200: } 201: break; 202: } 203: buf.writeByte('"'); 204: if (postfix) 205: buf.writeByte('"'); 206: buf.writeByte(0); 207: p = (char *)buf.extractData(); 208: } 209: #endif 210: break; 211: 212: case TOKidentifier: 213: case TOKenum: 214: case TOKstruct: 215: case TOKimport: 216: case BASIC_TYPES: 217: p = ident->toChars(); 218: break; 219: 220: default: 221: p = toChars(value); 222: break; 223: } 224: return p; 225: } 226: 227: const char *Token::toChars(enum TOK value) 228: { const char *p; 229: static char buffer[3 + 3 * sizeof(value) + 1]; 230: 231: p = tochars[value]; 232: if (!p) 233: { sprintf(buffer,"TOK%d",value);
warning C4996: 'sprintf': This function or variable may be unsafe. Consider using sprintf_s instead. To disable deprecation, use _CRT_SECURE_NO_WARNINGS. See online help for details. c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\stdio.h(371) : see declaration of 'sprintf'
234: p = buffer; 235: } 236: return p; 237: } 238: 239: /*************************** Lexer ********************************************/ 240: 241: Token *Lexer::freelist = NULL; 242: StringTable Lexer::stringtable; 243: OutBuffer Lexer::stringbuffer; 244: 245: Lexer::Lexer(Module *mod, 246: unsigned char *base, unsigned begoffset, unsigned endoffset, 247: int doDocComment, int commentToken) 248: : loc(mod, 1) 249: { 250: //printf("Lexer::Lexer(%p,%d)\n",base,length); 251: //printf("lexer.mod = %p, %p\n", mod, this->loc.mod); 252: memset(&token,0,sizeof(token)); 253: this->base = base; 254: this->end = base + endoffset; 255: p = base + begoffset; 256: this->mod = mod; 257: this->doDocComment = doDocComment; 258: this->anyToken = 0; 259: this->commentToken = commentToken; 260: //initKeywords(); 261: 262: /* If first line starts with '#!', ignore the line 263: */ 264: 265: if (p[0] == '#' && p[1] =='!') 266: { 267: p += 2; 268: while (1) 269: { unsigned char c = *p; 270: switch (c) 271: { 272: case '\n': 273: p++; 274: break; 275: 276: case '\r': 277: p++; 278: if (*p == '\n') 279: p++; 280: break; 281: 282: case 0: 283: case 0x1A: 284: break; 285: 286: default: 287: if (c & 0x80) 288: { unsigned u = decodeUTF(); 289: if (u == PS || u == LS) 290: break; 291: } 292: p++; 293: continue; 294: } 295: break; 296: } 297: loc.linnum = 2; 298: } 299: } 300: 301: 302: void Lexer::error(const char *format, ...) 303: { 304: if (mod && !global.gag) 305: { 306: char *p = loc.toChars(); 307: if (*p) 308: fprintf(stdmsg, "%s: ", p); 309: mem.free(p); 310: 311: va_list ap; 312: va_start(ap, format); 313: vfprintf(stdmsg, format, ap); 314: va_end(ap); 315: 316: fprintf(stdmsg, "\n"); 317: fflush(stdmsg); 318: 319: if (global.errors >= 20) // moderate blizzard of cascading messages 320: fatal(); 321: } 322: global.errors++; 323: } 324: 325: void Lexer::error(Loc loc, const char *format, ...) 
326: { 327: if (mod && !global.gag) 328: { 329: char *p = loc.toChars(); 330: if (*p) 331: fprintf(stdmsg, "%s: ", p); 332: mem.free(p); 333: 334: va_list ap; 335: va_start(ap, format); 336: vfprintf(stdmsg, format, ap); 337: va_end(ap); 338: 339: fprintf(stdmsg, "\n"); 340: fflush(stdmsg); 341: 342: if (global.errors >= 20) // moderate blizzard of cascading messages 343: fatal(); 344: } 345: global.errors++; 346: } 347: 348: TOK Lexer::nextToken() 349: { Token *t; 350: 351: if (token.next) 352: { 353: t = token.next; 354: memcpy(&token,t,sizeof(Token)); 355: t->next = freelist; 356: freelist = t; 357: } 358: else 359: { 360: scan(&token); 361: } 362: //token.print(); 363: return token.value; 364: } 365: 366: Token *Lexer::peek(Token *ct) 367: { Token *t; 368: 369: if (ct->next) 370: t = ct->next; 371: else 372: { 373: t = new Token(); 374: scan(t); 375: t->next = NULL; 376: ct->next = t; 377: } 378: return t; 379: } 380: 381: /*********************** 382: * Look ahead at next token's value. 383: */ 384: 385: TOK Lexer::peekNext() 386: { 387: return peek(&token)->value; 388: } 389: 390: /*********************** 391: * Look 2 tokens ahead at value. 392: */ 393: 394: TOK Lexer::peekNext2() 395: { 396: Token *t = peek(&token); 397: return peek(t)->value; 398: } 399: 400: /********************************* 401: * tk is on the opening (. 402: * Look ahead and return token that is past the closing ). 
403: */ 404: 405: Token *Lexer::peekPastParen(Token *tk) 406: { 407: //printf("peekPastParen()\n"); 408: int parens = 1; 409: int curlynest = 0; 410: while (1) 411: { 412: tk = peek(tk); 413: //tk->print(); 414: switch (tk->value) 415: { 416: case TOKlparen: 417: parens++; 418: continue; 419: 420: case TOKrparen: 421: --parens; 422: if (parens) 423: continue; 424: tk = peek(tk); 425: break; 426: 427: case TOKlcurly: 428: curlynest++; 429: continue; 430: 431: case TOKrcurly: 432: if (--curlynest >= 0) 433: continue; 434: break; 435: 436: case TOKsemicolon: 437: if (curlynest) 438: continue; 439: break; 440: 441: case TOKeof: 442: break; 443: 444: default: 445: continue; 446: } 447: return tk; 448: } 449: } 450: 451: /********************************** 452: * Determine if string is a valid Identifier. 453: * Placed here because of commonality with Lexer functionality. 454: * Returns: 455: * 0 invalid 456: */ 457: 458: int Lexer::isValidIdentifier(char *p) 459: { 460: size_t len; 461: size_t idx; 462: 463: if (!p || !*p) 464: goto Linvalid; 465: 466: if (*p >= '0' && *p <= '9') // beware of isdigit() on signed chars 467: goto Linvalid; 468: 469: len = strlen(p); 470: idx = 0; 471: while (p[idx]) 472: { dchar_t dc; 473: 474: const char *q = utf_decodeChar((unsigned char *)p, len, &idx, &dc); 475: if (q) 476: goto Linvalid; 477: 478: if (!((dc >= 0x80 && isUniAlpha(dc)) || isalnum(dc) || dc == '_')) 479: goto Linvalid; 480: } 481: return 1; 482: 483: Linvalid: 484: return 0; 485: } 486: 487: /**************************** 488: * Turn next token in buffer into a token. 
489: */ 490: 491: void Lexer::scan(Token *t) 492: { 493: unsigned lastLine = loc.linnum; 494: unsigned linnum; 495: 496: t->blockComment = NULL; 497: t->lineComment = NULL; 498: while (1) 499: { 500: t->ptr = p; 501: //printf("p = %p, *p = '%c'\n",p,*p); 502: switch (*p) 503: { 504: case 0: 505: case 0x1A: 506: t->value = TOKeof; // end of file 507: return; 508: 509: case ' ': 510: case '\t': 511: case '\v': 512: case '\f': 513: p++; 514: continue; // skip white space 515: 516: case '\r': 517: p++; 518: if (*p != '\n') // if CR stands by itself 519: loc.linnum++; 520: continue; // skip white space 521: 522: case '\n': 523: p++; 524: loc.linnum++; 525: continue; // skip white space 526: 527: case '0': case '1': case '2': case '3': case '4': 528: case '5': case '6': case '7': case '8': case '9': 529: t->value = number(t); 530: return; 531: 532: #if CSTRINGS 533: case '\'': 534: t->value = charConstant(t, 0); 535: return; 536: 537: case '"': 538: t->value = stringConstant(t,0); 539: return; 540: 541: case 'l': 542: case 'L': 543: if (p[1] == '\'') 544: { 545: p++; 546: t->value = charConstant(t, 1); 547: return; 548: } 549: else if (p[1] == '"') 550: { 551: p++; 552: t->value = stringConstant(t, 1); 553: return; 554: } 555: #else 556: case '\'': 557: t->value = charConstant(t,0); 558: return; 559: 560: case 'r': 561: if (p[1] != '"') 562: goto case_ident; 563: p++; 564: case '`': 565: t->value = wysiwygStringConstant(t, *p); 566: return; 567: 568: case 'x': 569: if (p[1] != '"') 570: goto case_ident; 571: p++; 572: t->value = hexStringConstant(t); 573: return; 574: 575: #if DMDV2 576: case 'q': 577: if (p[1] == '"') 578: { 579: p++; 580: t->value = delimitedStringConstant(t); 581: return; 582: } 583: else if (p[1] == '{') 584: { 585: p++; 586: t->value = tokenStringConstant(t); 587: return; 588: } 589: else 590: goto case_ident; 591: #endif 592: 593: case '"': 594: t->value = escapeStringConstant(t,0); 595: return; 596: 597: #if ! 
TEXTUAL_ASSEMBLY_OUT 598: case '\\': // escaped string literal 599: { unsigned c; 600: unsigned char *pstart = p; 601: 602: stringbuffer.reset(); 603: do 604: { 605: p++; 606: switch (*p) 607: { 608: case 'u': 609: case 'U': 610: case '&': 611: c = escapeSequence(); 612: stringbuffer.writeUTF8(c); 613: break; 614: 615: default: 616: c = escapeSequence(); 617: stringbuffer.writeByte(c); 618: break; 619: } 620: } while (*p == '\\'); 621: t->len = stringbuffer.offset; 622: stringbuffer.writeByte(0); 623: t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); 624: memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); 625: t->postfix = 0; 626: t->value = TOKstring; 627: #if DMDV2 628: if (!global.params.useDeprecated) 629: error("Escape String literal %.*s is deprecated, use double quoted string literal \"%.*s\" instead", p - pstart, pstart, p - pstart, pstart); 630: #endif 631: return; 632: } 633: #endif 634: 635: case 'l': 636: case 'L': 637: #endif 638: case 'a': case 'b': case 'c': case 'd': case 'e': 639: case 'f': case 'g': case 'h': case 'i': case 'j': 640: case 'k': case 'm': case 'n': case 'o': 641: #if DMDV2 642: case 'p': /*case 'q': case 'r':*/ case 's': case 't': 643: #else 644: case 'p': case 'q': /*case 'r':*/ case 's': case 't': 645: #endif 646: case 'u': case 'v': case 'w': /*case 'x':*/ case 'y': 647: case 'z': 648: case 'A': case 'B': case 'C': case 'D': case 'E': 649: case 'F': case 'G': case 'H': case 'I': case 'J': 650: case 'K': case 'M': case 'N': case 'O': 651: case 'P': case 'Q': case 'R': case 'S': case 'T': 652: case 'U': case 'V': case 'W': case 'X': case 'Y': 653: case 'Z': 654: case '_': 655: case_ident: 656: { unsigned char c; 657: 658: while (1) 659: { 660: c = *++p; 661: if (isidchar(c)) 662: continue; 663: else if (c & 0x80) 664: { unsigned char *s = p; 665: unsigned u = decodeUTF(); 666: if (isUniAlpha(u)) 667: continue; 668: error("char 0x%04x not allowed in identifier", u); 669: p = s; 670: } 671: break; 672: } 673: 
674: StringValue *sv = stringtable.update((char *)t->ptr, p - t->ptr); 675: Identifier *id = (Identifier *) sv->ptrvalue; 676: if (!id) 677: { id = new Identifier(sv->lstring.string,TOKidentifier); 678: sv->ptrvalue = id; 679: } 680: t->ident = id; 681: t->value = (enum TOK) id->value; 682: anyToken = 1; 683: if (*t->ptr == '_') // if special identifier token 684: { 685: static char date[11+1]; 686: static char time[8+1]; 687: static char timestamp[24+1]; 688: 689: if (!date[0]) // lazy evaluation 690: { time_t t;
warning C6246: Local declaration of 't' hides declaration of the same name in outer scope. For additional information, see previous declaration at line '491' of 'c:\projects\extern\d\dmd\src\lexer.c': Lines: 491
691: char *p; 692: 693: ::time(&t); 694: p = ctime(&t);
warning C4996: 'ctime': This function or variable may be unsafe. Consider using ctime_s instead. To disable deprecation, use _CRT_SECURE_NO_WARNINGS. See online help for details. c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\time.inl(86) : see declaration of 'ctime'
695: assert(p); 696: sprintf(date, "%.6s %.4s", p + 4, p + 20);
warning C4996: 'sprintf': This function or variable may be unsafe. Consider using sprintf_s instead. To disable deprecation, use _CRT_SECURE_NO_WARNINGS. See online help for details. c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\stdio.h(371) : see declaration of 'sprintf'
697: sprintf(time, "%.8s", p + 11);
warning C4996: 'sprintf': This function or variable may be unsafe. Consider using sprintf_s instead. To disable deprecation, use _CRT_SECURE_NO_WARNINGS. See online help for details. c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\stdio.h(371) : see declaration of 'sprintf'
698: sprintf(timestamp, "%.24s", p);
warning C4996: 'sprintf': This function or variable may be unsafe. Consider using sprintf_s instead. To disable deprecation, use _CRT_SECURE_NO_WARNINGS. See online help for details. c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\stdio.h(371) : see declaration of 'sprintf'
699: } 700: 701: #if DMDV1 702: if (mod && id == Id::FILE) 703: { 704: t->ustring = (unsigned char *)(loc.filename ? loc.filename : mod->ident->toChars()); 705: goto Lstr; 706: } 707: else if (mod && id == Id::LINE) 708: { 709: t->value = TOKint64v; 710: t->uns64value = loc.linnum; 711: } 712: else 713: #endif 714: if (id == Id::DATE) 715: { 716: t->ustring = (unsigned char *)date; 717: goto Lstr; 718: } 719: else if (id == Id::TIME) 720: { 721: t->ustring = (unsigned char *)time; 722: goto Lstr; 723: } 724: else if (id == Id::VENDOR) 725: { 726: t->ustring = (unsigned char *)"Digital Mars D"; 727: goto Lstr; 728: } 729: else if (id == Id::TIMESTAMP) 730: { 731: t->ustring = (unsigned char *)timestamp; 732: Lstr: 733: t->value = TOKstring; 734: Llen:
warning C4102: 'Llen' : unreferenced label
735: t->postfix = 0; 736: t->len = strlen((char *)t->ustring); 737: } 738: else if (id == Id::VERSIONX) 739: { unsigned major = 0; 740: unsigned minor = 0; 741: 742: for (const char *p = global.version + 1; 1; p++) 743: { 744: char c = *p;
warning C6246: Local declaration of 'c' hides declaration of the same name in outer scope. For additional information, see previous declaration at line '656' of 'c:\projects\extern\d\dmd\src\lexer.c': Lines: 656
745: if (isdigit(c))
warning C6328: 'char' passed as parameter '1' when 'unsigned char' is required in call to 'isdigit'
746: minor = minor * 10 + c - '0'; 747: else if (c == '.') 748: { major = minor; 749: minor = 0; 750: } 751: else 752: break; 753: } 754: t->value = TOKint64v; 755: t->uns64value = major * 1000 + minor; 756: } 757: #if DMDV2 758: else if (id == Id::EOFX) 759: { 760: t->value = TOKeof; 761: // Advance scanner to end of file 762: while (!(*p == 0 || *p == 0x1A)) 763: p++; 764: } 765: #endif 766: } 767: //printf("t->value = %d\n",t->value); 768: return; 769: } 770: 771: case '/': 772: p++; 773: switch (*p) 774: { 775: case '=': 776: p++; 777: t->value = TOKdivass; 778: return; 779: 780: case '*': 781: p++; 782: linnum = loc.linnum; 783: while (1) 784: { 785: while (1) 786: { unsigned char c = *p; 787: switch (c) 788: { 789: case '/': 790: break; 791: 792: case '\n': 793: loc.linnum++; 794: p++; 795: continue; 796: 797: case '\r': 798: p++; 799: if (*p != '\n') 800: loc.linnum++; 801: continue; 802: 803: case 0: 804: case 0x1A: 805: error("unterminated /* */ comment"); 806: p = end; 807: t->value = TOKeof; 808: return; 809: 810: default: 811: if (c & 0x80) 812: { unsigned u = decodeUTF(); 813: if (u == PS || u == LS) 814: loc.linnum++; 815: } 816: p++; 817: continue; 818: } 819: break; 820: } 821: p++; 822: if (p[-2] == '*' && p - 3 != t->ptr) 823: break; 824: } 825: if (commentToken) 826: { 827: t->value = TOKcomment; 828: return; 829: } 830: else if (doDocComment && t->ptr[2] == '*' && p - 4 != t->ptr) 831: { // if /** but not /**/ 832: getDocComment(t, lastLine == linnum); 833: } 834: continue; 835: 836: case '/': // do // style comments 837: linnum = loc.linnum; 838: while (1) 839: { unsigned char c = *++p; 840: switch (c) 841: { 842: case '\n': 843: break; 844: 845: case '\r': 846: if (p[1] == '\n') 847: p++; 848: break; 849: 850: case 0: 851: case 0x1A: 852: if (commentToken) 853: { 854: p = end; 855: t->value = TOKcomment; 856: return; 857: } 858: if (doDocComment && t->ptr[2] == '/') 859: getDocComment(t, lastLine == linnum); 860: p = end; 861: t->value = 
TOKeof; 862: return; 863: 864: default: 865: if (c & 0x80) 866: { unsigned u = decodeUTF(); 867: if (u == PS || u == LS) 868: break; 869: } 870: continue; 871: } 872: break; 873: } 874: 875: if (commentToken) 876: { 877: p++; 878: loc.linnum++; 879: t->value = TOKcomment; 880: return; 881: } 882: if (doDocComment && t->ptr[2] == '/') 883: getDocComment(t, lastLine == linnum); 884: 885: p++; 886: loc.linnum++; 887: continue; 888: 889: case '+': 890: { int nest; 891: 892: linnum = loc.linnum; 893: p++; 894: nest = 1; 895: while (1) 896: { unsigned char c = *p; 897: switch (c) 898: { 899: case '/': 900: p++; 901: if (*p == '+') 902: { 903: p++; 904: nest++; 905: } 906: continue; 907: 908: case '+': 909: p++; 910: if (*p == '/') 911: { 912: p++; 913: if (--nest == 0) 914: break; 915: } 916: continue; 917: 918: case '\r': 919: p++; 920: if (*p != '\n') 921: loc.linnum++; 922: continue; 923: 924: case '\n': 925: loc.linnum++; 926: p++; 927: continue; 928: 929: case 0: 930: case 0x1A: 931: error("unterminated /+ +/ comment"); 932: p = end; 933: t->value = TOKeof; 934: return; 935: 936: default: 937: if (c & 0x80) 938: { unsigned u = decodeUTF(); 939: if (u == PS || u == LS) 940: loc.linnum++; 941: } 942: p++; 943: continue; 944: } 945: break; 946: } 947: if (commentToken) 948: { 949: t->value = TOKcomment; 950: return; 951: } 952: if (doDocComment && t->ptr[2] == '+' && p - 4 != t->ptr) 953: { // if /++ but not /++/ 954: getDocComment(t, lastLine == linnum); 955: } 956: continue; 957: } 958: } 959: t->value = TOKdiv; 960: return; 961: 962: case '.': 963: p++; 964: if (isdigit(*p)) 965: { /* Note that we don't allow ._1 and ._ as being 966: * valid floating point numbers. 
967: */ 968: p--; 969: t->value = inreal(t); 970: } 971: else if (p[0] == '.') 972: { 973: if (p[1] == '.') 974: { p += 2; 975: t->value = TOKdotdotdot; 976: } 977: else 978: { p++; 979: t->value = TOKslice; 980: } 981: } 982: else 983: t->value = TOKdot; 984: return; 985: 986: case '&': 987: p++; 988: if (*p == '=') 989: { p++; 990: t->value = TOKandass; 991: } 992: else if (*p == '&') 993: { p++; 994: t->value = TOKandand; 995: } 996: else 997: t->value = TOKand; 998: return; 999: 1000: case '|': 1001: p++; 1002: if (*p == '=') 1003: { p++; 1004: t->value = TOKorass; 1005: } 1006: else if (*p == '|') 1007: { p++; 1008: t->value = TOKoror; 1009: } 1010: else 1011: t->value = TOKor; 1012: return; 1013: 1014: case '-': 1015: p++; 1016: if (*p == '=') 1017: { p++; 1018: t->value = TOKminass; 1019: } 1020: #if 0 1021: else if (*p == '>') 1022: { p++; 1023: t->value = TOKarrow; 1024: } 1025: #endif 1026: else if (*p == '-') 1027: { p++; 1028: t->value = TOKminusminus; 1029: } 1030: else 1031: t->value = TOKmin; 1032: return; 1033: 1034: case '+': 1035: p++; 1036: if (*p == '=') 1037: { p++; 1038: t->value = TOKaddass; 1039: } 1040: else if (*p == '+') 1041: { p++; 1042: t->value = TOKplusplus; 1043: } 1044: else 1045: t->value = TOKadd; 1046: return; 1047: 1048: case '<': 1049: p++; 1050: if (*p == '=') 1051: { p++; 1052: t->value = TOKle; // <= 1053: } 1054: else if (*p == '<') 1055: { p++; 1056: if (*p == '=') 1057: { p++; 1058: t->value = TOKshlass; // <<= 1059: } 1060: else 1061: t->value = TOKshl; // << 1062: } 1063: else if (*p == '>') 1064: { p++; 1065: if (*p == '=') 1066: { p++; 1067: t->value = TOKleg; // <>= 1068: } 1069: else 1070: t->value = TOKlg; // <> 1071: } 1072: else 1073: t->value = TOKlt; // < 1074: return; 1075: 1076: case '>': 1077: p++; 1078: if (*p == '=') 1079: { p++; 1080: t->value = TOKge; // >= 1081: } 1082: else if (*p == '>') 1083: { p++; 1084: if (*p == '=') 1085: { p++; 1086: t->value = TOKshrass; // >>= 1087: } 1088: else if (*p == 
'>') 1089: { p++; 1090: if (*p == '=') 1091: { p++; 1092: t->value = TOKushrass; // >>>= 1093: } 1094: else 1095: t->value = TOKushr; // >>> 1096: } 1097: else 1098: t->value = TOKshr; // >> 1099: } 1100: else 1101: t->value = TOKgt; // > 1102: return; 1103: 1104: case '!': 1105: p++; 1106: if (*p == '=') 1107: { p++; 1108: if (*p == '=' && global.params.Dversion == 1) 1109: { p++; 1110: t->value = TOKnotidentity; // !== 1111: } 1112: else 1113: t->value = TOKnotequal; // != 1114: } 1115: else if (*p == '<') 1116: { p++; 1117: if (*p == '>') 1118: { p++; 1119: if (*p == '=') 1120: { p++; 1121: t->value = TOKunord; // !<>= 1122: } 1123: else 1124: t->value = TOKue; // !<> 1125: } 1126: else if (*p == '=') 1127: { p++; 1128: t->value = TOKug; // !<= 1129: } 1130: else 1131: t->value = TOKuge; // !< 1132: } 1133: else if (*p == '>') 1134: { p++; 1135: if (*p == '=') 1136: { p++; 1137: t->value = TOKul; // !>= 1138: } 1139: else 1140: t->value = TOKule; // !> 1141: } 1142: else 1143: t->value = TOKnot; // ! 
1144: return; 1145: 1146: case '=': 1147: p++; 1148: if (*p == '=') 1149: { p++; 1150: if (*p == '=' && global.params.Dversion == 1) 1151: { p++; 1152: t->value = TOKidentity; // === 1153: } 1154: else 1155: t->value = TOKequal; // == 1156: } 1157: else 1158: t->value = TOKassign; // = 1159: return; 1160: 1161: case '~': 1162: p++; 1163: if (*p == '=') 1164: { p++; 1165: t->value = TOKcatass; // ~= 1166: } 1167: else 1168: t->value = TOKtilde; // ~ 1169: return; 1170: 1171: #if DMDV2 1172: case '^': 1173: p++; 1174: if (*p == '^') 1175: { p++; 1176: if (*p == '=') 1177: { p++; 1178: t->value = TOKpowass; // ^^= 1179: } 1180: else 1181: t->value = TOKpow; // ^^ 1182: } 1183: else if (*p == '=') 1184: { p++; 1185: t->value = TOKxorass; // ^= 1186: } 1187: else 1188: t->value = TOKxor; // ^ 1189: return; 1190: #endif 1191: 1192: #define SINGLE(c,tok) case c: p++; t->value = tok; return; 1193: 1194: SINGLE('(', TOKlparen) 1195: SINGLE(')', TOKrparen) 1196: SINGLE('[', TOKlbracket) 1197: SINGLE(']', TOKrbracket) 1198: SINGLE('{', TOKlcurly) 1199: SINGLE('}', TOKrcurly) 1200: SINGLE('?', TOKquestion) 1201: SINGLE(',', TOKcomma) 1202: SINGLE(';', TOKsemicolon) 1203: SINGLE(':', TOKcolon) 1204: SINGLE('$', TOKdollar) 1205: #if DMDV2 1206: SINGLE('@', TOKat) 1207: #endif 1208: #undef SINGLE 1209: 1210: #define DOUBLE(c1,tok1,c2,tok2) \ 1211: case c1: \ 1212: p++; \ 1213: if (*p == c2) \ 1214: { p++; \ 1215: t->value = tok2; \ 1216: } \ 1217: else \ 1218: t->value = tok1; \ 1219: return; 1220: 1221: DOUBLE('*', TOKmul, '=', TOKmulass) 1222: DOUBLE('%', TOKmod, '=', TOKmodass) 1223: #if DMDV1 1224: DOUBLE('^', TOKxor, '=', TOKxorass) 1225: #endif 1226: #undef DOUBLE 1227: 1228: case '#': 1229: p++; 1230: pragma(); 1231: continue; 1232: 1233: default: 1234: { unsigned c = *p; 1235: 1236: if (c & 0x80) 1237: { c = decodeUTF(); 1238: 1239: // Check for start of unicode identifier 1240: if (isUniAlpha(c)) 1241: goto case_ident; 1242: 1243: if (c == PS || c == LS) 1244: { 1245: 
loc.linnum++; 1246: p++; 1247: continue; 1248: } 1249: } 1250: if (c < 0x80 && isprint(c)) 1251: error("unsupported char '%c'", c); 1252: else 1253: error("unsupported char 0x%02x", c); 1254: p++; 1255: continue; 1256: } 1257: } 1258: } 1259: } 1260: 1261: /******************************************* 1262: * Parse escape sequence. 1263: */ 1264: 1265: unsigned Lexer::escapeSequence() 1266: { unsigned c = *p; 1267: 1268: #ifdef TEXTUAL_ASSEMBLY_OUT 1269: return c; 1270: #endif 1271: int n; 1272: int ndigits; 1273: 1274: switch (c) 1275: { 1276: case '\'': 1277: case '"': 1278: case '?': 1279: case '\\': 1280: Lconsume: 1281: p++; 1282: break; 1283: 1284: case 'a': c = 7; goto Lconsume; 1285: case 'b': c = 8; goto Lconsume; 1286: case 'f': c = 12; goto Lconsume; 1287: case 'n': c = 10; goto Lconsume; 1288: case 'r': c = 13; goto Lconsume; 1289: case 't': c = 9; goto Lconsume; 1290: case 'v': c = 11; goto Lconsume; 1291: 1292: case 'u': 1293: ndigits = 4; 1294: goto Lhex; 1295: case 'U': 1296: ndigits = 8; 1297: goto Lhex; 1298: case 'x': 1299: ndigits = 2; 1300: Lhex: 1301: p++; 1302: c = *p; 1303: if (ishex(c)) 1304: { unsigned v; 1305: 1306: n = 0; 1307: v = 0; 1308: while (1) 1309: { 1310: if (isdigit(c)) 1311: c -= '0'; 1312: else if (islower(c)) 1313: c -= 'a' - 10; 1314: else 1315: c -= 'A' - 10; 1316: v = v * 16 + c; 1317: c = *++p; 1318: if (++n == ndigits) 1319: break; 1320: if (!ishex(c)) 1321: { error("escape hex sequence has %d hex digits instead of %d", n, ndigits); 1322: break; 1323: } 1324: } 1325: if (ndigits != 2 && !utf_isValidDchar(v)) 1326: { error("invalid UTF character \\U%08x", v); 1327: v = '?'; // recover with valid UTF character 1328: } 1329: c = v; 1330: } 1331: else 1332: error("undefined escape hex sequence \\%c\n",c); 1333: break; 1334: 1335: case '&': // named character entity 1336: for (unsigned char *idstart = ++p; 1; p++) 1337: { 1338: switch (*p) 1339: { 1340: case ';': 1341: c = HtmlNamedEntity(idstart, p - idstart); 1342: if (c == 
~0) 1343: { error("unnamed character entity &%.*s;", (int)(p - idstart), idstart); 1344: c = ' '; 1345: } 1346: p++; 1347: break; 1348: 1349: default: 1350: if (isalpha(*p) || 1351: (p != idstart + 1 && isdigit(*p))) 1352: continue; 1353: error("unterminated named entity"); 1354: break; 1355: } 1356: break; 1357: } 1358: break; 1359: 1360: case 0: 1361: case 0x1A: // end of file 1362: c = '\\'; 1363: break; 1364: 1365: default: 1366: if (isoctal(c)) 1367: { unsigned v; 1368: 1369: n = 0; 1370: v = 0; 1371: do 1372: { 1373: v = v * 8 + (c - '0'); 1374: c = *++p; 1375: } while (++n < 3 && isoctal(c)); 1376: c = v; 1377: if (c > 0xFF) 1378: error("0%03o is larger than a byte", c); 1379: } 1380: else 1381: error("undefined escape sequence \\%c\n",c); 1382: break; 1383: } 1384: return c; 1385: } 1386: 1387: /************************************** 1388: */ 1389: 1390: TOK Lexer::wysiwygStringConstant(Token *t, int tc) 1391: { unsigned c; 1392: Loc start = loc; 1393: 1394: p++; 1395: stringbuffer.reset(); 1396: while (1) 1397: { 1398: c = *p++; 1399: switch (c) 1400: { 1401: case '\n': 1402: loc.linnum++; 1403: break; 1404: 1405: case '\r': 1406: if (*p == '\n') 1407: continue; // ignore 1408: c = '\n'; // treat EndOfLine as \n character 1409: loc.linnum++; 1410: break; 1411: 1412: case 0: 1413: case 0x1A: 1414: error("unterminated string constant starting at %s", start.toChars()); 1415: t->ustring = (unsigned char *)""; 1416: t->len = 0; 1417: t->postfix = 0; 1418: return TOKstring; 1419: 1420: case '"': 1421: case '`': 1422: if (c == tc) 1423: { 1424: t->len = stringbuffer.offset; 1425: stringbuffer.writeByte(0); 1426: t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); 1427: memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); 1428: stringPostfix(t); 1429: return TOKstring; 1430: } 1431: break; 1432: 1433: default: 1434: if (c & 0x80) 1435: { p--; 1436: unsigned u = decodeUTF(); 1437: p++; 1438: if (u == PS || u == LS) 1439: loc.linnum++; 1440: 
stringbuffer.writeUTF8(u); 1441: continue; 1442: } 1443: break; 1444: } 1445: stringbuffer.writeByte(c); 1446: } 1447: } 1448: 1449: /************************************** 1450: * Lex hex strings: 1451: * x"0A ae 34FE BD" 1452: */ 1453: 1454: TOK Lexer::hexStringConstant(Token *t) 1455: { unsigned c; 1456: Loc start = loc; 1457: unsigned n = 0; 1458: unsigned v; 1459: 1460: p++; 1461: stringbuffer.reset(); 1462: while (1) 1463: { 1464: c = *p++; 1465: switch (c) 1466: { 1467: case ' ': 1468: case '\t': 1469: case '\v': 1470: case '\f': 1471: continue; // skip white space 1472: 1473: case '\r': 1474: if (*p == '\n') 1475: continue; // ignore 1476: // Treat isolated '\r' as if it were a '\n' 1477: case '\n': 1478: loc.linnum++; 1479: continue; 1480: 1481: case 0: 1482: case 0x1A: 1483: error("unterminated string constant starting at %s", start.toChars()); 1484: t->ustring = (unsigned char *)""; 1485: t->len = 0; 1486: t->postfix = 0; 1487: return TOKstring; 1488: 1489: case '"': 1490: if (n & 1) 1491: { error("odd number (%d) of hex characters in hex string", n); 1492: stringbuffer.writeByte(v); 1493: } 1494: t->len = stringbuffer.offset; 1495: stringbuffer.writeByte(0); 1496: t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); 1497: memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); 1498: stringPostfix(t); 1499: return TOKstring; 1500: 1501: default: 1502: if (c >= '0' && c <= '9') 1503: c -= '0'; 1504: else if (c >= 'a' && c <= 'f') 1505: c -= 'a' - 10; 1506: else if (c >= 'A' && c <= 'F') 1507: c -= 'A' - 10; 1508: else if (c & 0x80) 1509: { p--; 1510: unsigned u = decodeUTF(); 1511: p++; 1512: if (u == PS || u == LS) 1513: loc.linnum++; 1514: else 1515: error("non-hex character \\u%04x", u); 1516: } 1517: else 1518: error("non-hex character '%c'", c); 1519: if (n & 1) 1520: { v = (v << 4) | c; 1521: stringbuffer.writeByte(v); 1522: } 1523: else 1524: v = c; 1525: n++; 1526: break; 1527: } 1528: } 1529: } 1530: 1531: 1532: #if DMDV2 1533: 
/************************************** 1534: * Lex delimited strings: 1535: * q"(foo(xxx))" // "foo(xxx)" 1536: * q"[foo(]" // "foo(" 1537: * q"/foo]/" // "foo]" 1538: * q"HERE 1539: * foo 1540: * HERE" // "foo\n" 1541: * Input: 1542: * p is on the " 1543: */ 1544: 1545: TOK Lexer::delimitedStringConstant(Token *t) 1546: { unsigned c; 1547: Loc start = loc; 1548: unsigned delimleft = 0; 1549: unsigned delimright = 0; 1550: unsigned nest = 1; 1551: unsigned nestcount; 1552: Identifier *hereid = NULL; 1553: unsigned blankrol = 0; 1554: unsigned startline = 0; 1555: 1556: p++; 1557: stringbuffer.reset(); 1558: while (1) 1559: { 1560: c = *p++; 1561: //printf("c = '%c'\n", c); 1562: switch (c) 1563: { 1564: case '\n': 1565: Lnextline: 1566: loc.linnum++; 1567: startline = 1; 1568: if (blankrol) 1569: { blankrol = 0; 1570: continue; 1571: } 1572: if (hereid) 1573: { 1574: stringbuffer.writeUTF8(c); 1575: continue; 1576: } 1577: break; 1578: 1579: case '\r': 1580: if (*p == '\n') 1581: continue; // ignore 1582: c = '\n'; // treat EndOfLine as \n character 1583: goto Lnextline; 1584: 1585: case 0: 1586: case 0x1A: 1587: goto Lerror; 1588: 1589: default: 1590: if (c & 0x80) 1591: { p--; 1592: c = decodeUTF(); 1593: p++; 1594: if (c == PS || c == LS) 1595: goto Lnextline; 1596: } 1597: break; 1598: } 1599: if (delimleft == 0) 1600: { delimleft = c; 1601: nest = 1; 1602: nestcount = 1; 1603: if (c == '(') 1604: delimright = ')'; 1605: else if (c == '{') 1606: delimright = '}'; 1607: else if (c == '[') 1608: delimright = ']'; 1609: else if (c == '<') 1610: delimright = '>'; 1611: else if (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c))) 1612: { // Start of identifier; must be a heredoc 1613: Token t;
warning C6246: Local declaration of 't' hides declaration of the same name in outer scope. For additional information, see previous declaration at line '1545' of 'c:\projects\extern\d\dmd\src\lexer.c': Lines: 1545
1614: p--; 1615: scan(&t); // read in heredoc identifier 1616: if (t.value != TOKidentifier) 1617: { error("identifier expected for heredoc, not %s", t.toChars()); 1618: delimright = c; 1619: } 1620: else 1621: { hereid = t.ident; 1622: //printf("hereid = '%s'\n", hereid->toChars()); 1623: blankrol = 1; 1624: } 1625: nest = 0; 1626: } 1627: else 1628: { delimright = c; 1629: nest = 0; 1630: #if DMDV2 1631: if (isspace(c)) 1632: error("delimiter cannot be whitespace"); 1633: #endif 1634: } 1635: } 1636: else 1637: { 1638: if (blankrol) 1639: { error("heredoc rest of line should be blank"); 1640: blankrol = 0; 1641: continue; 1642: } 1643: if (nest == 1) 1644: { 1645: if (c == delimleft) 1646: nestcount++; 1647: else if (c == delimright) 1648: { nestcount--; 1649: if (nestcount == 0) 1650: goto Ldone; 1651: } 1652: } 1653: else if (c == delimright) 1654: goto Ldone; 1655: if (startline && isalpha(c) 1656: #if DMDV2 1657: && hereid 1658: #endif 1659: ) 1660: { Token t;
warning C6246: Local declaration of 't' hides declaration of the same name in outer scope. For additional information, see previous declaration at line '1545' of 'c:\projects\extern\d\dmd\src\lexer.c': Lines: 1545
1661: unsigned char *psave = p; 1662: p--; 1663: scan(&t); // read in possible heredoc identifier 1664: //printf("endid = '%s'\n", t.ident->toChars()); 1665: if (t.value == TOKidentifier && t.ident->equals(hereid)) 1666: { /* should check that rest of line is blank 1667: */ 1668: goto Ldone; 1669: } 1670: p = psave; 1671: } 1672: stringbuffer.writeUTF8(c); 1673: startline = 0; 1674: } 1675: } 1676: 1677: Ldone: 1678: if (*p == '"') 1679: p++; 1680: else 1681: error("delimited string must end in %c\"", delimright); 1682: t->len = stringbuffer.offset; 1683: stringbuffer.writeByte(0); 1684: t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); 1685: memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); 1686: stringPostfix(t); 1687: return TOKstring; 1688: 1689: Lerror: 1690: error("unterminated string constant starting at %s", start.toChars()); 1691: t->ustring = (unsigned char *)""; 1692: t->len = 0; 1693: t->postfix = 0; 1694: return TOKstring; 1695: } 1696: 1697: /************************************** 1698: * Lex delimited strings: 1699: * q{ foo(xxx) } // " foo(xxx) " 1700: * q{foo(} // "foo(" 1701: * q{{foo}"}"} // "{foo}"}"" 1702: * Input: 1703: * p is on the q 1704: */ 1705: 1706: TOK Lexer::tokenStringConstant(Token *t) 1707: { 1708: unsigned nest = 1; 1709: Loc start = loc; 1710: unsigned char *pstart = ++p; 1711: 1712: while (1) 1713: { Token tok; 1714: 1715: scan(&tok); 1716: switch (tok.value) 1717: { 1718: case TOKlcurly: 1719: nest++; 1720: continue; 1721: 1722: case TOKrcurly: 1723: if (--nest == 0) 1724: goto Ldone; 1725: continue; 1726: 1727: case TOKeof: 1728: goto Lerror; 1729: 1730: default: 1731: continue; 1732: } 1733: } 1734: 1735: Ldone: 1736: t->len = p - 1 - pstart; 1737: t->ustring = (unsigned char *)mem.malloc(t->len + 1); 1738: memcpy(t->ustring, pstart, t->len); 1739: t->ustring[t->len] = 0; 1740: stringPostfix(t); 1741: return TOKstring; 1742: 1743: Lerror: 1744: error("unterminated token string constant starting at %s", 
start.toChars()); 1745: t->ustring = (unsigned char *)""; 1746: t->len = 0; 1747: t->postfix = 0; 1748: return TOKstring; 1749: } 1750: 1751: #endif 1752: 1753: 1754: /************************************** 1755: */ 1756: 1757: TOK Lexer::escapeStringConstant(Token *t, int wide) 1758: { unsigned c; 1759: Loc start = loc; 1760: 1761: p++; 1762: stringbuffer.reset(); 1763: while (1) 1764: { 1765: c = *p++; 1766: switch (c) 1767: { 1768: #if !( TEXTUAL_ASSEMBLY_OUT ) 1769: case '\\': 1770: switch (*p) 1771: { 1772: case 'u': 1773: case 'U': 1774: case '&': 1775: c = escapeSequence(); 1776: stringbuffer.writeUTF8(c); 1777: continue; 1778: 1779: default: 1780: c = escapeSequence(); 1781: break; 1782: } 1783: break; 1784: #endif 1785: case '\n': 1786: loc.linnum++; 1787: break; 1788: 1789: case '\r': 1790: if (*p == '\n') 1791: continue; // ignore 1792: c = '\n'; // treat EndOfLine as \n character 1793: loc.linnum++; 1794: break; 1795: 1796: case '"': 1797: t->len = stringbuffer.offset; 1798: stringbuffer.writeByte(0); 1799: t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset); 1800: memcpy(t->ustring, stringbuffer.data, stringbuffer.offset); 1801: stringPostfix(t); 1802: return TOKstring; 1803: 1804: case 0: 1805: case 0x1A: 1806: p--; 1807: error("unterminated string constant starting at %s", start.toChars()); 1808: t->ustring = (unsigned char *)""; 1809: t->len = 0; 1810: t->postfix = 0; 1811: return TOKstring; 1812: 1813: default: 1814: if (c & 0x80) 1815: { 1816: p--; 1817: c = decodeUTF(); 1818: if (c == LS || c == PS) 1819: { c = '\n'; 1820: loc.linnum++; 1821: } 1822: p++; 1823: stringbuffer.writeUTF8(c); 1824: continue; 1825: } 1826: break; 1827: } 1828: stringbuffer.writeByte(c); 1829: } 1830: } 1831: 1832: /************************************** 1833: */ 1834: 1835: TOK Lexer::charConstant(Token *t, int wide) 1836: { 1837: unsigned c; 1838: TOK tk = TOKcharv; 1839: 1840: //printf("Lexer::charConstant\n"); 1841: p++; 1842: c = *p++; 1843: switch (c) 
1844: { 1845: #if ! TEXTUAL_ASSEMBLY_OUT 1846: case '\\': 1847: switch (*p) 1848: { 1849: case 'u': 1850: t->uns64value = escapeSequence(); 1851: tk = TOKwcharv; 1852: break; 1853: 1854: case 'U': 1855: case '&': 1856: t->uns64value = escapeSequence(); 1857: tk = TOKdcharv; 1858: break; 1859: 1860: default: 1861: t->uns64value = escapeSequence(); 1862: break; 1863: } 1864: break; 1865: #endif 1866: case '\n': 1867: L1: 1868: loc.linnum++; 1869: case '\r': 1870: case 0: 1871: case 0x1A: 1872: case '\'': 1873: error("unterminated character constant"); 1874: return tk; 1875: 1876: default: 1877: if (c & 0x80) 1878: { 1879: p--; 1880: c = decodeUTF(); 1881: p++; 1882: if (c == LS || c == PS) 1883: goto L1; 1884: if (c < 0xD800 || (c >= 0xE000 && c < 0xFFFE)) 1885: tk = TOKwcharv; 1886: else 1887: tk = TOKdcharv; 1888: } 1889: t->uns64value = c; 1890: break; 1891: } 1892: 1893: if (*p != '\'') 1894: { error("unterminated character constant"); 1895: return tk; 1896: } 1897: p++; 1898: return tk; 1899: } 1900: 1901: /*************************************** 1902: * Get postfix of string literal. 1903: */ 1904: 1905: void Lexer::stringPostfix(Token *t) 1906: { 1907: switch (*p) 1908: { 1909: case 'c': 1910: case 'w': 1911: case 'd': 1912: t->postfix = *p; 1913: p++; 1914: break; 1915: 1916: default: 1917: t->postfix = 0; 1918: break; 1919: } 1920: } 1921: 1922: /*************************************** 1923: * Read \u or \U unicode sequence 1924: * Input: 1925: * u 'u' or 'U' 1926: */ 1927: 1928: #if 0 1929: unsigned Lexer::wchar(unsigned u) 1930: { 1931: unsigned value; 1932: unsigned n; 1933: unsigned char c; 1934: unsigned nchars; 1935: 1936: nchars = (u == 'U') ? 
8 : 4; 1937: value = 0; 1938: for (n = 0; 1; n++) 1939: { 1940: ++p; 1941: if (n == nchars) 1942: break; 1943: c = *p; 1944: if (!ishex(c)) 1945: { error("\\%c sequence must be followed by %d hex characters", u, nchars); 1946: break; 1947: } 1948: if (isdigit(c)) 1949: c -= '0'; 1950: else if (islower(c)) 1951: c -= 'a' - 10; 1952: else 1953: c -= 'A' - 10; 1954: value <<= 4; 1955: value |= c; 1956: } 1957: return value; 1958: } 1959: #endif 1960: 1961: /************************************** 1962: * Read in a number. 1963: * If it's an integer, store it in tok.TKutok.Vlong. 1964: * integers can be decimal, octal or hex 1965: * Handle the suffixes U, UL, LU, L, etc. 1966: * If it's double, store it in tok.TKutok.Vdouble. 1967: * Returns: 1968: * TKnum 1969: * TKdouble,... 1970: */ 1971: 1972: TOK Lexer::number(Token *t) 1973: { 1974: // We use a state machine to collect numbers 1975: enum STATE { STATE_initial, STATE_0, STATE_decimal, STATE_octal, STATE_octale, 1976: STATE_hex, STATE_binary, STATE_hex0, STATE_binary0, 1977: STATE_hexh, STATE_error }; 1978: enum STATE state; 1979: 1980: enum FLAGS 1981: { FLAGS_decimal = 1, // decimal 1982: FLAGS_unsigned = 2, // u or U suffix 1983: FLAGS_long = 4, // l or L suffix 1984: }; 1985: enum FLAGS flags = FLAGS_decimal; 1986: 1987: int i;
warning C4101: 'i' : unreferenced local variable
1988: int base; 1989: unsigned c; 1990: unsigned char *start; 1991: TOK result; 1992: 1993: //printf("Lexer::number()\n"); 1994: state = STATE_initial; 1995: base = 0; 1996: stringbuffer.reset(); 1997: start = p; 1998: while (1) 1999: { 2000: c = *p; 2001: switch (state) 2002: { 2003: case STATE_initial: // opening state 2004: if (c == '0') 2005: state = STATE_0; 2006: else 2007: state = STATE_decimal; 2008: break; 2009: 2010: case STATE_0: 2011: flags = (FLAGS) (flags & ~FLAGS_decimal); 2012: switch (c) 2013: { 2014: #if ZEROH 2015: case 'H': // 0h 2016: case 'h': 2017: goto hexh; 2018: #endif 2019: case 'X': 2020: case 'x': 2021: state = STATE_hex0; 2022: break; 2023: 2024: case '.': 2025: if (p[1] == '.') // .. is a separate token 2026: goto done; 2027: case 'i': 2028: case 'f': 2029: case 'F': 2030: goto real; 2031: #if ZEROH 2032: case 'E': 2033: case 'e': 2034: goto case_hex; 2035: #endif 2036: case 'B': 2037: case 'b': 2038: state = STATE_binary0; 2039: break; 2040: 2041: case '0': case '1': case '2': case '3': 2042: case '4': case '5': case '6': case '7': 2043: state = STATE_octal; 2044: break; 2045: 2046: #if ZEROH 2047: case '8': case '9': case 'A': 2048: case 'C': case 'D': case 'F': 2049: case 'a': case 'c': case 'd': case 'f': 2050: case_hex: 2051: state = STATE_hexh; 2052: break; 2053: #endif 2054: case '_': 2055: state = STATE_octal; 2056: p++; 2057: continue; 2058: 2059: case 'L': 2060: if (p[1] == 'i') 2061: goto real; 2062: goto done; 2063: 2064: default: 2065: goto done; 2066: } 2067: break; 2068: 2069: case STATE_decimal: // reading decimal number 2070: if (!isdigit(c)) 2071: { 2072: #if ZEROH 2073: if (ishex(c) 2074: || c == 'H' || c == 'h' 2075: ) 2076: goto hexh; 2077: #endif 2078: if (c == '_') // ignore embedded _ 2079: { p++; 2080: continue; 2081: } 2082: if (c == '.' && p[1] != '.') 2083: goto real; 2084: else if (c == 'i' || c == 'f' || c == 'F' || 2085: c == 'e' || c == 'E') 2086: { 2087: real: // It's a real number. 
Back up and rescan as a real 2088: p = start; 2089: return inreal(t); 2090: } 2091: else if (c == 'L' && p[1] == 'i') 2092: goto real; 2093: goto done; 2094: } 2095: break; 2096: 2097: case STATE_hex0: // reading hex number 2098: case STATE_hex: 2099: if (!ishex(c)) 2100: { 2101: if (c == '_') // ignore embedded _ 2102: { p++; 2103: continue; 2104: } 2105: if (c == '.' && p[1] != '.') 2106: goto real; 2107: if (c == 'P' || c == 'p' || c == 'i') 2108: goto real; 2109: if (state == STATE_hex0) 2110: error("Hex digit expected, not '%c'", c); 2111: goto done; 2112: } 2113: state = STATE_hex; 2114: break; 2115: 2116: #if ZEROH 2117: hexh: 2118: state = STATE_hexh; 2119: case STATE_hexh: // parse numbers like 0FFh 2120: if (!ishex(c)) 2121: { 2122: if (c == 'H' || c == 'h') 2123: { 2124: p++; 2125: base = 16; 2126: goto done; 2127: } 2128: else 2129: { 2130: // Check for something like 1E3 or 0E24 2131: if (memchr((char *)stringbuffer.data, 'E', stringbuffer.offset) || 2132: memchr((char *)stringbuffer.data, 'e', stringbuffer.offset)) 2133: goto real; 2134: error("Hex digit expected, not '%c'", c); 2135: goto done; 2136: } 2137: } 2138: break; 2139: #endif 2140: 2141: case STATE_octal: // reading octal number 2142: case STATE_octale: // reading octal number with non-octal digits 2143: if (!isoctal(c)) 2144: { 2145: #if ZEROH 2146: if (ishex(c) 2147: || c == 'H' || c == 'h' 2148: ) 2149: goto hexh; 2150: #endif 2151: if (c == '_') // ignore embedded _ 2152: { p++; 2153: continue; 2154: } 2155: if (c == '.' 
&& p[1] != '.') 2156: goto real; 2157: if (c == 'i') 2158: goto real; 2159: if (isdigit(c)) 2160: { 2161: state = STATE_octale; 2162: } 2163: else 2164: goto done; 2165: } 2166: break; 2167: 2168: case STATE_binary0: // starting binary number 2169: case STATE_binary: // reading binary number 2170: if (c != '0' && c != '1') 2171: { 2172: #if ZEROH 2173: if (ishex(c) 2174: || c == 'H' || c == 'h' 2175: ) 2176: goto hexh; 2177: #endif 2178: if (c == '_') // ignore embedded _ 2179: { p++; 2180: continue; 2181: } 2182: if (state == STATE_binary0) 2183: { error("binary digit expected"); 2184: state = STATE_error; 2185: break; 2186: } 2187: else 2188: goto done; 2189: } 2190: state = STATE_binary; 2191: break; 2192: 2193: case STATE_error: // for error recovery 2194: if (!isdigit(c)) // scan until non-digit 2195: goto done; 2196: break; 2197: 2198: default: 2199: assert(0); 2200: } 2201: stringbuffer.writeByte(c); 2202: p++; 2203: } 2204: done: 2205: stringbuffer.writeByte(0); // terminate string 2206: if (state == STATE_octale) 2207: error("Octal digit expected"); 2208: 2209: uinteger_t n; // unsigned >=64 bit integer type 2210: 2211: if (stringbuffer.offset == 2 && (state == STATE_decimal || state == STATE_0)) 2212: n = stringbuffer.data[0] - '0'; 2213: else 2214: { 2215: // Convert string to integer 2216: #if __DMC__ 2217: errno = 0; 2218: n = strtoull((char *)stringbuffer.data,NULL,base); 2219: if (errno == ERANGE) 2220: error("integer overflow"); 2221: #else 2222: // Not everybody implements strtoull() 2223: char *p = (char *)stringbuffer.data; 2224: int r = 10, d; 2225: 2226: if (*p == '0') 2227: { 2228: if (p[1] == 'x' || p[1] == 'X') 2229: p += 2, r = 16; 2230: else if (p[1] == 'b' || p[1] == 'B') 2231: p += 2, r = 2; 2232: else if (isdigit(p[1]))
warning C6328: 'char' passed as parameter '1' when 'unsigned char' is required in call to 'isdigit'
2233: p += 1, r = 8; 2234: } 2235: 2236: n = 0; 2237: while (1) 2238: { 2239: if (*p >= '0' && *p <= '9') 2240: d = *p - '0'; 2241: else if (*p >= 'a' && *p <= 'z') 2242: d = *p - 'a' + 10; 2243: else if (*p >= 'A' && *p <= 'Z') 2244: d = *p - 'A' + 10; 2245: else 2246: break; 2247: if (d >= r) 2248: break; 2249: uinteger_t n2 = n * r; 2250: //printf("n2 / r = %llx, n = %llx\n", n2/r, n); 2251: if (n2 / r != n || n2 + d < n) 2252: { 2253: error ("integer overflow"); 2254: break; 2255: } 2256: 2257: n = n2 + d; 2258: p++; 2259: } 2260: #endif 2261: if (sizeof(n) > 8 && 2262: n > 0xFFFFFFFFFFFFFFFFULL) // if n needs more than 64 bits 2263: error("integer overflow"); 2264: } 2265: 2266: // Parse trailing 'u', 'U', 'l' or 'L' in any combination 2267: const unsigned char *psuffix = p; 2268: while (1) 2269: { unsigned char f; 2270: 2271: switch (*p) 2272: { case 'U': 2273: case 'u': 2274: f = FLAGS_unsigned; 2275: goto L1; 2276: 2277: case 'l': 2278: if (1 || !global.params.useDeprecated) 2279: error("'l' suffix is deprecated, use 'L' instead"); 2280: case 'L': 2281: f = FLAGS_long; 2282: L1: 2283: p++; 2284: if (flags & f) 2285: error("unrecognized token"); 2286: flags = (FLAGS) (flags | f); 2287: continue; 2288: default: 2289: break; 2290: } 2291: break; 2292: } 2293: 2294: if (state == STATE_octal && n >= 8 && !global.params.useDeprecated) 2295: error("octal literals 0%llo%.*s are deprecated, use std.conv.octal!%llo%.*s instead", 2296: n, p - psuffix, psuffix, n, p - psuffix, psuffix); 2297: 2298: switch (flags) 2299: { 2300: case 0: 2301: /* Octal or Hexadecimal constant. 
2302: * First that fits: int, uint, long, ulong 2303: */ 2304: if (n & 0x8000000000000000LL) 2305: result = TOKuns64v; 2306: else if (n & 0xFFFFFFFF00000000LL) 2307: result = TOKint64v; 2308: else if (n & 0x80000000) 2309: result = TOKuns32v; 2310: else 2311: result = TOKint32v; 2312: break; 2313: 2314: case FLAGS_decimal: 2315: /* First that fits: int, long, long long 2316: */ 2317: if (n & 0x8000000000000000LL) 2318: { error("signed integer overflow"); 2319: result = TOKuns64v; 2320: } 2321: else if (n & 0xFFFFFFFF80000000LL) 2322: result = TOKint64v; 2323: else 2324: result = TOKint32v; 2325: break; 2326: 2327: case FLAGS_unsigned: 2328: case FLAGS_decimal | FLAGS_unsigned: 2329: /* First that fits: uint, ulong 2330: */ 2331: if (n & 0xFFFFFFFF00000000LL) 2332: result = TOKuns64v; 2333: else 2334: result = TOKuns32v; 2335: break; 2336: 2337: case FLAGS_decimal | FLAGS_long: 2338: if (n & 0x8000000000000000LL) 2339: { error("signed integer overflow"); 2340: result = TOKuns64v; 2341: } 2342: else 2343: result = TOKint64v; 2344: break; 2345: 2346: case FLAGS_long: 2347: if (n & 0x8000000000000000LL) 2348: result = TOKuns64v; 2349: else 2350: result = TOKint64v; 2351: break; 2352: 2353: case FLAGS_unsigned | FLAGS_long: 2354: case FLAGS_decimal | FLAGS_unsigned | FLAGS_long: 2355: result = TOKuns64v; 2356: break; 2357: 2358: default: 2359: #ifdef DEBUG 2360: printf("%x\n",flags); 2361: #endif 2362: assert(0); 2363: } 2364: t->uns64value = n; 2365: return result; 2366: } 2367: 2368: /************************************** 2369: * Read in characters, converting them to real. 2370: * Bugs: 2371: * Exponent overflow not detected. 2372: * Too much requested precision is not detected. 2373: */ 2374: 2375: TOK Lexer::inreal(Token *t) 2376: #ifdef __DMC__ 2377: __in 2378: { 2379: assert(*p == '.' 
|| isdigit(*p)); 2380: } 2381: __out (result) 2382: { 2383: switch (result) 2384: { 2385: case TOKfloat32v: 2386: case TOKfloat64v: 2387: case TOKfloat80v: 2388: case TOKimaginary32v: 2389: case TOKimaginary64v: 2390: case TOKimaginary80v: 2391: break; 2392: 2393: default: 2394: assert(0); 2395: } 2396: } 2397: __body 2398: #endif /* __DMC__ */ 2399: { int dblstate; 2400: unsigned c; 2401: char hex; // is this a hexadecimal-floating-constant? 2402: TOK result; 2403: 2404: //printf("Lexer::inreal()\n"); 2405: stringbuffer.reset(); 2406: dblstate = 0; 2407: hex = 0; 2408: Lnext: 2409: while (1) 2410: { 2411: // Get next char from input 2412: c = *p++; 2413: //printf("dblstate = %d, c = '%c'\n", dblstate, c); 2414: while (1) 2415: { 2416: switch (dblstate) 2417: { 2418: case 0: // opening state 2419: if (c == '0') 2420: dblstate = 9; 2421: else if (c == '.') 2422: dblstate = 3; 2423: else 2424: dblstate = 1; 2425: break; 2426: 2427: case 9: 2428: dblstate = 1; 2429: if (c == 'X' || c == 'x') 2430: { hex++; 2431: break; 2432: } 2433: case 1: // digits to left of . 2434: case 3: // digits to right of . 2435: case 7: // continuing exponent digits 2436: if (!isdigit(c) && !(hex && isxdigit(c))) 2437: { 2438: if (c == '_') 2439: goto Lnext; // ignore embedded '_' 2440: dblstate++; 2441: continue; 2442: } 2443: break; 2444: 2445: case 2: // no more digits to left of . 2446: if (c == '.') 2447: { dblstate++; 2448: break; 2449: } 2450: case 4: // no more digits to right of . 
2451: if ((c == 'E' || c == 'e') || 2452: hex && (c == 'P' || c == 'p')) 2453: { dblstate = 5; 2454: hex = 0; // exponent is always decimal 2455: break; 2456: } 2457: if (hex) 2458: error("binary-exponent-part required"); 2459: goto done; 2460: 2461: case 5: // looking immediately to right of E 2462: dblstate++; 2463: if (c == '-' || c == '+') 2464: break; 2465: case 6: // 1st exponent digit expected 2466: if (!isdigit(c)) 2467: error("exponent expected"); 2468: dblstate++; 2469: break; 2470: 2471: case 8: // past end of exponent digits 2472: goto done; 2473: } 2474: break; 2475: } 2476: stringbuffer.writeByte(c); 2477: } 2478: done: 2479: p--; 2480: 2481: stringbuffer.writeByte(0); 2482: 2483: #if _WIN32 && __DMC__ 2484: char *save = __locale_decpoint; 2485: __locale_decpoint = "."; 2486: #endif 2487: #ifdef IN_GCC 2488: t->float80value = real_t::parse((char *)stringbuffer.data, real_t::LongDouble); 2489: #else 2490: t->float80value = strtold((char *)stringbuffer.data, NULL); 2491: #endif 2492: errno = 0; 2493: switch (*p) 2494: { 2495: case 'F': 2496: case 'f': 2497: #ifdef IN_GCC 2498: real_t::parse((char *)stringbuffer.data, real_t::Float); 2499: #else 2500: { // Only interested in errno return 2501: float f = strtof((char *)stringbuffer.data, NULL);
warning C4244: 'initializing' : conversion from 'double' to 'float', possible loss of data
2502: // Assign to f to keep gcc warnings at bay 2503: } 2504: #endif 2505: result = TOKfloat32v; 2506: p++; 2507: break; 2508: 2509: default: 2510: #ifdef IN_GCC 2511: real_t::parse((char *)stringbuffer.data, real_t::Double); 2512: #else 2513: /* Should do our own strtod(), since dmc and linux gcc 2514: * accept 2.22507e-308, while apple gcc will only take 2515: * 2.22508e-308. Not sure who is right. 2516: */ 2517: { // Only interested in errno return 2518: double d = strtod((char *)stringbuffer.data, NULL); 2519: // Assign to d to keep gcc warnings at bay 2520: } 2521: #endif 2522: result = TOKfloat64v; 2523: break; 2524: 2525: case 'l': 2526: if (!global.params.useDeprecated) 2527: error("'l' suffix is deprecated, use 'L' instead"); 2528: case 'L': 2529: result = TOKfloat80v; 2530: p++; 2531: break; 2532: } 2533: if (*p == 'i' || *p == 'I') 2534: { 2535: if (!global.params.useDeprecated && *p == 'I') 2536: error("'I' suffix is deprecated, use 'i' instead"); 2537: p++; 2538: switch (result) 2539: { 2540: case TOKfloat32v: 2541: result = TOKimaginary32v; 2542: break; 2543: case TOKfloat64v: 2544: result = TOKimaginary64v; 2545: break; 2546: case TOKfloat80v: 2547: result = TOKimaginary80v; 2548: break; 2549: } 2550: } 2551: #if _WIN32 && __DMC__ 2552: __locale_decpoint = save; 2553: #endif 2554: if (errno == ERANGE) 2555: error("number is not representable"); 2556: return result; 2557: } 2558: 2559: /********************************************* 2560: * Do pragma. 2561: * Currently, the only pragma supported is: 2562: * #line linnum [filespec] 2563: */ 2564: 2565: void Lexer::pragma() 2566: { 2567: Token tok; 2568: int linnum; 2569: char *filespec = NULL; 2570: Loc loc = this->loc; 2571: 2572: scan(&tok); 2573: if (tok.value != TOKidentifier || tok.ident != Id::line) 2574: goto Lerr; 2575: 2576: scan(&tok); 2577: if (tok.value == TOKint32v || tok.value == TOKint64v) 2578: linnum = tok.uns64value - 1;
warning C4244: '=' : conversion from 'd_uns64' to 'int', possible loss of data
2579: else 2580: goto Lerr; 2581: 2582: while (1) 2583: { 2584: switch (*p) 2585: { 2586: case 0: 2587: case 0x1A: 2588: case '\n': 2589: Lnewline: 2590: this->loc.linnum = linnum; 2591: if (filespec) 2592: this->loc.filename = filespec; 2593: return; 2594: 2595: case '\r': 2596: p++; 2597: if (*p != '\n') 2598: { p--; 2599: goto Lnewline; 2600: } 2601: continue; 2602: 2603: case ' ': 2604: case '\t': 2605: case '\v': 2606: case '\f': 2607: p++; 2608: continue; // skip white space 2609: 2610: case '_': 2611: if (mod && memcmp(p, "__FILE__", 8) == 0) 2612: { 2613: p += 8; 2614: filespec = mem.strdup(loc.filename ? loc.filename : mod->ident->toChars()); 2615: } 2616: continue; 2617: 2618: case '"': 2619: if (filespec) 2620: goto Lerr; 2621: stringbuffer.reset(); 2622: p++; 2623: while (1) 2624: { unsigned c; 2625: 2626: c = *p; 2627: switch (c) 2628: { 2629: case '\n': 2630: case '\r': 2631: case 0: 2632: case 0x1A: 2633: goto Lerr; 2634: 2635: case '"': 2636: stringbuffer.writeByte(0); 2637: filespec = mem.strdup((char *)stringbuffer.data); 2638: p++; 2639: break; 2640: 2641: default: 2642: if (c & 0x80) 2643: { unsigned u = decodeUTF(); 2644: if (u == PS || u == LS) 2645: goto Lerr; 2646: } 2647: stringbuffer.writeByte(c); 2648: p++; 2649: continue; 2650: } 2651: break; 2652: } 2653: continue; 2654: 2655: default: 2656: if (*p & 0x80) 2657: { unsigned u = decodeUTF(); 2658: if (u == PS || u == LS) 2659: goto Lnewline; 2660: } 2661: goto Lerr; 2662: } 2663: } 2664: 2665: Lerr: 2666: error(loc, "#line integer [\"filespec\"]\\n expected"); 2667: } 2668: 2669: 2670: /******************************************** 2671: * Decode UTF character. 2672: * Issue error messages for invalid sequences. 2673: * Return decoded character, advance p to last character in UTF sequence. 
2674: */ 2675: 2676: unsigned Lexer::decodeUTF() 2677: { 2678: dchar_t u; 2679: unsigned char c; 2680: unsigned char *s = p; 2681: size_t len; 2682: size_t idx; 2683: const char *msg; 2684: 2685: c = *s; 2686: assert(c & 0x80); 2687: 2688: // Check length of remaining string up to 6 UTF-8 characters 2689: for (len = 1; len < 6 && s[len]; len++) 2690: ; 2691: 2692: idx = 0; 2693: msg = utf_decodeChar(s, len, &idx, &u); 2694: p += idx - 1; 2695: if (msg) 2696: { 2697: error("%s", msg); 2698: } 2699: return u; 2700: } 2701: 2702: 2703: /*************************************************** 2704: * Parse doc comment embedded between t->ptr and p. 2705: * Remove trailing blanks and tabs from lines. 2706: * Replace all newlines with \n. 2707: * Remove leading comment character from each line. 2708: * Decide if it's a lineComment or a blockComment. 2709: * Append to previous one for this token. 2710: */ 2711: 2712: void Lexer::getDocComment(Token *t, unsigned lineComment) 2713: { 2714: /* ct tells us which kind of comment it is: '/', '*', or '+' 2715: */ 2716: unsigned char ct = t->ptr[2]; 2717: 2718: /* Start of comment text skips over / * *, / + +, or / / / 2719: */ 2720: unsigned char *q = t->ptr + 3; // start of comment text 2721: 2722: unsigned char *qend = p; 2723: if (ct == '*' || ct == '+') 2724: qend -= 2; 2725: 2726: /* Scan over initial row of ****'s or ++++'s or ////'s 2727: */ 2728: for (; q < qend; q++) 2729: { 2730: if (*q != ct) 2731: break; 2732: } 2733: 2734: /* Remove trailing row of ****'s or ++++'s 2735: */ 2736: if (ct != '/') 2737: { 2738: for (; q < qend; qend--) 2739: { 2740: if (qend[-1] != ct) 2741: break; 2742: } 2743: } 2744: 2745: /* Comment is now [q .. qend]. 2746: * Canonicalize it into buf[]. 
2747: */ 2748: OutBuffer buf; 2749: int linestart = 0; 2750: 2751: for (; q < qend; q++) 2752: { 2753: unsigned char c = *q; 2754: 2755: switch (c) 2756: { 2757: case '*': 2758: case '+': 2759: if (linestart && c == ct) 2760: { linestart = 0; 2761: /* Trim preceding whitespace up to preceding \n 2762: */ 2763: while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t')) 2764: buf.offset--; 2765: continue; 2766: } 2767: break; 2768: 2769: case ' ': 2770: case '\t': 2771: break; 2772: 2773: case '\r': 2774: if (q[1] == '\n') 2775: continue; // skip the \r 2776: goto Lnewline; 2777: 2778: default: 2779: if (c == 226) 2780: { 2781: // If LS or PS 2782: if (q[1] == 128 && 2783: (q[2] == 168 || q[2] == 169)) 2784: { 2785: q += 2; 2786: goto Lnewline; 2787: } 2788: } 2789: linestart = 0; 2790: break; 2791: 2792: Lnewline: 2793: c = '\n'; // replace all newlines with \n 2794: case '\n': 2795: linestart = 1; 2796: 2797: /* Trim trailing whitespace 2798: */ 2799: while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t')) 2800: buf.offset--; 2801: 2802: break; 2803: } 2804: buf.writeByte(c); 2805: } 2806: 2807: // Always end with a newline 2808: if (!buf.offset || buf.data[buf.offset - 1] != '\n') 2809: buf.writeByte('\n'); 2810: 2811: buf.writeByte(0); 2812: 2813: // It's a line comment if the start of the doc comment comes 2814: // after other non-whitespace on the same line. 2815: unsigned char** dc = (lineComment && anyToken) 2816: ? &t->lineComment 2817: : &t->blockComment; 2818: 2819: // Combine with previous doc comment, if any 2820: if (*dc) 2821: *dc = combineComments(*dc, (unsigned char *)buf.data); 2822: else 2823: *dc = (unsigned char *)buf.extractData(); 2824: } 2825: 2826: /******************************************** 2827: * Combine two document comments into one, 2828: * separated by a newline. 
2829: */ 2830: 2831: unsigned char *Lexer::combineComments(unsigned char *c1, unsigned char *c2) 2832: { 2833: //printf("Lexer::combineComments('%s', '%s')\n", c1, c2); 2834: 2835: unsigned char *c = c2; 2836: 2837: if (c1) 2838: { c = c1; 2839: if (c2) 2840: { size_t len1 = strlen((char *)c1); 2841: size_t len2 = strlen((char *)c2); 2842: 2843: c = (unsigned char *)mem.malloc(len1 + 1 + len2 + 1); 2844: memcpy(c, c1, len1); 2845: if (len1 && c1[len1 - 1] != '\n') 2846: { c[len1] = '\n'; 2847: len1++; 2848: } 2849: memcpy(c + len1, c2, len2); 2850: c[len1 + len2] = 0; 2851: } 2852: } 2853: return c; 2854: } 2855: 2856: /******************************************** 2857: * Create an identifier in the string table. 2858: */ 2859: 2860: Identifier *Lexer::idPool(const char *s) 2861: { 2862: size_t len = strlen(s); 2863: StringValue *sv = stringtable.update(s, len); 2864: Identifier *id = (Identifier *) sv->ptrvalue; 2865: if (!id) 2866: { 2867: id = new Identifier(sv->lstring.string, TOKidentifier); 2868: sv->ptrvalue = id; 2869: } 2870: return id; 2871: } 2872: 2873: /********************************************* 2874: * Create a unique identifier using the prefix s. 2875: */ 2876: 2877: Identifier *Lexer::uniqueId(const char *s, int num) 2878: { char buffer[32]; 2879: size_t slen = strlen(s); 2880: 2881: assert(slen + sizeof(num) * 3 + 1 <= sizeof(buffer)); 2882: sprintf(buffer, "%s%d", s, num);
warning C4996: 'sprintf': This function or variable may be unsafe. Consider using sprintf_s instead. To disable deprecation, use _CRT_SECURE_NO_WARNINGS. See online help for details. c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include\stdio.h(371) : see declaration of 'sprintf'
2883: return idPool(buffer); 2884: } 2885: 2886: Identifier *Lexer::uniqueId(const char *s) 2887: { 2888: static int num; 2889: return uniqueId(s, ++num); 2890: } 2891: 2892: /**************************************** 2893: */ 2894: 2895: struct Keyword 2896: { const char *name; 2897: enum TOK value; 2898: }; 2899: 2900: static Keyword keywords[] = 2901: { 2902: // { "", TOK }, 2903: 2904: { "this", TOKthis }, 2905: { "super", TOKsuper }, 2906: { "assert", TOKassert }, 2907: { "null", TOKnull }, 2908: { "true", TOKtrue }, 2909: { "false", TOKfalse }, 2910: { "cast", TOKcast }, 2911: { "new", TOKnew }, 2912: { "delete", TOKdelete }, 2913: { "throw", TOKthrow }, 2914: { "module", TOKmodule }, 2915: { "pragma", TOKpragma }, 2916: { "typeof", TOKtypeof }, 2917: { "typeid", TOKtypeid }, 2918: 2919: { "template", TOKtemplate }, 2920: 2921: { "void", TOKvoid }, 2922: { "byte", TOKint8 }, 2923: { "ubyte", TOKuns8 }, 2924: { "short", TOKint16 }, 2925: { "ushort", TOKuns16 }, 2926: { "int", TOKint32 }, 2927: { "uint", TOKuns32 }, 2928: { "long", TOKint64 }, 2929: { "ulong", TOKuns64 }, 2930: { "cent", TOKcent, }, 2931: { "ucent", TOKucent, }, 2932: { "float", TOKfloat32 }, 2933: { "double", TOKfloat64 }, 2934: { "real", TOKfloat80 }, 2935: 2936: { "bool", TOKbool }, 2937: { "char", TOKchar }, 2938: { "wchar", TOKwchar }, 2939: { "dchar", TOKdchar }, 2940: 2941: { "ifloat", TOKimaginary32 }, 2942: { "idouble", TOKimaginary64 }, 2943: { "ireal", TOKimaginary80 }, 2944: 2945: { "cfloat", TOKcomplex32 }, 2946: { "cdouble", TOKcomplex64 }, 2947: { "creal", TOKcomplex80 }, 2948: 2949: { "delegate", TOKdelegate }, 2950: { "function", TOKfunction }, 2951: 2952: { "is", TOKis }, 2953: { "if", TOKif }, 2954: { "else", TOKelse }, 2955: { "while", TOKwhile }, 2956: { "for", TOKfor }, 2957: { "do", TOKdo }, 2958: { "switch", TOKswitch }, 2959: { "case", TOKcase }, 2960: { "default", TOKdefault }, 2961: { "break", TOKbreak }, 2962: { "continue", TOKcontinue }, 2963: { "synchronized", 
TOKsynchronized }, 2964: { "return", TOKreturn }, 2965: { "goto", TOKgoto }, 2966: { "try", TOKtry }, 2967: { "catch", TOKcatch }, 2968: { "finally", TOKfinally }, 2969: { "with", TOKwith }, 2970: { "asm", TOKasm }, 2971: { "foreach", TOKforeach }, 2972: { "foreach_reverse", TOKforeach_reverse }, 2973: { "scope", TOKscope }, 2974: 2975: { "struct", TOKstruct }, 2976: { "class", TOKclass }, 2977: { "interface", TOKinterface }, 2978: { "union", TOKunion }, 2979: { "enum", TOKenum }, 2980: { "import", TOKimport }, 2981: { "mixin", TOKmixin }, 2982: { "static", TOKstatic }, 2983: { "final", TOKfinal }, 2984: { "const", TOKconst }, 2985: { "typedef", TOKtypedef }, 2986: { "alias", TOKalias }, 2987: { "override", TOKoverride }, 2988: { "abstract", TOKabstract }, 2989: { "volatile", TOKvolatile }, 2990: { "debug", TOKdebug }, 2991: { "deprecated", TOKdeprecated }, 2992: { "in", TOKin }, 2993: { "out", TOKout }, 2994: { "inout", TOKinout }, 2995: { "lazy", TOKlazy }, 2996: { "auto", TOKauto }, 2997: 2998: { "align", TOKalign }, 2999: { "extern", TOKextern }, 3000: { "private", TOKprivate }, 3001: { "package", TOKpackage }, 3002: { "protected", TOKprotected }, 3003: { "public", TOKpublic }, 3004: { "export", TOKexport }, 3005: 3006: { "body", TOKbody }, 3007: { "invariant", TOKinvariant }, 3008: { "unittest", TOKunittest }, 3009: { "version", TOKversion }, 3010: //{ "manifest", TOKmanifest }, 3011: 3012: // Added after 1.0 3013: { "__argTypes", TOKargTypes }, 3014: { "ref", TOKref }, 3015: { "macro", TOKmacro }, 3016: #if DMDV2 3017: { "pure", TOKpure }, 3018: { "nothrow", TOKnothrow }, 3019: { "__thread", TOKtls }, 3020: { "__gshared", TOKgshared }, 3021: { "__traits", TOKtraits }, 3022: { "__overloadset", TOKoverloadset }, 3023: { "__FILE__", TOKfile }, 3024: { "__LINE__", TOKline }, 3025: { "shared", TOKshared }, 3026: { "immutable", TOKimmutable }, 3027: #endif 3028: }; 3029: 3030: int Token::isKeyword() 3031: { 3032: for (unsigned u = 0; u < sizeof(keywords) / 
sizeof(keywords[0]); u++) 3033: { 3034: if (keywords[u].value == value) 3035: return 1; 3036: } 3037: return 0; 3038: } 3039: 3040: void Lexer::initKeywords() 3041: { StringValue *sv; 3042: unsigned u; 3043: enum TOK v; 3044: unsigned nkeywords = sizeof(keywords) / sizeof(keywords[0]); 3045: 3046: if (global.params.Dversion == 1) 3047: nkeywords -= 2; 3048: 3049: cmtable_init(); 3050: 3051: for (u = 0; u < nkeywords; u++) 3052: { const char *s; 3053: 3054: //printf("keyword[%d] = '%s'\n",u, keywords[u].name); 3055: s = keywords[u].name; 3056: v = keywords[u].value; 3057: sv = stringtable.insert(s, strlen(s)); 3058: sv->ptrvalue = (void *) new Identifier(sv->lstring.string,v); 3059: 3060: //printf("tochars[%d] = '%s'\n",v, s); 3061: Token::tochars[v] = s; 3062: } 3063: 3064: Token::tochars[TOKeof] = "EOF"; 3065: Token::tochars[TOKlcurly] = "{"; 3066: Token::tochars[TOKrcurly] = "}"; 3067: Token::tochars[TOKlparen] = "("; 3068: Token::tochars[TOKrparen] = ")"; 3069: Token::tochars[TOKlbracket] = "["; 3070: Token::tochars[TOKrbracket] = "]"; 3071: Token::tochars[TOKsemicolon] = ";"; 3072: Token::tochars[TOKcolon] = ":"; 3073: Token::tochars[TOKcomma] = ","; 3074: Token::tochars[TOKdot] = "."; 3075: Token::tochars[TOKxor] = "^"; 3076: Token::tochars[TOKxorass] = "^="; 3077: Token::tochars[TOKassign] = "="; 3078: Token::tochars[TOKconstruct] = "="; 3079: #if DMDV2 3080: Token::tochars[TOKblit] = "="; 3081: #endif 3082: Token::tochars[TOKlt] = "<"; 3083: Token::tochars[TOKgt] = ">"; 3084: Token::tochars[TOKle] = "<="; 3085: Token::tochars[TOKge] = ">="; 3086: Token::tochars[TOKequal] = "=="; 3087: Token::tochars[TOKnotequal] = "!="; 3088: Token::tochars[TOKnotidentity] = "!is"; 3089: Token::tochars[TOKtobool] = "!!"; 3090: 3091: Token::tochars[TOKunord] = "!<>="; 3092: Token::tochars[TOKue] = "!<>"; 3093: Token::tochars[TOKlg] = "<>"; 3094: Token::tochars[TOKleg] = "<>="; 3095: Token::tochars[TOKule] = "!>"; 3096: Token::tochars[TOKul] = "!>="; 3097: 
Token::tochars[TOKuge] = "!<"; 3098: Token::tochars[TOKug] = "!<="; 3099: 3100: Token::tochars[TOKnot] = "!"; 3101: Token::tochars[TOKtobool] = "!!"; 3102: Token::tochars[TOKshl] = "<<"; 3103: Token::tochars[TOKshr] = ">>"; 3104: Token::tochars[TOKushr] = ">>>"; 3105: Token::tochars[TOKadd] = "+"; 3106: Token::tochars[TOKmin] = "-"; 3107: Token::tochars[TOKmul] = "*"; 3108: Token::tochars[TOKdiv] = "/"; 3109: Token::tochars[TOKmod] = "%"; 3110: Token::tochars[TOKslice] = ".."; 3111: Token::tochars[TOKdotdotdot] = "..."; 3112: Token::tochars[TOKand] = "&"; 3113: Token::tochars[TOKandand] = "&&"; 3114: Token::tochars[TOKor] = "|"; 3115: Token::tochars[TOKoror] = "||"; 3116: Token::tochars[TOKarray] = "[]"; 3117: Token::tochars[TOKindex] = "[i]"; 3118: Token::tochars[TOKaddress] = "&"; 3119: Token::tochars[TOKstar] = "*"; 3120: Token::tochars[TOKtilde] = "~"; 3121: Token::tochars[TOKdollar] = "$"; 3122: Token::tochars[TOKcast] = "cast"; 3123: Token::tochars[TOKplusplus] = "++"; 3124: Token::tochars[TOKminusminus] = "--"; 3125: Token::tochars[TOKpreplusplus] = "++"; 3126: Token::tochars[TOKpreminusminus] = "--"; 3127: Token::tochars[TOKtype] = "type"; 3128: Token::tochars[TOKquestion] = "?"; 3129: Token::tochars[TOKneg] = "-"; 3130: Token::tochars[TOKuadd] = "+"; 3131: Token::tochars[TOKvar] = "var"; 3132: Token::tochars[TOKaddass] = "+="; 3133: Token::tochars[TOKminass] = "-="; 3134: Token::tochars[TOKmulass] = "*="; 3135: Token::tochars[TOKdivass] = "/="; 3136: Token::tochars[TOKmodass] = "%="; 3137: Token::tochars[TOKshlass] = "<<="; 3138: Token::tochars[TOKshrass] = ">>="; 3139: Token::tochars[TOKushrass] = ">>>="; 3140: Token::tochars[TOKandass] = "&="; 3141: Token::tochars[TOKorass] = "|="; 3142: Token::tochars[TOKcatass] = "~="; 3143: Token::tochars[TOKcat] = "~"; 3144: Token::tochars[TOKcall] = "call"; 3145: Token::tochars[TOKidentity] = "is"; 3146: Token::tochars[TOKnotidentity] = "!is"; 3147: 3148: Token::tochars[TOKorass] = "|="; 3149: 
Token::tochars[TOKidentifier] = "identifier"; 3150: #if DMDV2 3151: Token::tochars[TOKat] = "@"; 3152: Token::tochars[TOKpow] = "^^"; 3153: Token::tochars[TOKpowass] = "^^="; 3154: #endif 3155: 3156: // For debugging 3157: Token::tochars[TOKerror] = "error"; 3158: Token::tochars[TOKdotexp] = "dotexp"; 3159: Token::tochars[TOKdotti] = "dotti"; 3160: Token::tochars[TOKdotvar] = "dotvar"; 3161: Token::tochars[TOKdottype] = "dottype"; 3162: Token::tochars[TOKsymoff] = "symoff"; 3163: Token::tochars[TOKarraylength] = "arraylength"; 3164: Token::tochars[TOKarrayliteral] = "arrayliteral"; 3165: Token::tochars[TOKassocarrayliteral] = "assocarrayliteral"; 3166: Token::tochars[TOKstructliteral] = "structliteral"; 3167: Token::tochars[TOKstring] = "string"; 3168: Token::tochars[TOKdsymbol] = "symbol"; 3169: Token::tochars[TOKtuple] = "tuple"; 3170: Token::tochars[TOKdeclaration] = "declaration"; 3171: Token::tochars[TOKdottd] = "dottd"; 3172: Token::tochars[TOKon_scope_exit] = "scope(exit)"; 3173: Token::tochars[TOKon_scope_success] = "scope(success)"; 3174: Token::tochars[TOKon_scope_failure] = "scope(failure)"; 3175: 3176: #if UNITTEST 3177: unittest_lexer(); 3178: #endif 3179: } 3180: 3181: #if UNITTEST 3182: 3183: void unittest_lexer() 3184: { 3185: //printf("unittest_lexer()\n"); 3186: 3187: /* Not much here, just trying things out. 3188: */ 3189: const unsigned char text[] = "int"; 3190: Lexer lex1(NULL, (unsigned char *)text, 0, sizeof(text), 0, 0); 3191: TOK tok; 3192: tok = lex1.nextToken(); 3193: //printf("tok == %s, %d, %d\n", Token::toChars(tok), tok, TOKint32); 3194: assert(tok == TOKint32); 3195: tok = lex1.nextToken(); 3196: assert(tok == TOKeof); 3197: tok = lex1.nextToken(); 3198: assert(tok == TOKeof); 3199: } 3200: 3201: #endif 3202: 3203: