1: // Copyright (C) 1987-1995 by Symantec
   2: // Copyright (C) 2000-2011 by Digital Mars
   3: // All Rights Reserved
   4: // http://www.digitalmars.com
   5: // Written by Walter Bright
   6: /*
   7:  * This source file is made available for personal use
   8:  * only. The license is in /dmd/src/dmd/backendlicense.txt
   9:  * or /dm/src/dmd/backendlicense.txt
  10:  * For any other uses, please contact Digital Mars.
  11:  */
  12: 
  13: #if !SPP
  14: 
  15: #include        <stdio.h>
  16: #include        <string.h>
  17: #include        <time.h>
  18: #include        <math.h>
  19: #include        "cc.h"
  20: #include        "el.h"
  21: #include        "oper.h"
  22: #include        "code.h"
  23: #include        "global.h"
  24: 
  25: static char __file__[] = __FILE__;      /* for tassert.h                */
  26: #include        "tassert.h"
  27: 
  28: // Constants that the 8087 supports directly
  29: // BUG: rewrite for 80 bit long doubles
  30: #define PI              3.14159265358979323846
  31: #define LOG2            0.30102999566398119521
  32: #define LN2             0.6931471805599453094172321
  33: #define LOG2T           3.32192809488736234787
  34: #define LOG2E           1.4426950408889634074   /* 1/LN2                */
  35: 
  36: #define FWAIT   0x9B            /* FWAIT opcode                         */
  37: 
  38: /* Mark variable referenced by e as not a register candidate            */
  39: #define notreg(e)       ((e)->EV.sp.Vsym->Sflags &= ~GTregcand)
  40: 
  41: /* Generate the appropriate ESC instruction     */
  42: #define ESC(MF,b)       (0xD8 + ((MF) << 1) + (b))
  43: enum MF
  44: {       // Memory-format selector fed to ESC(MF,b): opcode = 0xD8 + (MF << 1) + b
  45:         MFfloat         = 0,    // ESC(MFfloat,1)  == 0xD9, the m32real load/store opcode used in this file
  46:         MFlong          = 1,    // presumably the 32-bit integer operand form -- confirm against the x87 opcode map
  47:         MFdouble        = 2,    // ESC(MFdouble,1) == 0xDD, the m64real load/store opcode used in this file
  48:         MFword          = 3     // presumably the 16-bit integer operand form -- confirm against the x87 opcode map
  49: };
  50: 
  51: NDP _8087elems[8];              // 8087 stack
  52: NDP ndp_zero;
  53: 
  54: int stackused = 0;              /* number of items on the 8087 stack    */
  55: 
  56: /*********************************
  57:  */
  58: 
  59: struct Dconst           // NOTE(review): only instantiated as 'oldd' in this chunk; field use is not visible here
  60: {
  61:     int round;                  // presumably the rounding state being tracked -- confirm against users of oldd
  62:     symbol *roundto0;           // presumably the symbol holding the CW_roundto0 control word -- confirm
  63:     symbol *roundtonearest;     // presumably the symbol holding the CW_roundtonearest control word -- confirm
  64: };
  65: 
  66: static Dconst oldd;
  67: 
  68: #define NDPP    0       // print out debugging info
  69: #define NOSAHF  I64     // can't use SAHF instruction
  70: 
  71: code *loadComplex(elem *e);
  72: code *opmod_complex87(elem *e,regm_t *pretregs);
  73: code *opass_complex87(elem *e,regm_t *pretregs);
  74: code * genf2(code *c,unsigned op,unsigned rm);
  75: 
  76: #define CW_roundto0             0xFBF
  77: #define CW_roundtonearest       0x3BF
  78: 
  79: STATIC code *genrnd(code *c, short cw);
  80: 
  81: /**********************************
  82:  * When we need to temporarily save 8087 registers, we record information
  83:  * about the save into an array of NDP structs:
  84:  */
  85: 
  86: NDP *NDP::save = NULL;
  87: int NDP::savemax = 0;           /* # of entries in NDP::save[]          */
  88: int NDP::savetop = 0;           /* # of entries used in NDP::save[]     */
  89: 
  90: #ifdef DEBUG
  91: #define NDPSAVEINC 2            /* flush reallocation bugs              */
  92: #else
  93: #define NDPSAVEINC 8            /* allocation chunk sizes               */
  94: #endif
  95: 
  96: /****************************************
  97:  * Store/load to ndp save location i
  98:  */
  99: 
 100: code *ndp_fstp(code *c, int i, tym_t ty)
 101: {
 102:     // Append "FSTP <mem> i[BP]" to c; the operand width follows tybasic(ty).
 103:     const unsigned rexbits = I64 ? (REX_W << 16) : 0;
 104:     unsigned esc;               // x87 escape opcode byte
 105:     unsigned modrm;             // mod=2, reg=/digit opcode extension, rm=BPRM
 106: 
 107:     switch (tybasic(ty))
 108:     {
 109:         case TYfloat: case TYifloat: case TYcfloat:
 110:             esc = 0xD9;  modrm = modregrm(2,3,BPRM);    // FSTP m32real i[BP]
 111:             break;
 112: 
 113:         case TYdouble: case TYdouble_alias: case TYidouble: case TYcdouble:
 114:             esc = 0xDD;  modrm = modregrm(2,3,BPRM);    // FSTP m64real i[BP]
 115:             break;
 116: 
 117:         case TYldouble: case TYildouble: case TYcldouble:
 118:             esc = 0xDB;  modrm = modregrm(2,7,BPRM);    // FSTP m80real i[BP]
 119:             break;
 120: 
 121:         default:
 122:             assert(0);          // type has no store form handled here
 123:             return c;           // (asserts compiled out) leave c untouched
 124:     }
 125:     // The save location is passed as an FLndp operand with value i.
 126:     return genc1(c, esc, rexbits | modrm, FLndp, i);
 127: }
 128: 
 129: code *ndp_fld(code *c, int i, tym_t ty)
 130: {
 131:     // Append "FLD <mem> i[BP]" to c; the operand width follows tybasic(ty).
 132:     const unsigned rexbits = I64 ? (REX_W << 16) : 0;
 133:     unsigned esc;               // x87 escape opcode byte
 134:     unsigned digit;             // /digit opcode extension placed in the reg field
 135: 
 136:     switch (tybasic(ty))
 137:     {
 138:         case TYfloat: case TYifloat: case TYcfloat:
 139:             esc = 0xD9;  digit = 0;             // FLD m32real i[BP]
 140:             break;
 141: 
 142:         case TYdouble: case TYdouble_alias: case TYidouble: case TYcdouble:
 143:             esc = 0xDD;  digit = 0;             // FLD m64real i[BP]
 144:             break;
 145: 
 146:         case TYldouble: case TYildouble: case TYcldouble:
 147:             esc = 0xDB;  digit = 5;             // FLD m80real i[BP]
 148:             break;
 149: 
 150:         default:
 151:             assert(0);          // type has no load form handled here
 152:             return c;           // (asserts compiled out) leave c untouched
 153:     }
 154:     // The save location is passed as an FLndp operand with value i.
 155:     return genc1(c, esc, rexbits | modregrm(2,digit,BPRM), FLndp, i);
 156: }
 157: 
 158: /**************************
 159:  * Return index of empty slot in NDP::save[].
 160:  */
 161: 
 162: STATIC int getemptyslot()
 163: {
 164:         // Scan for the first entry whose elem pointer is NULL.
 165:         int slot = 0;
 166:         while (slot < NDP::savemax && NDP::save[slot].e != NULL)
 167:                 ++slot;
 168:         if (slot == NDP::savemax)
 169:         {       // No free entry: grow NDP::save[] by NDPSAVEINC zero-filled entries.
 170:                 const int grown = NDP::savemax + NDPSAVEINC;
 171:                 NDP::save = (NDP *)mem_realloc(NDP::save, grown * sizeof(*NDP::save));
 172:                 memset(NDP::save + NDP::savemax, 0, NDPSAVEINC * sizeof(*NDP::save));
 173:                 NDP::savemax = grown;   // slot still indexes the first new entry
 174:         }
 175: 
 176:         if (NDP::savetop <= slot)       // savetop must cover the slot being returned
 177:                 NDP::savetop = slot + 1;
 178:         return slot;
 179: }
 180: 
 181: /*********************************
 182:  * Pop 8087 stack.
 183:  */
 184: 
 185: #undef pop87
 186: 
 187: void pop87(             // Drop ST(0) from the compiler's model of the 8087 stack; emits no code itself
 188: #ifdef DEBUG
 189:         int line, const char *file
 190: #endif
 191:         )               // DEBUG builds receive the call site for the NDPP trace below
 192: #ifdef DEBUG            // from here on, pop87() expands to pass __LINE__/__FILE__
 193: #define pop87() pop87(__LINE__,__FILE__)
 194: #endif
 195: {
 196:         int i;
 197: 
 198: #if NDPP
 199:         dbg_printf("pop87(%s(%d): stackused=%d)\n", file, line, stackused);
 200: #endif
 201:         --stackused;
 202:         assert(stackused >= 0);         // popping an empty stack is a caller bug
 203:         for (i = 0; i < arraysize(_8087elems) - 1; i++)  // slide every entry one place toward ST(0)
 204:                 _8087elems[i] = _8087elems[i + 1];
 205:         /* the vacated last entry now describes nothing */
 206:         _8087elems[arraysize(_8087elems) - 1] = ndp_zero;
 207: }
 208: 
 209: /*******************************
 210:  * Push 8087 stack. Generate and return any code
 211:  * necessary to preserve anything that might run off the end of the stack.
 212:  */
 213: 
 214: #undef push87
 215: 
 216: #ifdef DEBUG
 217: code *push87(int line, const char *file);
 218: code *push87() { return push87(__LINE__,__FILE__); }     // zero-argument overload; reports this line, not the caller's
 219: #endif
 220: 
 221: code *push87(           // Make room for a new ST(0) in the model; returns any spill code that was needed
 222: #ifdef DEBUG
 223:         int line, const char *file
 224: #endif
 225:         )
 226: #ifdef DEBUG            // from here on, push87() expands to pass __LINE__/__FILE__
 227: #define push87() push87(__LINE__,__FILE__)
 228: #endif
 229: {
 230:         code *c;
 231:         int i;
 232: 
 233:         c = CNIL;
 234:         // _8087elems[7] is occupied: the push would lose it, so spill it to a save slot first
 235:         if (_8087elems[7].e != NULL)
 236:         {
 237:                 i = getemptyslot();
 238:                 NDP::save[i] = _8087elems[7];   // remember what is being spilled and where
 239:                 c = genf2(c,0xD9,0xF6);         // FDECSTP
 240:                 c = genfwait(c);
 241:                 c = ndp_fstp(c, i, _8087elems[7].e->Ety);       // FSTP i[BP]
 242:                 assert(stackused == 8);         // count is unchanged: one value spilled, one about to arrive
 243:                 if (NDPP) dbg_printf("push87() : overflow\n");
 244:         }
 245:         else
 246:         {
 247: #ifdef DEBUG
 248:                 if (NDPP) dbg_printf("push87(%s(%d): %d)\n", file, line, stackused);
 249: #endif
 250:                 stackused++;
 251:                 assert(stackused <= 8);
 252:         }
 253:         // Shift the model up one place; the old entry 7 (spilled or empty) is overwritten
 254:         for (i = 7; i > 0; i--)
 255:                 _8087elems[i] = _8087elems[i - 1];
 256:         _8087elems[0] = ndp_zero;       // new ST(0) is not yet associated with any elem
 257:         return c;
 258: }
 259: 
 260: /*****************************
 261:  * Record that elem e is in ST(i) and is a value we want to keep.
 262:  */
 263: 
 264: #ifdef DEBUG
 265: void note87(elem *e, unsigned offset, int i, int linnum);
 266: void note87(elem *e, unsigned offset, int i)
 267: {
 268:     return note87(e, offset, i, 0);
 269: }
 270: void note87(elem *e, unsigned offset, int i, int linnum)
 271: #define note87(e,offset,i) note87(e,offset,i,__LINE__)
 272: #else
 273: void note87(elem *e, unsigned offset, int i)
 274: #endif
 275: {
 276: #if NDPP
 277:         printf("note87(e = %p.%d, i = %d, stackused = %d, line = %d)\n",e,offset,i,stackused,linnum);
 278: #endif
 279: #if 0 && DEBUG
 280:         if (_8087elems[i].e)
 281:                 printf("_8087elems[%d].e = %p\n",i,_8087elems[i].e);
 282: #endif
 283:         //if (i >= stackused) *(char*)0=0;
 284:         assert(i < stackused);
 285:         _8087elems[i].e = e;
warning C6386: Buffer overrun: accessing '_8087elems', the writable size is '64' bytes, but '8388488' bytes might be written: Lines: 284, 285
286: _8087elems[i].offset = offset; 287: } 288: 289: /**************************************************** 290: * Exchange two entries in 8087 stack. 291: */ 292: 293: void xchg87(int i, int j) 294: { 295: NDP save; 296: 297: save = _8087elems[i]; 298: _8087elems[i] = _8087elems[j]; 299: _8087elems[j] = save; 300: } 301: 302: /**************************** 303: * Make sure that elem e is in register ST(i). Reload it if necessary. 304: * Input: 305: * i 0..3 8087 register number 306: * flag 1 don't bother with FXCH 307: */ 308: 309: #ifdef DEBUG 310: STATIC code * makesure87(elem *e,unsigned offset,int i,unsigned flag,int linnum) 311: #define makesure87(e,offset,i,flag) makesure87(e,offset,i,flag,__LINE__) 312: #else 313: STATIC code * makesure87(elem *e,unsigned offset,int i,unsigned flag) 314: #endif 315: { 316: code *c; 317: int j; 318: 319: #ifdef DEBUG 320: if (NDPP) printf("makesure87(e=%p, offset=%d, i=%d, flag=%d, line=%d)\n",e,offset,i,flag,linnum); 321: #endif 322: assert(e && i < 4); 323: c = CNIL; 324: L1: 325: if (_8087elems[i].e != e || _8087elems[i].offset != offset) 326: { 327: #ifdef DEBUG 328: if (_8087elems[i].e) 329: printf("_8087elems[%d].e = %p, .offset = %d\n",i,_8087elems[i].e,_8087elems[i].offset); 330: #endif 331: assert(_8087elems[i].e == NULL); 332: for (j = 0; 1; j++) 333: { 334: if (j >= NDP::savetop && e->Eoper == OPcomma) 335: { 336: e = e->E2; // try right side 337: goto L1; 338: } 339: #ifdef DEBUG 340: if (j >= NDP::savetop) 341: printf("e = %p, NDP::savetop = %d\n",e,NDP::savetop); 342: #endif 343: assert(j < NDP::savetop); 344: //printf("\tNDP::save[%d] = %p, .offset = %d\n", j, NDP::save[j].e, NDP::save[j].offset); 345: if (e == NDP::save[j].e && offset == NDP::save[j].offset) 346: break; 347: } 348: c = push87(); 349: c = genfwait(c); 350: c = ndp_fld(c, j, e->Ety); // FLD j[BP] 351: if (!(flag & 1)) 352: { 353: while (i != 0) 354: { 355: genf2(c,0xD9,0xC8 + i); // FXCH ST(i) 356: i--; 357: } 358: } 359: NDP::save[j] = ndp_zero; 
// back in 8087 360: } 361: //_8087elems[i].e = NULL; 362: return c; 363: } 364: 365: /**************************** 366: * Save in memory any values in the 8087 that we want to keep. 367: */ 368: 369: code *save87() 370: { 371: code *c; 372: int i; 373: 374: c = CNIL; 375: while (_8087elems[0].e && stackused) 376: { 377: /* Save it */ 378: i = getemptyslot(); 379: if (NDPP) printf("saving %p in temporary NDP::save[%d]\n",_8087elems[0].e,i); 380: NDP::save[i] = _8087elems[0]; 381: 382: c = genfwait(c); 383: c = ndp_fstp(c,i,_8087elems[0].e->Ety); // FSTP i[BP] 384: pop87(); 385: } 386: if (c) /* if any stores */ 387: genfwait(c); /* wait for last one to finish */ 388: return c; 389: } 390: 391: /****************************************** 392: * Save any noted values that would be destroyed by n pushes 393: */ 394: 395: code *save87regs(unsigned n) 396: { 397: unsigned j; 398: unsigned k; 399: code *c = NULL; 400: 401: assert(n <= 7); 402: j = 8 - n; 403: if (stackused > j)
warning C4018: '>' : signed/unsigned mismatch
404: { 405: for (k = 8; k > j; k--) 406: { 407: c = genf2(c,0xD9,0xF6); // FDECSTP 408: c = genfwait(c); 409: if (k <= stackused)
warning C4018: '<=' : signed/unsigned mismatch
410: { int i; 411: 412: i = getemptyslot(); 413: c = ndp_fstp(c, i, _8087elems[k - 1].e->Ety); // FSTP i[BP] 414: NDP::save[i] = _8087elems[k - 1]; 415: _8087elems[k - 1] = ndp_zero; 416: } 417: } 418: 419: for (k = 8; k > j; k--) 420: { 421: if (k > stackused)
warning C4018: '>' : signed/unsigned mismatch
422: { c = genf2(c,0xD9,0xF7); // FINCSTP 423: c = genfwait(c); 424: } 425: } 426: stackused = j; 427: } 428: return c; 429: } 430: 431: /************************************* 432: * Find which, if any, slot on stack holds elem e. 433: */ 434: 435: STATIC int cse_get(elem *e, unsigned offset) 436: { int i; 437: 438: for (i = 0; 1; i++) 439: { 440: if (i == stackused) 441: { 442: i = -1; 443: //printf("cse not found\n"); 444: //elem_print(e); 445: break; 446: } 447: if (_8087elems[i].e == e && 448: _8087elems[i].offset == offset) 449: { //printf("cse found %d\n",i); 450: //elem_print(e); 451: break; 452: } 453: } 454: return i; 455: } 456: 457: /************************************* 458: * Reload common subexpression. 459: */ 460: 461: code *comsub87(elem *e,regm_t *pretregs) 462: { code *c; 463: 464: //printf("comsub87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs)); 465: // Look on 8087 stack 466: int i = cse_get(e, 0); 467: 468: if (tycomplex(e->Ety)) 469: { 470: unsigned sz = tysize(e->Ety); 471: int j = cse_get(e, sz / 2); 472: if (i >= 0 && j >= 0) 473: { 474: c = push87(); 475: c = cat(c, push87()); 476: c = genf2(c,0xD9,0xC0 + i); // FLD ST(i) 477: c = genf2(c,0xD9,0xC0 + j + 1); // FLD ST(j + 1) 478: c = cat(c,fixresult_complex87(e,mST01,pretregs)); 479: } 480: else 481: // Reload 482: c = loaddata(e,pretregs); 483: } 484: else 485: { 486: if (i >= 0) 487: { 488: c = push87(); 489: c = genf2(c,0xD9,0xC0 + i); // FLD ST(i) 490: if (*pretregs & XMMREGS) 491: c = cat(c,fixresult87(e,mST0,pretregs)); 492: else 493: c = cat(c,fixresult(e,mST0,pretregs)); 494: } 495: else 496: // Reload 497: c = loaddata(e,pretregs); 498: } 499: 500: freenode(e); 501: return c; 502: } 503: 504: 505: /************************** 506: * Generate code to deal with floatreg. 
507: */ 508: 509: code * genfltreg(code *c,unsigned opcode,unsigned reg,targ_size_t offset) 510: { 511: floatreg = TRUE; 512: reflocal = TRUE; 513: if ((opcode & ~7) == 0xD8) 514: c = genfwait(c); 515: return genc1(c,opcode,modregxrm(2,reg,BPRM),FLfltreg,offset); 516: } 517: 518: /******************************* 519: * Decide if we need to gen an FWAIT. 520: */ 521: 522: code *genfwait(code *c) 523: { 524: if (ADDFWAIT()) 525: c = gen1(c,FWAIT); 526: return c; 527: } 528: 529: /*************************************** 530: * Generate floating point instruction. 531: */ 532: 533: code * genf2(code *c,unsigned op,unsigned rm) 534: { 535: return gen2(genfwait(c),op,rm); 536: } 537: 538: /*************************** 539: * Put the 8087 flags into the CPU flags. 540: */ 541: 542: STATIC code * cg87_87topsw(code *c) 543: { 544: /* Note that SAHF is not available on some early I64 processors 545: * and will cause a seg fault 546: */ 547: c = cat(c,getregs(mAX)); 548: if (config.target_cpu >= TARGET_80286) 549: c = genf2(c,0xDF,0xE0); // FSTSW AX 550: else 551: { c = genfltreg(c,0xD8+5,7,0); /* FSTSW floatreg[BP] */ 552: genfwait(c); /* FWAIT */ 553: genfltreg(c,0x8A,4,1); /* MOV AH,floatreg+1[BP] */ 554: } 555: gen1(c,0x9E); // SAHF 556: code_orflag(c,CFpsw); 557: return c; 558: } 559: 560: /*************************** 561: * Set the PSW based on the state of ST0. 562: * Input: 563: * pop if stack should be popped after test 564: * Returns: 565: * start of code appended to c. 
566:  */
 567: 
 568: STATIC code * genftst(code *c,elem *e,int pop)
 569: {
 570:     if (NOSAHF)
 571:     {
 572:         c = cat(c,push87());
 573:         c = gen2(c,0xD9,0xEE);          // FLDZ
 574:         gen2(c,0xDF,0xE9);              // FUCOMIP ST1
 575:         pop87();
 576:         if (pop)
 577:         {   c = genf2(c,0xDD,modregrm(3,3,0));  // FPOP
 578:             pop87();
 579:         }
 580:     }
 581:     else if (config.flags4 & CFG4fastfloat)     // if fast floating point
 582:     {
 583:         c = genf2(c,0xD9,0xE4);         // FTST
 584:         c = cg87_87topsw(c);            // put 8087 flags in CPU flags
 585:         if (pop)
 586:         {   c = genf2(c,0xDD,modregrm(3,3,0));  // FPOP
 587:             pop87();
 588:         }
 589:     }
 590:     else if (config.target_cpu >= TARGET_80386)
 591:     {
 592:         // FUCOMP doesn't raise exceptions on QNANs, unlike FTST
 593:         c = cat(c,push87());
 594:         c = gen2(c,0xD9,0xEE);          // FLDZ
 595:         gen2(c,pop ? 0xDA : 0xDD,0xE9); // FUCOMPP / FUCOMP
 596:         pop87();
 597:         if (pop)
 598:             pop87();
 599:         cg87_87topsw(c);                // put 8087 flags in CPU flags
 600:     }
 601:     else
 602:     {
 603:         // Call library function which does not raise exceptions
 604:         regm_t regm = 0;
 605: 
 606:         c = cat(c,callclib(e,CLIBftest,&regm,0));
 607:         if (pop)
 608:         {   c = genf2(c,0xDD,modregrm(3,3,0));  // FPOP
 609:             pop87();
 610:         }
 611:     }
 612:     return c;
 613: }
 614: 
 615: /*************************************
 616:  * Determine if there is a special 8087 instruction to load
 617:  * constant e.
 618:  * Input:
 619:  *      im      0       load real part
 620:  *              1       load imaginary part
 621:  * Returns:
 622:  *      opcode if found
 623:  *      0 if not
 624:  */
 625: 
 626: unsigned char loadconst(elem *e, int im)
 627: #if __DMC__
 628: __in
 629: {
 630:     elem_debug(e);
 631:     assert(im == 0 || im == 1);
 632: }
 633: __body
 634: #endif
 635: {
 636:     static float fval[7] =
 637:         {0.0f,1.0f,(float)PI,(float)LOG2T,(float)LOG2E,(float)LOG2,(float)LN2};   // narrowing made explicit (was C4305)
 638:     static double dval[7] =
 639:         {0.0,1.0,PI,LOG2T,LOG2E,LOG2,LN2};
 640:     static long double ldval[7] =
 641: #if __APPLE__ || __FreeBSD__ || __OpenBSD__ || __sun&&__SVR4
 642: #define M_PIl           0x1.921fb54442d1846ap+1L        // 3.14159 fldpi
 643: #define M_LOG2T_L       0x1.a934f0979a3715fcp+1L        // 3.32193 fldl2t
 644: #define M_LOG2El        0x1.71547652b82fe178p+0L        // 1.4427 fldl2e
 645: #define M_LOG2_L        0x1.34413509f79fef32p-2L        // 0.30103 fldlg2
 646: #define M_LN2l          0x1.62e42fefa39ef358p-1L        // 0.693147 fldln2
 647:         {0.0,1.0,M_PIl,M_LOG2T_L,M_LOG2El,M_LOG2_L,M_LN2l};
 648: #elif __GNUC__
 649:         // BUG: should get proper 80 bit values for these
 650:         #define M_LOG2T_L       LOG2T
 651:         #define M_LOG2_L        LOG2
 652:         {0.0,1.0,M_PIl,M_LOG2T_L,M_LOG2El,M_LOG2_L,M_LN2l};
 653: #elif _MSC_VER
 654:         // BUG: should get proper 80 bit values for these
 655:         #define M_LOG2T_L       LOG2T
 656:         #define M_LOG2_L        LOG2
 657:         {0.0,1.0,PI,M_LOG2T_L,LOG2E,M_LOG2_L,LN2};
 658: #else
 659:         {0.0,1.0,M_PI_L,M_LOG2T_L,M_LOG2E_L,M_LOG2_L,M_LN2_L};
 660: #endif
 661:     static const unsigned char opcode[7 + 1] =  // unsigned: these opcode bytes do not fit in a signed char (was C4309)
 662:         /* FLDZ,FLD1,FLDPI,FLDL2T,FLDL2E,FLDLG2,FLDLN2,0 */
 663:         {0xEE,0xE8,0xEB,0xE9,0xEA,0xEC,0xED,0};
 664:     int i;
 665:     targ_float f;
 666:     targ_double d;
 667:     targ_ldouble ld;
 668:     int sz;
 669:     int zero;
 670:     void *p;
 671:     static char zeros[sizeof(long double) > 10 ? sizeof(long double) : 10];   // at least the 10 bytes compared for sz == 10
 672: 
 673:     if (im == 0)
 674:     {
 675:         switch (tybasic(e->Ety))
 676:         {
 677:             case TYfloat:
 678:             case TYifloat:
 679:             case TYcfloat:
 680:                 f = e->EV.Vfloat;
 681:                 sz = 4;
 682:                 p = &f;
 683:                 break;
 684: 
 685:             case TYdouble:
 686:             case TYdouble_alias:
 687:             case TYidouble:
 688:             case TYcdouble:
 689:                 d = e->EV.Vdouble;
 690:                 sz = 8;
 691:                 p = &d;
 692:                 break;
 693: 
 694:             case TYldouble:
 695:             case TYildouble:
 696:             case TYcldouble:
 697:                 ld = e->EV.Vldouble;
 698:                 sz = 10;
 699:                 p = &ld;
 700:                 break;
 701: 
 702:             default:
 703:                 assert(0);
 704:         }
 705:     }
 706:     else
 707:     {
 708:         switch (tybasic(e->Ety))
 709:         {
 710:             case TYcfloat:
 711:                 f = e->EV.Vcfloat.im;
 712:                 sz = 4;
 713:                 p = &f;
 714:                 break;
 715: 
 716:             case TYcdouble:
 717:                 d = e->EV.Vcdouble.im;
 718:                 sz = 8;
 719:                 p = &d;
 720:                 break;
 721: 
 722:             case TYcldouble:
 723:                 ld = e->EV.Vcldouble.im;
 724:                 sz = 10;
 725:                 p = &ld;
 726:                 break;
 727: 
 728:             default:
 729:                 assert(0);
 730:         }
 731:     }
 732: 
 733:     // For this purpose -0 is not regarded as +0, since FLDZ loads a +0.
 734:     size_t cmplen = (sz == 10 && sizeof(ld) < 10) ? sizeof(ld) : (size_t)sz;   // never read past ld when it is under 10 bytes (was C6385)
 735:     zero = (memcmp(p, zeros, cmplen) == 0);
 736:     if (zero && config.target_cpu >= TARGET_PentiumPro)
 737:         return 0xEE;            // FLDZ is the only one with 1 micro-op
 738: 
 739:     // For some reason, these instructions take more clocks
 740:     if (config.flags4 & CFG4speed && config.target_cpu >= TARGET_Pentium)
 741:         return 0;
 742: 
 743:     if (zero)
 744:         return 0xEE;
 745: 
 746:     for (i = 1; i < arraysize(fval); i++)
 747:     {
 748:         switch (sz)
 749:         {
 750:             case 4:
 751:                 if (fval[i] != f)
 752:                     continue;
 753:                 break;
 754:             case 8:
 755:                 if (dval[i] != d)
 756:                     continue;
 757:                 break;
 758:             case 10:
 759:                 if (ldval[i] != ld)
 760:                     continue;
 761:                 break;
 762:             default:
 763:                 assert(0);
 764:         }
 765:         break;
 766:     }
 767:     return opcode[i];           // opcode[7] == 0 when no table entry matched
 768: }
 769: 
 770: /******************************
 771:  * Given the result of an expression is in retregs,
 772:  * generate necessary code to return result in *pretregs.
 773:  */
 774: 
 775: 
 776: code *fixresult87(elem *e,regm_t retregs,regm_t *pretregs)
 777: {
 778:     regm_t regm;
 779:     tym_t tym;
 780:     code *c1,*c2;
 781:     unsigned sz;
 782: 
 783:     //printf("fixresult87(e = %p, retregs = x%x, *pretregs = x%x)\n", e,retregs,*pretregs);
 784:     //printf("fixresult87(e = %p, retregs = %s, *pretregs = %s)\n", e,regm_str(retregs),regm_str(*pretregs));
 785:     assert(!*pretregs || retregs);
 786:     c1 = CNIL;
 787:     c2 = CNIL;
 788:     tym = tybasic(e->Ety);
 789:     sz = tysize[tym];
 790:     //printf("tym = x%x, sz = %d\n", tym, sz);
 791: 
 792:     if (*pretregs & mST01)
 793:         return fixresult_complex87(e, retregs, pretregs);
 794: 
 795:     /* if retregs needs to be transferred into the 8087 */
 796:     if (*pretregs & mST0 && retregs & (mBP | ALLREGS))
 797:     {
 798:         assert(sz <= DOUBLESIZE);
 799:         if (!I16)
 800:         {
 801: 
 802:             if (*pretregs & mPSW)
 803:             {   // Set flags
 804:                 regm_t r = retregs | mPSW;
 805:                 c1 = fixresult(e,retregs,&r);
 806:             }
 807:             c2 = push87();
 808:             if (sz == REGSIZE || (I64 && sz == 4))
 809:             {
 810:                 unsigned reg = findreg(retregs);
 811:                 c2 = genfltreg(c2,0x89,reg,0);          // MOV fltreg,reg
 812:                 genfltreg(c2,0xD9,0,0);                 // FLD float ptr fltreg
 813:             }
 814:             else
 815:             {   unsigned msreg,lsreg;
 816: 
 817:                 msreg = findregmsw(retregs);
 818:                 lsreg = findreglsw(retregs);
 819:                 c2 = genfltreg(c2,0x89,lsreg,0);        // MOV fltreg,lsreg
 820:                 genfltreg(c2,0x89,msreg,4);             // MOV fltreg+4,msreg
 821:                 genfltreg(c2,0xDD,0,0);                 // FLD double ptr fltreg
 822:             }
 823:         }
 824:         else
 825:         {
 826:             regm = (sz == FLOATSIZE) ? FLOATREGS : DOUBLEREGS;
 827:             regm |= *pretregs & mPSW;
 828:             c1 = fixresult(e,retregs,&regm);
 829:             regm = 0;           // don't worry about result from CLIBxxx
 830:             c2 = callclib(e,
 831:                     ((sz == FLOATSIZE) ? CLIBfltto87 : CLIBdblto87),
 832:                     &regm,0);
 833:         }
 834:     }
 835:     else if (*pretregs & (mBP | ALLREGS) && retregs & mST0)
 836:     {   unsigned mf;
 837:         unsigned reg;
 838: 
 839:         assert(sz <= DOUBLESIZE);
 840:         mf = (sz == FLOATSIZE) ? MFfloat : MFdouble;
 841:         if (*pretregs & mPSW && !(retregs & mPSW))
 842:             c1 = genftst(c1,e,0);
 843:         /* FSTP floatreg        */
 844:         pop87();
 845:         c1 = genfltreg(c1,ESC(mf,1),3,0);
 846:         genfwait(c1);
 847:         c2 = allocreg(pretregs,&reg,(sz == FLOATSIZE) ? TYfloat : TYdouble);
 848:         if (sz == FLOATSIZE)
 849:         {
 850:             if (!I16)
 851:                 c2 = genfltreg(c2,0x8B,reg,0);
 852:             else
 853:             {   c2 = genfltreg(c2,0x8B,reg,REGSIZE);
 854:                 genfltreg(c2,0x8B,findreglsw(*pretregs),0);
 855:             }
 856:         }
 857:         else
 858:         {   assert(sz == DOUBLESIZE);
 859:             if (I16)
 860:             {   c2 = genfltreg(c2,0x8B,AX,6);
 861:                 genfltreg(c2,0x8B,BX,4);
 862:                 genfltreg(c2,0x8B,CX,2);
 863:                 genfltreg(c2,0x8B,DX,0);
 864:             }
 865:             else if (I32)
 866:             {   c2 = genfltreg(c2,0x8B,reg,REGSIZE);
 867:                 genfltreg(c2,0x8B,findreglsw(*pretregs),0);
 868:             }
 869:             else // I64
 870:             {
 871:                 c2 = genfltreg(c2,0x8B,reg,0);
 872:                 code_orrex(c2, REX_W);
 873:             }
 874:         }
 875:     }
 876:     else if (*pretregs == 0 && retregs == mST0)
 877:     {
 878:         c1 = genf2(c1,0xDD,modregrm(3,3,0));    // FPOP
 879:         pop87();
 880:     }
 881:     else
 882:     {   if (*pretregs & mPSW)
 883:         {   if (!(retregs & mPSW))
 884:             {   assert(retregs & mST0);
 885:                 c1 = genftst(c1,e,!(*pretregs & mST0)); // FTST
 886:             }
 887:         }
 888:         if (*pretregs & mST0 && retregs & XMMREGS)
 889:         {
 890:             assert(sz <= DOUBLESIZE);
 891:             unsigned mf = (sz == FLOATSIZE) ? MFfloat : MFdouble;
 892:             // MOVD floatreg,XMM?
 893:             unsigned reg = findreg(retregs);
 894:             c1 = genfltreg(c1,0xF20F11,reg - XMM0,0);
 895:             c2 = push87();
 896:             c2 = genfltreg(c2,ESC(mf,1),0,0);   // FLD float/double ptr fltreg
 897:         }
 898:         else if (retregs & mST0 && *pretregs & XMMREGS)
 899:         {
 900:             assert(sz <= DOUBLESIZE);
 901:             unsigned mf = (sz == FLOATSIZE) ? MFfloat : MFdouble;
 902:             // FSTP floatreg
 903:             pop87();
 904:             c1 = genfltreg(c1,ESC(mf,1),3,0);
 905:             genfwait(c1);
 906:             // MOVD XMM?,floatreg
 907:             unsigned reg;
 908:             c2 = allocreg(pretregs,&reg,(sz == FLOATSIZE) ? TYfloat : TYdouble);
 909:             c2 = genfltreg(c2,0xF20F10,reg -XMM0,0);
 910:         }
 911:         else
 912:             assert(!(*pretregs & mST0) || (retregs & mST0));
 913:     }
 914:     if (*pretregs & mST0)
 915:         note87(e,0,0);
 916:     return cat(c1,c2);
 917: }
 918: 
 919: /********************************
 920:  * Generate in-line 8087 code for the following operators:
 921:  *      add
 922:  *      min
 923:  *      mul
 924:  *      div
 925:  *      cmp
 926:  */
 927: 
 928: // Reverse the order that the op is done in
 929: static const char oprev[9] = { -1,0,1,2,3,5,4,7,6 };
 930: 
 931: code *orth87(elem *e,regm_t *pretregs)
 932: {
 933:     unsigned op;
 934:     code *c1,*c2,*c3,*c4;
 935:     code *cx;
 936:     regm_t retregs;
 937:     regm_t resregm;
 938:     elem *e1;
 939:     elem *e2;
 940:     int e2oper;
 941:     int eoper;
 942:     unsigned sz2;
 943:     int clib = CLIBMAX;         // initialize to invalid value
 944:     int reverse = 0;
 945: 
 946:     //printf("orth87(+e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
 947: #if 1   // we could be evaluating / for side effects only
 948:     assert(*pretregs != 0);
 949: #endif
 950:     retregs = mST0;
 951:     resregm = mST0;
 952: 
 953:     e1 = e->E1;
 954:     e2 = e->E2;
 955:     c3 = CNIL;
 956:     c4 = CNIL;
 957:     sz2 = tysize(e1->Ety);
 958:     if (tycomplex(e1->Ety))
 959:         sz2 /= 2;
 960: 
 961:     eoper = e->Eoper;
 962:     if (eoper == OPmul && e2->Eoper == OPconst && el_toldouble(e->E2) == 2.0L)
 963:     {
 964:         // Perform "mul 2.0" as fadd ST(0), ST
 965:         c1 = 
codelem(e1,&retregs,FALSE); 966: c1 = genf2(c1, 0xDC, 0xC0); // fadd ST(0), ST; 967: c2 = fixresult87(e,mST0,pretregs); // result is in ST(0). 968: freenode(e2); 969: return cat(c1,c2); 970: } 971: 972: if (OTrel(eoper)) 973: eoper = OPeqeq; 974: #define X(op, ty1, ty2) (((op) << 16) + (ty1) * 256 + (ty2)) 975: switch (X(eoper, tybasic(e1->Ety), tybasic(e2->Ety))) 976: { 977: case X(OPadd, TYfloat, TYfloat): 978: case X(OPadd, TYdouble, TYdouble): 979: case X(OPadd, TYdouble_alias, TYdouble_alias): 980: case X(OPadd, TYldouble, TYldouble): 981: case X(OPadd, TYldouble, TYdouble): 982: case X(OPadd, TYdouble, TYldouble): 983: case X(OPadd, TYifloat, TYifloat): 984: case X(OPadd, TYidouble, TYidouble): 985: case X(OPadd, TYildouble, TYildouble): 986: op = 0; // FADDP 987: break; 988: 989: case X(OPmin, TYfloat, TYfloat): 990: case X(OPmin, TYdouble, TYdouble): 991: case X(OPmin, TYdouble_alias, TYdouble_alias): 992: case X(OPmin, TYldouble, TYldouble): 993: case X(OPmin, TYldouble, TYdouble): 994: case X(OPmin, TYdouble, TYldouble): 995: case X(OPmin, TYifloat, TYifloat): 996: case X(OPmin, TYidouble, TYidouble): 997: case X(OPmin, TYildouble, TYildouble): 998: op = 4; // FSUBP 999: break; 1000: 1001: case X(OPmul, TYfloat, TYfloat): 1002: case X(OPmul, TYdouble, TYdouble): 1003: case X(OPmul, TYdouble_alias, TYdouble_alias): 1004: case X(OPmul, TYldouble, TYldouble): 1005: case X(OPmul, TYldouble, TYdouble): 1006: case X(OPmul, TYdouble, TYldouble): 1007: case X(OPmul, TYifloat, TYifloat): 1008: case X(OPmul, TYidouble, TYidouble): 1009: case X(OPmul, TYildouble, TYildouble): 1010: case X(OPmul, TYfloat, TYifloat): 1011: case X(OPmul, TYdouble, TYidouble): 1012: case X(OPmul, TYldouble, TYildouble): 1013: case X(OPmul, TYifloat, TYfloat): 1014: case X(OPmul, TYidouble, TYdouble): 1015: case X(OPmul, TYildouble, TYldouble): 1016: op = 1; // FMULP 1017: break; 1018: 1019: case X(OPdiv, TYfloat, TYfloat): 1020: case X(OPdiv, TYdouble, TYdouble): 1021: case X(OPdiv, 
TYdouble_alias, TYdouble_alias): 1022: case X(OPdiv, TYldouble, TYldouble): 1023: case X(OPdiv, TYldouble, TYdouble): 1024: case X(OPdiv, TYdouble, TYldouble): 1025: case X(OPdiv, TYifloat, TYifloat): 1026: case X(OPdiv, TYidouble, TYidouble): 1027: case X(OPdiv, TYildouble, TYildouble): 1028: op = 6; // FDIVP 1029: break; 1030: 1031: case X(OPmod, TYfloat, TYfloat): 1032: case X(OPmod, TYdouble, TYdouble): 1033: case X(OPmod, TYdouble_alias, TYdouble_alias): 1034: case X(OPmod, TYldouble, TYldouble): 1035: case X(OPmod, TYfloat, TYifloat): 1036: case X(OPmod, TYdouble, TYidouble): 1037: case X(OPmod, TYldouble, TYildouble): 1038: case X(OPmod, TYifloat, TYifloat): 1039: case X(OPmod, TYidouble, TYidouble): 1040: case X(OPmod, TYildouble, TYildouble): 1041: case X(OPmod, TYifloat, TYfloat): 1042: case X(OPmod, TYidouble, TYdouble): 1043: case X(OPmod, TYildouble, TYldouble): 1044: op = (unsigned) -1; 1045: break; 1046: 1047: case X(OPeqeq, TYfloat, TYfloat): 1048: case X(OPeqeq, TYdouble, TYdouble): 1049: case X(OPeqeq, TYdouble_alias, TYdouble_alias): 1050: case X(OPeqeq, TYldouble, TYldouble): 1051: case X(OPeqeq, TYifloat, TYifloat): 1052: case X(OPeqeq, TYidouble, TYidouble): 1053: case X(OPeqeq, TYildouble, TYildouble): 1054: assert(OTrel(e->Eoper)); 1055: assert((*pretregs & mST0) == 0); 1056: c1 = codelem(e1,&retregs,FALSE); 1057: note87(e1,0,0); 1058: resregm = mPSW; 1059: 1060: if (rel_exception(e->Eoper) || config.flags4 & CFG4fastfloat) 1061: { 1062: if (cnst(e2) && !boolres(e2)) 1063: { 1064: if (NOSAHF) 1065: { 1066: c1 = cat(c1,push87()); 1067: c1 = gen2(c1,0xD9,0xEE); // FLDZ 1068: gen2(c1,0xDF,0xF1); // FCOMIP ST1 1069: pop87(); 1070: } 1071: else 1072: { c1 = genf2(c1,0xD9,0xE4); // FTST 1073: c1 = cg87_87topsw(c1); 1074: } 1075: c2 = genf2(NULL,0xDD,modregrm(3,3,0)); // FPOP 1076: pop87(); 1077: } 1078: else if (NOSAHF) 1079: { 1080: note87(e1,0,0); 1081: c2 = load87(e2,0,&retregs,e1,-1); 1082: c2 = cat(c2,makesure87(e1,0,1,0)); 1083: resregm = 0; 
1084: //c2 = genf2(c2,0xD9,0xC8 + 1); // FXCH ST1 1085: c2 = gen2(c2,0xDF,0xF1); // FCOMIP ST1 1086: pop87(); 1087: genf2(c2,0xDD,modregrm(3,3,0)); // FPOP 1088: pop87(); 1089: } 1090: else 1091: { 1092: c2 = load87(e2, 0, pretregs, e1, 3); // FCOMPP 1093: } 1094: } 1095: else 1096: { 1097: if (cnst(e2) && !boolres(e2) && 1098: config.target_cpu < TARGET_80386) 1099: { 1100: regm_t regm = 0; 1101: 1102: c2 = callclib(e,CLIBftest0,&regm,0); 1103: pop87(); 1104: } 1105: else 1106: { 1107: note87(e1,0,0); 1108: c2 = load87(e2,0,&retregs,e1,-1); 1109: c2 = cat(c2,makesure87(e1,0,1,0)); 1110: resregm = 0; 1111: if (NOSAHF) 1112: { 1113: c3 = gen2(CNIL,0xDF,0xE9); // FUCOMIP ST1 1114: pop87(); 1115: genf2(c3,0xDD,modregrm(3,3,0)); // FPOP 1116: pop87(); 1117: } 1118: else if (config.target_cpu >= TARGET_80386) 1119: { 1120: c3 = gen2(CNIL,0xDA,0xE9); // FUCOMPP 1121: c3 = cg87_87topsw(c3); 1122: pop87(); 1123: pop87(); 1124: } 1125: else 1126: // Call a function instead so that exceptions 1127: // are not generated. 
1128: c3 = callclib(e,CLIBfcompp,&resregm,0); 1129: } 1130: } 1131: 1132: freenode(e2); 1133: return cat4(c1,c2,c3,c4); 1134: 1135: case X(OPadd, TYcfloat, TYcfloat): 1136: case X(OPadd, TYcdouble, TYcdouble): 1137: case X(OPadd, TYcldouble, TYcldouble): 1138: case X(OPadd, TYcfloat, TYfloat): 1139: case X(OPadd, TYcdouble, TYdouble): 1140: case X(OPadd, TYcldouble, TYldouble): 1141: case X(OPadd, TYfloat, TYcfloat): 1142: case X(OPadd, TYdouble, TYcdouble): 1143: case X(OPadd, TYldouble, TYcldouble): 1144: goto Lcomplex; 1145: 1146: case X(OPadd, TYifloat, TYcfloat): 1147: case X(OPadd, TYidouble, TYcdouble): 1148: case X(OPadd, TYildouble, TYcldouble): 1149: goto Lcomplex2; 1150: 1151: case X(OPmin, TYcfloat, TYcfloat): 1152: case X(OPmin, TYcdouble, TYcdouble): 1153: case X(OPmin, TYcldouble, TYcldouble): 1154: case X(OPmin, TYcfloat, TYfloat): 1155: case X(OPmin, TYcdouble, TYdouble): 1156: case X(OPmin, TYcldouble, TYldouble): 1157: case X(OPmin, TYfloat, TYcfloat): 1158: case X(OPmin, TYdouble, TYcdouble): 1159: case X(OPmin, TYldouble, TYcldouble): 1160: goto Lcomplex; 1161: 1162: case X(OPmin, TYifloat, TYcfloat): 1163: case X(OPmin, TYidouble, TYcdouble): 1164: case X(OPmin, TYildouble, TYcldouble): 1165: goto Lcomplex2; 1166: 1167: case X(OPmul, TYcfloat, TYcfloat): 1168: case X(OPmul, TYcdouble, TYcdouble): 1169: case X(OPmul, TYcldouble, TYcldouble): 1170: clib = CLIBcmul; 1171: goto Lcomplex; 1172: 1173: case X(OPdiv, TYcfloat, TYcfloat): 1174: case X(OPdiv, TYcdouble, TYcdouble): 1175: case X(OPdiv, TYcldouble, TYcldouble): 1176: case X(OPdiv, TYfloat, TYcfloat): 1177: case X(OPdiv, TYdouble, TYcdouble): 1178: case X(OPdiv, TYldouble, TYcldouble): 1179: case X(OPdiv, TYifloat, TYcfloat): 1180: case X(OPdiv, TYidouble, TYcdouble): 1181: case X(OPdiv, TYildouble, TYcldouble): 1182: clib = CLIBcdiv; 1183: goto Lcomplex; 1184: 1185: case X(OPdiv, TYifloat, TYfloat): 1186: case X(OPdiv, TYidouble, TYdouble): 1187: case X(OPdiv, TYildouble, TYldouble): 
1188: op = 6; // FDIVP 1189: break; 1190: 1191: Lcomplex: 1192: c1 = loadComplex(e1); 1193: c2 = loadComplex(e2); 1194: c3 = makesure87(e1, sz2, 2, 0); 1195: c3 = cat(c3,makesure87(e1, 0, 3, 0)); 1196: retregs = mST01; 1197: if (eoper == OPadd) 1198: { 1199: c4 = genf2(NULL, 0xDE, 0xC0+2); // FADDP ST(2),ST 1200: genf2(c4, 0xDE, 0xC0+2); // FADDP ST(2),ST 1201: pop87(); 1202: pop87(); 1203: } 1204: else if (eoper == OPmin) 1205: { 1206: c4 = genf2(NULL, 0xDE, 0xE8+2); // FSUBP ST(2),ST 1207: genf2(c4, 0xDE, 0xE8+2); // FSUBP ST(2),ST 1208: pop87(); 1209: pop87(); 1210: } 1211: else 1212: c4 = callclib(e, clib, &retregs, 0); 1213: c4 = cat(c4, fixresult_complex87(e, retregs, pretregs)); 1214: return cat4(c1,c2,c3,c4); 1215: 1216: Lcomplex2: 1217: retregs = mST0; 1218: c1 = codelem(e1, &retregs, FALSE); 1219: note87(e1, 0, 0); 1220: c2 = loadComplex(e2); 1221: c3 = makesure87(e1, 0, 2, 0); 1222: retregs = mST01; 1223: if (eoper == OPadd) 1224: { 1225: c4 = genf2(NULL, 0xDE, 0xC0+2); // FADDP ST(2),ST 1226: } 1227: else if (eoper == OPmin) 1228: { 1229: c4 = genf2(NULL, 0xDE, 0xE8+2); // FSUBP ST(2),ST 1230: c4 = genf2(c4, 0xD9, 0xE0); // FCHS 1231: } 1232: else 1233: assert(0); 1234: pop87(); 1235: c4 = genf2(c4, 0xD9, 0xC8 + 1); // FXCH ST(1) 1236: c4 = cat(c4, fixresult_complex87(e, retregs, pretregs)); 1237: return cat4(c1,c2,c3,c4); 1238: 1239: case X(OPeqeq, TYcfloat, TYcfloat): 1240: case X(OPeqeq, TYcdouble, TYcdouble): 1241: case X(OPeqeq, TYcldouble, TYcldouble): 1242: case X(OPeqeq, TYcfloat, TYifloat): 1243: case X(OPeqeq, TYcdouble, TYidouble): 1244: case X(OPeqeq, TYcldouble, TYildouble): 1245: case X(OPeqeq, TYcfloat, TYfloat): 1246: case X(OPeqeq, TYcdouble, TYdouble): 1247: case X(OPeqeq, TYcldouble, TYldouble): 1248: case X(OPeqeq, TYifloat, TYcfloat): 1249: case X(OPeqeq, TYidouble, TYcdouble): 1250: case X(OPeqeq, TYildouble, TYcldouble): 1251: case X(OPeqeq, TYfloat, TYcfloat): 1252: case X(OPeqeq, TYdouble, TYcdouble): 1253: case X(OPeqeq, 
TYldouble, TYcldouble): 1254: case X(OPeqeq, TYfloat, TYifloat): 1255: case X(OPeqeq, TYdouble, TYidouble): 1256: case X(OPeqeq, TYldouble, TYildouble): 1257: case X(OPeqeq, TYifloat, TYfloat): 1258: case X(OPeqeq, TYidouble, TYdouble): 1259: case X(OPeqeq, TYildouble, TYldouble): 1260: c1 = loadComplex(e1); 1261: c2 = loadComplex(e2); 1262: c3 = makesure87(e1, sz2, 2, 0); 1263: c3 = cat(c3,makesure87(e1, 0, 3, 0)); 1264: retregs = 0; 1265: c4 = callclib(e, CLIBccmp, &retregs, 0); 1266: return cat4(c1,c2,c3,c4); 1267: 1268: 1269: case X(OPadd, TYfloat, TYifloat): 1270: case X(OPadd, TYdouble, TYidouble): 1271: case X(OPadd, TYldouble, TYildouble): 1272: case X(OPadd, TYifloat, TYfloat): 1273: case X(OPadd, TYidouble, TYdouble): 1274: case X(OPadd, TYildouble, TYldouble): 1275: 1276: case X(OPmin, TYfloat, TYifloat): 1277: case X(OPmin, TYdouble, TYidouble): 1278: case X(OPmin, TYldouble, TYildouble): 1279: case X(OPmin, TYifloat, TYfloat): 1280: case X(OPmin, TYidouble, TYdouble): 1281: case X(OPmin, TYildouble, TYldouble): 1282: retregs = mST0; 1283: c1 = codelem(e1, &retregs, FALSE); 1284: note87(e1, 0, 0); 1285: c2 = codelem(e2, &retregs, FALSE); 1286: c3 = makesure87(e1, 0, 1, 0); 1287: if (eoper == OPmin) 1288: c3 = genf2(c3, 0xD9, 0xE0); // FCHS 1289: if (tyimaginary(e1->Ety)) 1290: c3 = genf2(c3, 0xD9, 0xC8 + 1); // FXCH ST(1) 1291: retregs = mST01; 1292: c4 = fixresult_complex87(e, retregs, pretregs); 1293: return cat4(c1,c2,c3,c4); 1294: 1295: case X(OPadd, TYcfloat, TYifloat): 1296: case X(OPadd, TYcdouble, TYidouble): 1297: case X(OPadd, TYcldouble, TYildouble): 1298: op = 0; 1299: goto Lci; 1300: 1301: case X(OPmin, TYcfloat, TYifloat): 1302: case X(OPmin, TYcdouble, TYidouble): 1303: case X(OPmin, TYcldouble, TYildouble): 1304: op = 4; 1305: goto Lci; 1306: 1307: Lci: 1308: c1 = loadComplex(e1); 1309: retregs = mST0; 1310: c2 = load87(e2,sz2,&retregs,e1,op); 1311: freenode(e2); 1312: retregs = mST01; 1313: c3 = makesure87(e1,0,1,0); 1314: c4 = 
fixresult_complex87(e, retregs, pretregs); 1315: return cat4(c1,c2,c3,c4); 1316: 1317: case X(OPmul, TYcfloat, TYfloat): 1318: case X(OPmul, TYcdouble, TYdouble): 1319: case X(OPmul, TYcldouble, TYldouble): 1320: c1 = loadComplex(e1); 1321: goto Lcm1; 1322: 1323: case X(OPmul, TYcfloat, TYifloat): 1324: case X(OPmul, TYcdouble, TYidouble): 1325: case X(OPmul, TYcldouble, TYildouble): 1326: c1 = loadComplex(e1); 1327: c1 = genf2(c1, 0xD9, 0xE0); // FCHS 1328: genf2(c1,0xD9,0xC8 + 1); // FXCH ST(1) 1329: if (elemisone(e2)) 1330: { 1331: freenode(e2); 1332: c2 = NULL; 1333: c3 = NULL; 1334: goto Lcd4; 1335: } 1336: goto Lcm1; 1337: 1338: Lcm1: 1339: retregs = mST0; 1340: c2 = codelem(e2, &retregs, FALSE); 1341: c3 = makesure87(e1, sz2, 1, 0); 1342: c3 = cat(c3,makesure87(e1, 0, 2, 0)); 1343: goto Lcm2; 1344: 1345: case X(OPmul, TYfloat, TYcfloat): 1346: case X(OPmul, TYdouble, TYcdouble): 1347: case X(OPmul, TYldouble, TYcldouble): 1348: retregs = mST0; 1349: c1 = codelem(e1, &retregs, FALSE); 1350: note87(e1, 0, 0); 1351: c2 = loadComplex(e2); 1352: c3 = makesure87(e1, 0, 2, 0); 1353: c3 = genf2(c3,0xD9,0xC8 + 1); // FXCH ST(1) 1354: genf2(c3,0xD9,0xC8 + 2); // FXCH ST(2) 1355: goto Lcm2; 1356: 1357: case X(OPmul, TYifloat, TYcfloat): 1358: case X(OPmul, TYidouble, TYcdouble): 1359: case X(OPmul, TYildouble, TYcldouble): 1360: retregs = mST0; 1361: c1 = codelem(e1, &retregs, FALSE); 1362: note87(e1, 0, 0); 1363: c2 = loadComplex(e2); 1364: c3 = makesure87(e1, 0, 2, 0); 1365: c3 = genf2(c3, 0xD9, 0xE0); // FCHS 1366: genf2(c3,0xD9,0xC8 + 2); // FXCH ST(2) 1367: goto Lcm2; 1368: 1369: Lcm2: 1370: c3 = genf2(c3,0xDC,0xC8 + 2); // FMUL ST(2), ST 1371: genf2(c3,0xDE,0xC8 + 1); // FMULP ST(1), ST 1372: goto Lcd3; 1373: 1374: case X(OPdiv, TYcfloat, TYfloat): 1375: case X(OPdiv, TYcdouble, TYdouble): 1376: case X(OPdiv, TYcldouble, TYldouble): 1377: c1 = loadComplex(e1); 1378: retregs = mST0; 1379: c2 = codelem(e2, &retregs, FALSE); 1380: c3 = makesure87(e1, sz2, 1, 0); 
1381: c3 = cat(c3,makesure87(e1, 0, 2, 0)); 1382: goto Lcd1; 1383: 1384: case X(OPdiv, TYcfloat, TYifloat): 1385: case X(OPdiv, TYcdouble, TYidouble): 1386: case X(OPdiv, TYcldouble, TYildouble): 1387: c1 = loadComplex(e1); 1388: c1 = genf2(c1,0xD9,0xC8 + 1); // FXCH ST(1) 1389: xchg87(0, 1); 1390: genf2(c1, 0xD9, 0xE0); // FCHS 1391: retregs = mST0; 1392: c2 = codelem(e2, &retregs, FALSE); 1393: c3 = makesure87(e1, 0, 1, 0); 1394: c3 = cat(c3,makesure87(e1, sz2, 2, 0)); 1395: Lcd1: 1396: c3 = genf2(c3,0xDC,0xF8 + 2); // FDIV ST(2), ST 1397: genf2(c3,0xDE,0xF8 + 1); // FDIVP ST(1), ST 1398: Lcd3: 1399: pop87(); 1400: Lcd4: 1401: retregs = mST01; 1402: c4 = fixresult_complex87(e, retregs, pretregs); 1403: return cat4(c1, c2, c3, c4); 1404: 1405: case X(OPmod, TYcfloat, TYfloat): 1406: case X(OPmod, TYcdouble, TYdouble): 1407: case X(OPmod, TYcldouble, TYldouble): 1408: case X(OPmod, TYcfloat, TYifloat): 1409: case X(OPmod, TYcdouble, TYidouble): 1410: case X(OPmod, TYcldouble, TYildouble): 1411: /* 1412: fld E1.re 1413: fld E1.im 1414: fld E2 1415: fxch ST(1) 1416: FM1: fprem 1417: fstsw word ptr sw 1418: fwait 1419: mov AH, byte ptr sw+1 1420: jp FM1 1421: fxch ST(2) 1422: FM2: fprem 1423: fstsw word ptr sw 1424: fwait 1425: mov AH, byte ptr sw+1 1426: jp FM2 1427: fstp ST(1) 1428: fxch ST(1) 1429: */ 1430: c1 = loadComplex(e1); 1431: retregs = mST0; 1432: c2 = codelem(e2, &retregs, FALSE); 1433: c3 = makesure87(e1, sz2, 1, 0); 1434: c3 = cat(c3,makesure87(e1, 0, 2, 0)); 1435: c3 = genf2(c3, 0xD9, 0xC8 + 1); // FXCH ST(1) 1436: 1437: cx = gen2(NULL, 0xD9, 0xF8); // FPREM 1438: cx = cg87_87topsw(cx); 1439: cx = genjmp(cx, JP, FLcode, (block *)cx); // JP FM1 1440: cx = genf2(cx, 0xD9, 0xC8 + 2); // FXCH ST(2) 1441: c3 = cat(c3,cx); 1442: 1443: cx = gen2(NULL, 0xD9, 0xF8); // FPREM 1444: cx = cg87_87topsw(cx); 1445: cx = genjmp(cx, JP, FLcode, (block *)cx); // JP FM2 1446: cx = genf2(cx,0xDD,0xD8 + 1); // FSTP ST(1) 1447: cx = genf2(cx, 0xD9, 0xC8 + 1); // FXCH ST(1) 
1448: c3 = cat(c3,cx); 1449: 1450: goto Lcd3; 1451: 1452: default: 1453: #ifdef DEBUG 1454: elem_print(e); 1455: #endif 1456: assert(0); 1457: break; 1458: } 1459: #undef X 1460: 1461: e2oper = e2->Eoper; 1462: 1463: /* Move double-sized operand into the second position if there's a chance 1464: * it will allow combining a load with an operation (DMD Bugzilla 2905) 1465: */ 1466: if ( ((tybasic(e1->Ety) == TYdouble) 1467: && ((e1->Eoper == OPvar) || (e1->Eoper == OPconst)) 1468: && (tybasic(e2->Ety) != TYdouble)) || 1469: (e1->Eoper == OPconst) || 1470: (e1->Eoper == OPvar && 1471: ((e1->Ety & (mTYconst | mTYimmutable) && !OTleaf(e2oper)) || 1472: (e2oper == OPd_f && 1473: (e2->E1->Eoper == OPs32_d || e2->E1->Eoper == OPs64_d || e2->E1->Eoper == OPs16_d) && 1474: e2->E1->E1->Eoper == OPvar 1475: ) || 1476: ((e2oper == OPs32_d || e2oper == OPs64_d || e2oper == OPs16_d) && 1477: e2->E1->Eoper == OPvar 1478: ) 1479: ) 1480: ) 1481: ) 1482: { // Reverse order of evaluation 1483: e1 = e->E2; 1484: e2 = e->E1; 1485: op = oprev[op + 1]; 1486: reverse ^= 1; 1487: } 1488: 1489: c1 = codelem(e1,&retregs,FALSE); 1490: note87(e1,0,0); 1491: 1492: if (config.flags4 & CFG4fdivcall && e->Eoper == OPdiv) 1493: { 1494: regm_t retregs = mST0;
warning C6246: Local declaration of 'retregs' hides declaration of the same name in outer scope. For additional information, see previous declaration at line '936' of 'c:\projects\extern\d\dmd\src\backend\cg87.c': Lines: 936
1495: c2 = load87(e2,0,&retregs,e1,-1); 1496: c2 = cat(c2,makesure87(e1,0,1,0)); 1497: if (op == 7) // if reverse divide 1498: c2 = genf2(c2,0xD9,0xC8 + 1); // FXCH ST(1) 1499: c2 = cat(c2,callclib(e,CLIBfdiv87,&retregs,0)); 1500: pop87(); 1501: resregm = mST0; 1502: freenode(e2); 1503: c4 = fixresult87(e,resregm,pretregs); 1504: } 1505: else if (e->Eoper == OPmod) 1506: { 1507: /* 1508: * fld tbyte ptr y 1509: * fld tbyte ptr x // ST = x, ST1 = y 1510: * FM1: // We don't use fprem1 because for some inexplicable 1511: * // reason we get -5 when we do _modulo(15, 10) 1512: * fprem // ST = ST % ST1 1513: * fstsw word ptr sw 1514: * fwait 1515: * mov AH,byte ptr sw+1 // get msb of status word in AH 1516: * sahf // transfer to flags 1517: * jp FM1 // continue till ST < ST1 1518: * fstp ST(1) // leave remainder on stack 1519: */ 1520: regm_t retregs = mST0;
warning C6246: Local declaration of 'retregs' hides declaration of the same name in outer scope. For additional information, see previous declaration at line '936' of 'c:\projects\extern\d\dmd\src\backend\cg87.c': Lines: 936
1521: c2 = load87(e2,0,&retregs,e1,-1); 1522: c2 = cat(c2,makesure87(e1,0,1,0)); // now have x,y on stack; need y,x 1523: if (!reverse) // if not reverse modulo 1524: c2 = genf2(c2,0xD9,0xC8 + 1); // FXCH ST(1) 1525: 1526: c3 = gen2(NULL, 0xD9, 0xF8); // FM1: FPREM 1527: c3 = cg87_87topsw(c3); 1528: c3 = genjmp(c3, JP, FLcode, (block *)c3); // JP FM1 1529: c3 = genf2(c3,0xDD,0xD8 + 1); // FSTP ST(1) 1530: 1531: pop87(); 1532: resregm = mST0; 1533: freenode(e2); 1534: c4 = fixresult87(e,resregm,pretregs); 1535: } 1536: else 1537: { c2 = load87(e2,0,pretregs,e1,op); 1538: freenode(e2); 1539: } 1540: if (*pretregs & mST0) 1541: note87(e,0,0); 1542: //printf("orth87(-e = %p, *pretregs = x%x)\n", e, *pretregs); 1543: return cat4(c1,c2,c3,c4); 1544: } 1545: 1546: /***************************** 1547: * Load e into ST01. 1548: */ 1549: 1550: code *loadComplex(elem *e) 1551: { int sz; 1552: regm_t retregs; 1553: code *c; 1554: 1555: sz = tysize(e->Ety); 1556: switch (tybasic(e->Ety)) 1557: { 1558: case TYfloat: 1559: case TYdouble: 1560: case TYldouble: 1561: retregs = mST0; 1562: c = codelem(e,&retregs,FALSE); 1563: // Convert to complex with a 0 for the imaginary part 1564: c = cat(c, push87()); 1565: c = gen2(c,0xD9,0xEE); // FLDZ 1566: break; 1567: 1568: case TYifloat: 1569: case TYidouble: 1570: case TYildouble: 1571: // Convert to complex with a 0 for the real part 1572: c = push87(); 1573: c = gen2(c,0xD9,0xEE); // FLDZ 1574: retregs = mST0; 1575: c = cat(c, codelem(e,&retregs,FALSE)); 1576: break; 1577: 1578: case TYcfloat: 1579: case TYcdouble: 1580: case TYcldouble: 1581: sz /= 2; 1582: retregs = mST01; 1583: c = codelem(e,&retregs,FALSE); 1584: break; 1585: 1586: default: 1587: assert(0); 1588: } 1589: note87(e, 0, 1); 1590: note87(e, sz, 0); 1591: return c; 1592: } 1593: 1594: /************************* 1595: * If op == -1, load expression e into ST0. 1596: * else compute (eleft op e), eleft is in ST0. 
1597: * Must follow same logic as cmporder87(); 1598: */ 1599: 1600: code *load87(elem *e,unsigned eoffset,regm_t *pretregs,elem *eleft,int op) 1601: { 1602: code *ccomma,*c,*c2,*cpush; 1603: code cs; 1604: regm_t retregs; 1605: unsigned reg,mf,mf1; 1606: int opr; 1607: unsigned char ldop; 1608: tym_t ty; 1609: int i; 1610: 1611: #if NDPP 1612: printf("+load87(e=%p, eoffset=%d, *pretregs=%s, eleft=%p, op=%d, stackused = %d)\n",e,eoffset,regm_str(*pretregs),eleft,op,stackused); 1613: #endif 1614: elem_debug(e); 1615: ccomma = NULL; 1616: cpush = NULL; 1617: if (ADDFWAIT()) 1618: cs.Iflags = CFwait; 1619: else 1620: cs.Iflags = 0; 1621: cs.Irex = 0; 1622: opr = oprev[op + 1]; 1623: ty = tybasic(e->Ety); 1624: if ((ty == TYldouble || ty == TYildouble) && 1625: op != -1 && e->Eoper != OPd_ld) 1626: goto Ldefault; 1627: mf = (ty == TYfloat || ty == TYifloat || ty == TYcfloat) ? MFfloat : MFdouble; 1628: L5: 1629: switch (e->Eoper) 1630: { 1631: case OPcomma: 1632: ccomma = docommas(&e); 1633: // if (op != -1) 1634: // ccomma = cat(ccomma,makesure87(eleft,eoffset,0,0)); 1635: goto L5; 1636: 1637: case OPvar: 1638: notreg(e); 1639: case OPind: 1640: L2:
warning C4102: 'L2' : unreferenced label
1641: if (op != -1) 1642: { 1643: if (e->Ecount && e->Ecount != e->Ecomsub && 1644: (i = cse_get(e, 0)) >= 0) 1645: { static unsigned char b2[8] = {0xC0,0xC8,0xD0,0xD8,0xE0,0xE8,0xF0,0xF8}; 1646: 1647: c = genf2(NULL,0xD8,b2[op] + i); // Fop ST(i) 1648: } 1649: else 1650: { 1651: c = getlvalue(&cs,e,0); 1652: if (I64) 1653: cs.Irex &= ~REX_W; // don't use for x87 ops 1654: c = cat(c,makesure87(eleft,eoffset,0,0)); 1655: cs.Iop = ESC(mf,0); 1656: cs.Irm |= modregrm(0,op,0); 1657: c = gen(c,&cs); 1658: } 1659: } 1660: else 1661: { 1662: cpush = push87(); 1663: switch (ty) 1664: { 1665: case TYfloat: 1666: case TYdouble: 1667: case TYifloat: 1668: case TYidouble: 1669: case TYcfloat: 1670: case TYcdouble: 1671: case TYdouble_alias: 1672: c = loadea(e,&cs,ESC(mf,1),0,0,0,0); // FLD var 1673: break; 1674: case TYldouble: 1675: case TYildouble: 1676: case TYcldouble: 1677: c = loadea(e,&cs,0xDB,5,0,0,0); // FLD var 1678: break; 1679: default: 1680: // __debug printf("ty = x%x\n", ty); 1681: assert(0); 1682: break; 1683: } 1684: note87(e,0,0); 1685: } 1686: break; 1687: case OPd_f: 1688: case OPf_d: 1689: case OPd_ld: 1690: mf1 = (tybasic(e->E1->Ety) == TYfloat || tybasic(e->E1->Ety) == TYifloat) 1691: ? 
MFfloat : MFdouble; 1692: if (op != -1 && stackused) 1693: note87(eleft,eoffset,0); // don't trash this value 1694: if (e->E1->Eoper == OPvar || e->E1->Eoper == OPind) 1695: { 1696: #if 1 1697: L4: 1698: c = getlvalue(&cs,e->E1,0); 1699: cs.Iop = ESC(mf1,0); 1700: if (ADDFWAIT()) 1701: cs.Iflags |= CFwait; 1702: if (!I16) 1703: cs.Iflags &= ~CFopsize; 1704: if (op != -1) 1705: { cs.Irm |= modregrm(0,op,0); 1706: c = cat(c,makesure87(eleft,eoffset,0,0)); 1707: } 1708: else 1709: { cs.Iop |= 1; 1710: c = cat(c,push87()); 1711: } 1712: c = gen(c,&cs); /* FLD / Fop */ 1713: #else 1714: c = loadea(e->E1,&cs,ESC(mf1,1),0,0,0,0); /* FLD e->E1 */ 1715: #endif 1716: /* Variable cannot be put into a register anymore */ 1717: if (e->E1->Eoper == OPvar) 1718: notreg(e->E1); 1719: freenode(e->E1); 1720: } 1721: else 1722: { 1723: retregs = mST0; 1724: c = codelem(e->E1,&retregs,FALSE); 1725: if (op != -1) 1726: { c = cat(c,makesure87(eleft,eoffset,1,0)); 1727: c = genf2(c,0xDE,modregrm(3,opr,1)); // FopRP 1728: pop87(); 1729: } 1730: } 1731: break; 1732: 1733: case OPs64_d: 1734: if (e->E1->Eoper == OPvar || 1735: (e->E1->Eoper == OPind && e->E1->Ecount == 0)) 1736: { 1737: c = getlvalue(&cs,e->E1,0); 1738: cs.Iop = 0xDF; 1739: if (ADDFWAIT()) 1740: cs.Iflags |= CFwait; 1741: if (!I16) 1742: cs.Iflags &= ~CFopsize; 1743: c = cat(c,push87()); 1744: cs.Irm |= modregrm(0,5,0); 1745: c = gen(c,&cs); // FILD m64 1746: // Variable cannot be put into a register anymore 1747: if (e->E1->Eoper == OPvar) 1748: notreg(e->E1); 1749: freenode(e->E1); 1750: } 1751: else if (I64) 1752: { 1753: retregs = ALLREGS; 1754: c = codelem(e->E1,&retregs,FALSE); 1755: reg = findreg(retregs); 1756: c = genfltreg(c,0x89,reg,0); // MOV floatreg,reg 1757: code_orrex(c, REX_W); 1758: c = cat(c,push87()); 1759: c = genfltreg(c,0xDF,5,0); // FILD long long ptr floatreg 1760: } 1761: else 1762: { 1763: retregs = ALLREGS; 1764: c = codelem(e->E1,&retregs,FALSE); 1765: reg = findreglsw(retregs); 1766: c = 
genfltreg(c,0x89,reg,0); // MOV floatreg,reglsw 1767: reg = findregmsw(retregs); 1768: c = genfltreg(c,0x89,reg,4); // MOV floatreg+4,regmsw 1769: c = cat(c,push87()); 1770: c = genfltreg(c,0xDF,5,0); // FILD long long ptr floatreg 1771: } 1772: if (op != -1) 1773: { c = cat(c,makesure87(eleft,eoffset,1,0)); 1774: c = genf2(c,0xDE,modregrm(3,opr,1)); // FopRP 1775: pop87(); 1776: } 1777: break; 1778: 1779: case OPconst: 1780: ldop = loadconst(e, 0); 1781: if (ldop) 1782: { 1783: cpush = push87(); 1784: c = genf2(NULL,0xD9,ldop); // FLDx 1785: if (op != -1) 1786: { genf2(c,0xDE,modregrm(3,opr,1)); // FopRP 1787: pop87(); 1788: } 1789: } 1790: else 1791: { 1792: assert(0); 1793: } 1794: break; 1795: 1796: case OPu16_d: 1797: { 1798: /* This opcode should never be generated */ 1799: /* (probably shouldn't be for 16 bit code too) */ 1800: assert(!I32); 1801: 1802: if (op != -1) 1803: note87(eleft,eoffset,0); // don't trash this value 1804: retregs = ALLREGS & mLSW; 1805: c = codelem(e->E1,&retregs,FALSE); 1806: c = regwithvalue(c,ALLREGS & mMSW,0,&reg,0); // 0-extend 1807: retregs |= mask[reg]; 1808: mf1 = MFlong; 1809: goto L3; 1810: } 1811: case OPs16_d: mf1 = MFword; goto L6; 1812: case OPs32_d: mf1 = MFlong; goto L6; 1813: L6: 1814: if (op != -1) 1815: note87(eleft,eoffset,0); // don't trash this value 1816: if (e->E1->Eoper == OPvar || 1817: (e->E1->Eoper == OPind && e->E1->Ecount == 0)) 1818: { 1819: goto L4; 1820: } 1821: else 1822: { 1823: retregs = ALLREGS; 1824: c = codelem(e->E1,&retregs,FALSE); 1825: L3: 1826: if (I16 && e->Eoper != OPs16_d) 1827: { 1828: /* MOV floatreg+2,reg */ 1829: reg = findregmsw(retregs); 1830: c = genfltreg(c,0x89,reg,REGSIZE); 1831: retregs &= mLSW; 1832: } 1833: reg = findreg(retregs); 1834: c = genfltreg(c,0x89,reg,0); /* MOV floatreg,reg */ 1835: if (op != -1) 1836: { c = cat(c,makesure87(eleft,eoffset,0,0)); 1837: genfltreg(c,ESC(mf1,0),op,0); /* Fop floatreg */ 1838: } 1839: else 1840: { 1841: /* FLD long ptr floatreg */ 1842: 
c = cat(c,push87()); 1843: c = genfltreg(c,ESC(mf1,1),0,0); 1844: } 1845: } 1846: break; 1847: default: 1848: Ldefault: 1849: retregs = mST0; 1850: #if 1 /* Do this instead of codelem() to avoid the freenode(e). 1851: We also lose CSE capability */ 1852: if (e->Eoper == OPconst) 1853: { 1854: c = load87(e, 0, &retregs, NULL, -1); 1855: } 1856: else 1857: c = (*cdxxx[e->Eoper])(e,&retregs); 1858: #else 1859: c = codelem(e,&retregs,FALSE); 1860: #endif 1861: if (op != -1) 1862: { 1863: c = cat(c,makesure87(eleft,eoffset,1,(op == 0 || op == 1))); 1864: pop87(); 1865: if (op == 4 || op == 6) // sub or div 1866: { code *cl; 1867: 1868: cl = code_last(c); 1869: if (cl && cl->Iop == 0xD9 && cl->Irm == 0xC9) // FXCH ST(1) 1870: { cl->Iop = NOP; 1871: opr = op; // reverse operands 1872: } 1873: } 1874: c = genf2(c,0xDE,modregrm(3,opr,1)); // FopRP 1875: } 1876: break; 1877: } 1878: if (op == 3) // FCOMP 1879: { pop87(); // extra pop was done 1880: cg87_87topsw(c); 1881: } 1882: c2 = fixresult87(e,((op == 3) ? mPSW : mST0),pretregs); 1883: #if NDPP 1884: printf("-load87(e=%p, eoffset=%d, *pretregs=%s, eleft=%p, op=%d, stackused = %d)\n",e,eoffset,regm_str(*pretregs),eleft,op,stackused); 1885: #endif 1886: return cat4(ccomma,cpush,c,c2); 1887: } 1888: 1889: /******************************** 1890: * Determine if a compare is to be done forwards (return 0) 1891: * or backwards (return 1). 1892: * Must follow same logic as load87(). 
1893: */ 1894: 1895: int cmporder87(elem *e) 1896: { 1897: //printf("cmporder87(%p)\n",e); 1898: L1: 1899: switch (e->Eoper) 1900: { 1901: case OPcomma: 1902: e = e->E2; 1903: goto L1; 1904: 1905: case OPd_f: 1906: case OPf_d: 1907: case OPd_ld: 1908: if (e->E1->Eoper == OPvar || e->E1->Eoper == OPind) 1909: goto ret0; 1910: else 1911: goto ret1; 1912: 1913: case OPconst: 1914: if (loadconst(e, 0) || tybasic(e->Ety) == TYldouble 1915: || tybasic(e->Ety) == TYildouble) 1916: { 1917: //printf("ret 1, loadconst(e) = %d\n", loadconst(e)); 1918: goto ret1; 1919: } 1920: goto ret0; 1921: 1922: case OPvar: 1923: case OPind: 1924: if (tybasic(e->Ety) == TYldouble || 1925: tybasic(e->Ety) == TYildouble) 1926: goto ret1; 1927: case OPu16_d: 1928: case OPs16_d: 1929: case OPs32_d: 1930: goto ret0; 1931: 1932: case OPs64_d: 1933: goto ret1; 1934: 1935: default: 1936: goto ret1; 1937: } 1938: 1939: ret1: return 1; 1940: ret0: return 0; 1941: } 1942: 1943: /******************************* 1944: * Perform an assignment to a long double/double/float. 
1945: */ 1946: 1947: code *eq87(elem *e,regm_t *pretregs) 1948: { 1949: regm_t retregs; 1950: code *c1,*c2; 1951: code cs; 1952: unsigned op1; 1953: unsigned op2; 1954: tym_t ty1; 1955: 1956: //printf("+eq87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs)); 1957: assert(e->Eoper == OPeq); 1958: retregs = mST0 | (*pretregs & mPSW); 1959: c1 = codelem(e->E2,&retregs,FALSE); 1960: ty1 = tybasic(e->E1->Ety); 1961: switch (ty1) 1962: { case TYdouble_alias: 1963: case TYidouble: 1964: case TYdouble: op1 = ESC(MFdouble,1); op2 = 3; break; 1965: case TYifloat: 1966: case TYfloat: op1 = ESC(MFfloat,1); op2 = 3; break; 1967: case TYildouble: 1968: case TYldouble: op1 = 0xDB; op2 = 7; break; 1969: default: 1970: assert(0); 1971: } 1972: if (*pretregs & (mST0 | ALLREGS | mBP | XMMREGS)) // if want result on stack too 1973: { 1974: if (ty1 == TYldouble || ty1 == TYildouble) 1975: { 1976: c1 = cat(c1,push87()); 1977: c1 = genf2(c1,0xD9,0xC0); // FLD ST(0) 1978: pop87(); 1979: } 1980: else 1981: op2 = 2; // FST e->E1 1982: } 1983: else 1984: { // FSTP e->E1 1985: pop87(); 1986: } 1987: #if 0 1988: // Doesn't work if ST(0) gets saved to the stack by getlvalue() 1989: c2 = loadea(e->E1,&cs,op1,op2,0,0,0); 1990: #else 1991: cs.Irex = 0; 1992: cs.Iflags = 0; 1993: cs.Iop = op1; 1994: if (*pretregs & (mST0 | ALLREGS | mBP)) // if want result on stack too 1995: { // Make sure it's still there 1996: elem *e2 = e->E2; 1997: while (e2->Eoper == OPcomma) 1998: e2 = e2->E2; 1999: note87(e2,0,0); 2000: c2 = getlvalue(&cs, e->E1, 0); 2001: c2 = cat(c2,makesure87(e2,0,0,1)); 2002: } 2003: else 2004: { 2005: c2 = getlvalue(&cs, e->E1, 0); 2006: } 2007: cs.Irm |= modregrm(0,op2,0); // OR in reg field 2008: if (I32) 2009: cs.Iflags &= ~CFopsize; 2010: else if (ADDFWAIT()) 2011: cs.Iflags |= CFwait; 2012: else if (I64) 2013: cs.Irex &= ~REX_W; 2014: c2 = gen(c2, &cs); 2015: #if LNGDBLSIZE == 12 2016: if (tysize[TYldouble] == 12) 2017: { 2018: /* This deals with the fact that 10 byte reals 
really 2019: * occupy 12 bytes by zeroing the extra 2 bytes. 2020: */ 2021: if (op1 == 0xDB) 2022: { 2023: cs.Iop = 0xC7; // MOV EA+10,0 2024: NEWREG(cs.Irm, 0); 2025: cs.IEV1.sp.Voffset += 10; 2026: cs.IFL2 = FLconst; 2027: cs.IEV2.Vint = 0; 2028: cs.Iflags |= CFopsize; 2029: c2 = gen(c2, &cs); 2030: } 2031: } 2032: #endif 2033: if (tysize[TYldouble] == 16) 2034: { 2035: /* This deals with the fact that 10 byte reals really 2036: * occupy 16 bytes by zeroing the extra 6 bytes. 2037: */ 2038: if (op1 == 0xDB) 2039: { 2040: cs.Irex &= ~REX_W; 2041: cs.Iop = 0xC7; // MOV EA+10,0 2042: NEWREG(cs.Irm, 0); 2043: cs.IEV1.sp.Voffset += 10; 2044: cs.IFL2 = FLconst; 2045: cs.IEV2.Vint = 0; 2046: cs.Iflags |= CFopsize; 2047: c2 = gen(c2, &cs); 2048: 2049: cs.IEV1.sp.Voffset += 2; 2050: cs.Iflags &= ~CFopsize; 2051: c2 = gen(c2, &cs); 2052: } 2053: } 2054: #endif 2055: c2 = genfwait(c2); 2056: freenode(e->E1); 2057: c1 = cat3(c1,c2,fixresult87(e,mST0 | mPSW,pretregs)); 2058: return c1; 2059: } 2060: 2061: /******************************* 2062: * Perform an assignment to a long double/double/float. 2063: */ 2064: 2065: code *complex_eq87(elem *e,regm_t *pretregs) 2066: { 2067: regm_t retregs; 2068: code *c1,*c2; 2069: code cs; 2070: unsigned op1; 2071: unsigned op2; 2072: unsigned sz; 2073: tym_t ty1; 2074: int fxch = 0; 2075: 2076: //printf("complex_eq87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs)); 2077: assert(e->Eoper == OPeq); 2078: cs.Iflags = ADDFWAIT() ? 
CFwait : 0; 2079: cs.Irex = 0; 2080: retregs = mST01 | (*pretregs & mPSW); 2081: c1 = codelem(e->E2,&retregs,FALSE); 2082: ty1 = tybasic(e->E1->Ety); 2083: switch (ty1) 2084: { 2085: case TYcdouble: op1 = ESC(MFdouble,1); op2 = 3; break; 2086: case TYcfloat: op1 = ESC(MFfloat,1); op2 = 3; break; 2087: case TYcldouble: op1 = 0xDB; op2 = 7; break; 2088: default: 2089: assert(0); 2090: } 2091: if (*pretregs & (mST01 | mXMM0 | mXMM1)) // if want result on stack too 2092: { 2093: if (ty1 == TYcldouble) 2094: { 2095: c1 = cat(c1,push87()); 2096: c1 = cat(c1,push87()); 2097: c1 = genf2(c1,0xD9,0xC0 + 1); // FLD ST(1) 2098: genf2(c1,0xD9,0xC0 + 1); // FLD ST(1) 2099: pop87(); 2100: pop87(); 2101: } 2102: else 2103: { op2 = 2; // FST e->E1 2104: fxch = 1; 2105: } 2106: } 2107: else 2108: { // FSTP e->E1 2109: pop87(); 2110: pop87(); 2111: } 2112: sz = tysize(ty1) / 2; 2113: if (*pretregs & (mST01 | mXMM0 | mXMM1)) 2114: { 2115: cs.Iflags = 0; 2116: cs.Irex = 0; 2117: cs.Iop = op1; 2118: c2 = getlvalue(&cs, e->E1, 0); 2119: cs.IEVoffset1 += sz; 2120: cs.Irm |= modregrm(0, op2, 0); 2121: c2 = cat(c2, makesure87(e->E2, sz, 0, 0)); 2122: c2 = gen(c2, &cs); 2123: c2 = genfwait(c2); 2124: c2 = cat(c2, makesure87(e->E2, 0, 1, 0)); 2125: } 2126: else 2127: { 2128: c2 = loadea(e->E1,&cs,op1,op2,sz,0,0); 2129: c2 = genfwait(c2); 2130: } 2131: if (fxch) 2132: c2 = genf2(c2,0xD9,0xC8 + 1); // FXCH ST(1) 2133: cs.IEVoffset1 -= sz; 2134: gen(c2, &cs); 2135: if (fxch) 2136: genf2(c2,0xD9,0xC8 + 1); // FXCH ST(1) 2137: if (tysize[TYldouble] == 12) 2138: { 2139: if (op1 == 0xDB) 2140: { 2141: cs.Iop = 0xC7; // MOV EA+10,0 2142: NEWREG(cs.Irm, 0); 2143: cs.IEV1.sp.Voffset += 10; 2144: cs.IFL2 = FLconst; 2145: cs.IEV2.Vint = 0; 2146: cs.Iflags |= CFopsize; 2147: c2 = gen(c2, &cs); 2148: cs.IEVoffset1 += 12; 2149: c2 = gen(c2, &cs); // MOV EA+22,0 2150: } 2151: } 2152: if (tysize[TYldouble] == 16) 2153: { 2154: if (op1 == 0xDB) 2155: { 2156: cs.Iop = 0xC7; // MOV EA+10,0 2157: NEWREG(cs.Irm, 
0); 2158: cs.IEV1.sp.Voffset += 10; 2159: cs.IFL2 = FLconst; 2160: cs.IEV2.Vint = 0; 2161: cs.Iflags |= CFopsize; 2162: c2 = gen(c2, &cs); 2163: 2164: cs.IEV1.sp.Voffset += 2; 2165: cs.Iflags &= ~CFopsize; 2166: c2 = gen(c2, &cs); 2167: 2168: cs.IEV1.sp.Voffset += 14; 2169: cs.Iflags |= CFopsize; 2170: c2 = gen(c2, &cs); 2171: 2172: cs.IEV1.sp.Voffset += 2; 2173: cs.Iflags &= ~CFopsize; 2174: c2 = gen(c2, &cs); 2175: } 2176: } 2177: c2 = genfwait(c2); 2178: freenode(e->E1); 2179: return cat3(c1,c2,fixresult_complex87(e,mST01 | mPSW,pretregs)); 2180: } 2181: 2182: /******************************* 2183: * Perform an assignment while converting to integral type, 2184: * i.e. handle (e1 = (int) e2) 2185: */ 2186: 2187: code *cnvteq87(elem *e,regm_t *pretregs) 2188: { 2189: regm_t retregs; 2190: code *c1,*c2; 2191: code cs; 2192: unsigned op1; 2193: unsigned op2; 2194: 2195: assert(e->Eoper == OPeq); 2196: assert(!*pretregs); 2197: retregs = mST0; 2198: elem_debug(e->E2); 2199: c1 = codelem(e->E2->E1,&retregs,FALSE); 2200: 2201: switch (e->E2->Eoper) 2202: { case OPd_s16: 2203: op1 = ESC(MFword,1); 2204: op2 = 3; 2205: break; 2206: case OPd_s32: 2207: case OPd_u16: 2208: op1 = ESC(MFlong,1); 2209: op2 = 3; 2210: break; 2211: case OPd_s64: 2212: op1 = 0xDF; 2213: op2 = 7; 2214: break; 2215: default: 2216: assert(0); 2217: } 2218: freenode(e->E2); 2219: 2220: c1 = genfwait(c1); 2221: c1 = genrnd(c1, CW_roundto0); // FLDCW roundto0 2222: 2223: pop87(); 2224: cs.Iflags = ADDFWAIT() ? CFwait : 0; 2225: if (e->E1->Eoper == OPvar) 2226: notreg(e->E1); // cannot be put in register anymore 2227: c2 = loadea(e->E1,&cs,op1,op2,0,0,0); 2228: 2229: c2 = genfwait(c2); 2230: c2 = genrnd(c2, CW_roundtonearest); // FLDCW roundtonearest 2231: 2232: freenode(e->E1); 2233: return cat(c1,c2); 2234: } 2235: 2236: /********************************** 2237: * Perform +=, -=, *= and /= for doubles. 
 * Also handles OPmodass and the OPpostinc/OPpostdec operators for real and
 * imaginary lvalues. Complex lvalues are forwarded to opmod_complex87()
 * (OPmodass) or opass_complex87() (everything else).
 * Input:
 *      e               the op-assign elem; e->E1 is the lvalue, e->E2 the rvalue
 *      *pretregs       where the caller wants the result
 * Returns:
 *      generated code
 */

code *opass87(elem *e,regm_t *pretregs)
{
    regm_t retregs;
    code *cl,*cr,*c;
    code cs;
    unsigned op;                // reg field (already shifted) for the memory-operand form
    unsigned opld;              // second byte of the DE xx popping form used for long doubles
    unsigned op1;               // ESC opcode for loading/storing e->E1
    unsigned op2;               // reg field of the final store: FSTP by default
    tym_t ty1;

    ty1 = tybasic(e->E1->Ety);
    switch (ty1)
    {   case TYdouble_alias:
        case TYidouble:
        case TYdouble:      op1 = ESC(MFdouble,1);  op2 = 3; break;
        case TYifloat:
        case TYfloat:       op1 = ESC(MFfloat,1);   op2 = 3; break;
        case TYildouble:
        case TYldouble:     op1 = 0xDB;             op2 = 7; break;

        case TYcfloat:
        case TYcdouble:
        case TYcldouble:
            return (e->Eoper == OPmodass)
                    ? opmod_complex87(e, pretregs)
                    : opass_complex87(e, pretregs);

        default:
            assert(0);
    }
    switch (e->Eoper)
    {   case OPpostinc:
        case OPaddass:  op = 0 << 3;    opld = 0xC1;    break;  // FADD
        case OPpostdec:
        case OPminass:  op = 5 << 3;    opld = 0xE1; /*0xE9;*/  break;  // FSUBR
        case OPmulass:  op = 1 << 3;    opld = 0xC9;    break;  // FMUL
        case OPdivass:  op = 7 << 3;    opld = 0xF1;    break;  // FDIVR
        case OPmodass:  break;          // op and opld are not used on the OPmodass path
        default:        assert(0);
    }
    retregs = mST0;
    cr = codelem(e->E2,&retregs,FALSE);         // evaluate rvalue
    note87(e->E2,0,0);
    cl = getlvalue(&cs,e->E1,0);
    cl = cat(cl,makesure87(e->E2,0,0,0));
    cs.Iflags |= ADDFWAIT() ? CFwait : 0;
    // NOTE(review): the sibling functions test !I16 here rather than I32 - confirm
    // whether the 64 bit case is meant to be excluded.
    if (I32)
        cs.Iflags &= ~CFopsize;
    if (config.flags4 & CFG4fdivcall && e->Eoper == OPdivass)
    {
        // Division through the CLIBfdiv87 library routine: load e->E1, swap the
        // operands, and call
        c = push87();
        cs.Iop = op1;
        if (ty1 == TYldouble || ty1 == TYildouble)
            cs.Irm |= modregrm(0, 5, 0);        // FLD tbyte ptr ...
        c = gen(c,&cs);
        c = genf2(c,0xD9,0xC8 + 1);             // FXCH ST(1)
        c = cat(c,callclib(e,CLIBfdiv87,&retregs,0));
        pop87();
    }
    else if (e->Eoper == OPmodass)
    {
        /*
         *          fld     tbyte ptr y
         *          fld     tbyte ptr x             // ST = x, ST1 = y
         *  FM1:    // We don't use fprem1 because for some inexplicable
         *          // reason we get -5 when we do _modulo(15, 10)
         *          fprem                           // ST = ST % ST1
         *          fstsw   word ptr sw
         *          fwait
         *          mov     AH,byte ptr sw+1        // get msb of status word in AH
         *          sahf                            // transfer to flags
         *          jp      FM1                     // continue till ST < ST1
         *          fstp    ST(1)                   // leave remainder on stack
         */
        code *c1;

        c = push87();
        cs.Iop = op1;
        if (ty1 == TYldouble || ty1 == TYildouble)
            cs.Irm |= modregrm(0, 5, 0);        // FLD tbyte ptr ...
        c = gen(c,&cs);                         // FLD   e->E1

        c1 = gen2(NULL, 0xD9, 0xF8);            // FPREM
        c1 = cg87_87topsw(c1);
        c1 = genjmp(c1, JP, FLcode, (block *)c1);       // JP FM1
        c1 = genf2(c1,0xDD,0xD8 + 1);           // FSTP ST(1)
        c = cat(c,c1);

        pop87();
    }
    else if (ty1 == TYldouble || ty1 == TYildouble)
    {
        // No memory-operand arithmetic form is used for tbyte operands:
        // load e->E1 and use the popping register form
        c = push87();
        cs.Iop = op1;
        cs.Irm |= modregrm(0, 5, 0);            // FLD tbyte ptr ...
        c = gen(c,&cs);                         // FLD   e->E1
        genf2(c,0xDE,opld);                     // FopP  ST(1)
        pop87();
    }
    else
    {   cs.Iop = op1 & ~1;                      // arithmetic form of the ESC opcode
        cs.Irm |= op;
        c = gen(CNIL,&cs);                      // Fop e->E1
    }
    if (*pretregs & mPSW)
        genftst(c,e,0);                         // FTST ST0
    /* if want result in registers      */
    if (*pretregs & (mST0 | ALLREGS | mBP))
    {
        if (ty1 == TYldouble || ty1 == TYildouble)
        {
            c = cat(c,push87());
            c = genf2(c,0xD9,0xC0);             // FLD ST(0)
            pop87();
        }
        else
            op2 = 2;                            // FST e->E1
    }
    else
    {   // FSTP
        pop87();
    }
    cs.Iop = op1;
    NEWREG(cs.Irm,op2);                         // FSTx e->E1
    freenode(e->E1);
    gen(c,&cs);
    genfwait(c);
    return cat4(cr,cl,c,fixresult87(e,mST0 | mPSW,pretregs));
}

/***********************************
 * Perform %= where E1 is complex and E2 is real or imaginary.
 * FPREM is applied to each half of E1 in turn with E2 as the divisor,
 * and both halves are stored back to E1.
 */

code *opmod_complex87(elem *e,regm_t *pretregs)
{
    regm_t retregs;
    code *cl,*cr,*c;
    code cs;
    tym_t ty1;
    unsigned sz2;               // size of one half of the complex lvalue

    /*          fld     E2
                fld     E1.re
        FM1:    fprem
                fstsw   word ptr sw
                fwait
                mov     AH, byte ptr sw+1
                jp      FM1
                fxch    ST(1)
                fld     E1.im
        FM2:    fprem
                fstsw   word ptr sw
                fwait
                mov     AH, byte ptr sw+1
                jp      FM2
                fstp    ST(1)
     */

    ty1 = tybasic(e->E1->Ety);
    sz2 = tysize[ty1] / 2;

    retregs = mST0;
    cr = codelem(e->E2,&retregs,FALSE);         // FLD E2
    note87(e->E2,0,0);
    cl = getlvalue(&cs,e->E1,0);
    cl = cat(cl,makesure87(e->E2,0,0,0));
    cs.Iflags |= ADDFWAIT() ? CFwait : 0;
    if (!I16)
        cs.Iflags &= ~CFopsize;

    c = push87();
    switch (ty1)
    {
        case TYcdouble:  cs.Iop = ESC(MFdouble,1);      break;
        case TYcfloat:   cs.Iop = ESC(MFfloat,1);       break;
        case TYcldouble: cs.Iop = 0xDB; cs.Irm |= modregrm(0, 5, 0); break;
        default:
            assert(0);
    }
    c = gen(c,&cs);                             // FLD E1.re

    code *c1;

    c1 = gen2(NULL, 0xD9, 0xF8);                // FPREM
    c1 = cg87_87topsw(c1);
    c1 = genjmp(c1, JP, FLcode, (block *)c1);   // JP FM1
    c1 = genf2(c1, 0xD9, 0xC8 + 1);             // FXCH ST(1)
    c = cat(c,c1);

    c = cat(c, push87());
    cs.IEVoffset1 += sz2;
    gen(c, &cs);                                // FLD E1.im

    c1 = gen2(NULL, 0xD9, 0xF8);                // FPREM
    c1 = cg87_87topsw(c1);
    c1 = genjmp(c1, JP, FLcode, (block *)c1);   // JP FM2
    c1 = genf2(c1,0xDD,0xD8 + 1);               // FSTP ST(1)
    c = cat(c,c1);

    pop87();

    // NOTE(review): for TYcldouble the reg field already holds 5 from the load
    // above, so OR-ing in 2 or 3 below yields 7 in both branches - confirm that
    // this is the intended store for the result-wanted case.
    if (*pretregs & (mST01 | mPSW))
    {
        cs.Irm |= modregrm(0, 2, 0);
        gen(c, &cs);            // FST mreal.im
        cs.IEVoffset1 -= sz2;
        gen(c, &cs);            // FST mreal.re
        retregs = mST01;
    }
    else
    {
        cs.Irm |= modregrm(0, 3, 0);
        gen(c, &cs);            // FSTP mreal.im
        cs.IEVoffset1 -= sz2;
        gen(c, &cs);            // FSTP mreal.re
        pop87();
        pop87();
        retregs = 0;
    }
    freenode(e->E1);
    genfwait(c);
    return cat4(cr,cl,c,fixresult_complex87(e,retregs,pretregs));
}

/**********************************
 * Perform +=, -=, *= and /= for the lvalue being complex.
 * A non-complex e->E2 combined with multiply or divide takes a dedicated
 * path (the scalar is duplicated on the 8087 stack and applied to both
 * halves); every other combination loads e->E2 through loadComplex().
 * Input:
 *      e               the op-assign elem; e->E1 is the complex lvalue
 *      *pretregs       where the caller wants the result
 * Returns:
 *      generated code
 */

code *opass_complex87(elem *e,regm_t *pretregs)
{
    regm_t retregs;
    regm_t idxregs;
    code *cl,*cr,*c;
    code cs;
    unsigned op;                // reg field (already shifted) for the memory-operand form
    unsigned op2;               // second byte of the DE xx popping form, ST(0) variant
    tym_t ty1;
    unsigned sz2;               // size of one half of the complex lvalue

    ty1 = tybasic(e->E1->Ety);
    sz2 = tysize[ty1] / 2;
    switch (e->Eoper)
    {   case OPpostinc:
        case OPaddass:  op = 0 << 3;            // FADD
                        op2 = 0xC0;             // FADDP ST(i),ST
                        break;
        case OPpostdec:
        case OPminass:  op = 5 << 3;            // FSUBR
                        op2 = 0xE0;             // FSUBRP ST(i),ST
                        break;
        case OPmulass:  op = 1 << 3;            // FMUL
                        op2 = 0xC8;             // FMULP ST(i),ST
                        break;
        case OPdivass:  op = 7 << 3;            // FDIVR
                        op2 = 0xF0;             // FDIVRP ST(i),ST
                        break;
        default:        assert(0);
    }

    if (!tycomplex(e->E2->Ety) &&
        (e->Eoper == OPmulass || e->Eoper == OPdivass))
    {
        // Scalar multiply/divide: duplicate the scalar so there is one copy per
        // half, then share the element-wise code at L1.
        // NOTE(review): this jump skips the cs.Iflags adjustments made before
        // the switch below - confirm that is intended.
        retregs = mST0;
        cr = codelem(e->E2, &retregs, FALSE);
        note87(e->E2, 0, 0);
        cl = getlvalue(&cs, e->E1, 0);
        cl = cat(cl,makesure87(e->E2,0,0,0));
        cl = cat(cl,push87());
        cl = genf2(cl,0xD9,0xC0);               // FLD ST(0)
        goto L1;
    }
    else
    {
        cr = loadComplex(e->E2);
        cl = getlvalue(&cs,e->E1,0);
        cl = cat(cl,makesure87(e->E2,sz2,0,0));
        cl = cat(cl,makesure87(e->E2,0,1,0));
    }
    cs.Iflags |= ADDFWAIT() ? CFwait : 0;
    if (!I16)
        cs.Iflags &= ~CFopsize;

    switch (e->Eoper)
    {
        case OPpostinc:
        case OPaddass:
        case OPpostdec:
        case OPminass:
        L1:
            // Element-wise operation on the two halves
            if (ty1 == TYcldouble)
            {
                c = push87();
                c = cat(c, push87());
                cs.Iop = 0xDB;
                cs.Irm |= modregrm(0, 5, 0);    // FLD tbyte ptr ...
                c = gen(c,&cs);                 // FLD e->E1.re
                cs.IEVoffset1 += sz2;
                gen(c,&cs);                     // FLD e->E1.im
                genf2(c, 0xDE, op2 + 2);        // FADDP/FSUBRP ST(2),ST
                genf2(c, 0xDE, op2 + 2);        // FADDP/FSUBRP ST(2),ST
                pop87();
                pop87();
                if (tyimaginary(e->E2->Ety))
                {
                    if (e->Eoper == OPmulass)
                    {
                        genf2(c, 0xD9, 0xE0);   // FCHS
                        genf2(c, 0xD9, 0xC8+1); // FXCH ST(1)
                    }
                    else if (e->Eoper == OPdivass)
                    {
                        genf2(c, 0xD9, 0xC8+1); // FXCH ST(1)
                        genf2(c, 0xD9, 0xE0);   // FCHS
                    }
                }
            L2:
                // Store a complex long double result from ST01 back to e->E1;
                // also reached from the OPmulass/OPdivass cases below
                if (*pretregs & (mST01 | mPSW))
                {
                    c = cat(c,push87());
                    c = cat(c,push87());
                    c = genf2(c,0xD9,0xC1);     // FLD ST(1)
                    c = genf2(c,0xD9,0xC1);     // FLD ST(1)
                    retregs = mST01;
                }
                else
                    retregs = 0;
                cs.Iop = 0xDB;
                cs.Irm |= modregrm(0,7,0);
                gen(c,&cs);                     // FSTP e->E1.im
                cs.IEVoffset1 -= sz2;
                gen(c,&cs);                     // FSTP e->E1.re
                pop87();
                pop87();

            }
            else
            {   // float/double halves: operate directly against memory
                unsigned char rmop = cs.Irm | op;
                unsigned char rmfst = cs.Irm | modregrm(0,2,0);
                unsigned char rmfstp = cs.Irm | modregrm(0,3,0);
                unsigned char iopfst = (ty1 == TYcfloat) ? 0xD9 : 0xDD;
                unsigned char iop = (ty1 == TYcfloat) ? 0xD8 : 0xDC;

                cs.Iop = iop;
                cs.Irm = rmop;
                cs.IEVoffset1 += sz2;
                c = gen(NULL, &cs);             // FSUBR mreal.im
                if (tyimaginary(e->E2->Ety) && (e->Eoper == OPmulass || e->Eoper == OPdivass))
                {
                    if (e->Eoper == OPmulass)
                        genf2(c, 0xD9, 0xE0);   // FCHS
                    genf2(c,0xD9,0xC8 + 1);     // FXCH ST(1)
                    cs.IEVoffset1 -= sz2;
                    gen(c, &cs);                // FMUL mreal.re
                    if (e->Eoper == OPdivass)
                        genf2(c, 0xD9, 0xE0);   // FCHS
                    if (*pretregs & (mST01 | mPSW))
                    {
                        cs.Iop = iopfst;
                        cs.Irm = rmfst;
                        cs.IEVoffset1 += sz2;
                        gen(c, &cs);            // FST mreal.im
                        genf2(c,0xD9,0xC8 + 1); // FXCH ST(1)
                        cs.IEVoffset1 -= sz2;
                        gen(c, &cs);            // FST mreal.re
                        genf2(c,0xD9,0xC8 + 1); // FXCH ST(1)
                        retregs = mST01;
                    }
                    else
                    {
                        cs.Iop = iopfst;
                        cs.Irm = rmfstp;
                        cs.IEVoffset1 += sz2;
                        gen(c, &cs);            // FSTP mreal.im
                        pop87();
                        cs.IEVoffset1 -= sz2;
                        gen(c, &cs);            // FSTP mreal.re
                        pop87();
                        retregs = 0;
                    }
                    goto L3;
                }

                if (*pretregs & (mST01 | mPSW))
                {
                    cs.Iop = iopfst;
                    cs.Irm = rmfst;
                    gen(c, &cs);                // FST mreal.im
                    genf2(c,0xD9,0xC8 + 1);     // FXCH ST(1)
                    cs.Iop = iop;
                    cs.Irm = rmop;
                    cs.IEVoffset1 -= sz2;
                    gen(c, &cs);                // FSUBR mreal.re
                    cs.Iop = iopfst;
                    cs.Irm = rmfst;
                    gen(c, &cs);                // FST mreal.re
                    genf2(c,0xD9,0xC8 + 1);     // FXCH ST(1)
                    retregs = mST01;
                }
                else
                {
                    cs.Iop = iopfst;
                    cs.Irm = rmfstp;
                    gen(c, &cs);                // FSTP mreal.im
                    pop87();
                    cs.Iop = iop;
                    cs.Irm = rmop;
                    cs.IEVoffset1 -= sz2;
                    gen(c, &cs);                // FSUBR mreal.re
                    cs.Iop = iopfst;
                    cs.Irm = rmfstp;
                    gen(c, &cs);                // FSTP mreal.re
                    pop87();
                    retregs = 0;
                }
            }
        L3:
            // Common exit for every path
            freenode(e->E1);
            genfwait(c);
            return cat4(cr,cl,c,fixresult_complex87(e,retregs,pretregs));

        case OPmulass:
            // Complex multiply: load e->E1 on top of e->E2 and call CLIBcmul
            c = push87();
            c = cat(c, push87());
            if (ty1 == TYcldouble)
            {
                cs.Iop = 0xDB;
                cs.Irm |= modregrm(0, 5, 0);    // FLD tbyte ptr ...
                c = gen(c,&cs);                 // FLD e->E1.re
                cs.IEVoffset1 += sz2;
                gen(c,&cs);                     // FLD e->E1.im
                retregs = mST01;
                c = cat(c,callclib(e, CLIBcmul, &retregs, 0));
                goto L2;
            }
            else
            {
                cs.Iop = (ty1 == TYcfloat) ? 0xD9 : 0xDD;
                cs.Irm |= modregrm(0, 0, 0);    // FLD tbyte ptr ...
                c = gen(c,&cs);                 // FLD e->E1.re
                cs.IEVoffset1 += sz2;
                gen(c,&cs);                     // FLD e->E1.im
                retregs = mST01;
                c = cat(c,callclib(e, CLIBcmul, &retregs, 0));
                if (*pretregs & (mST01 | mPSW))
                {
                    cs.Irm |= modregrm(0, 2, 0);
                    gen(c, &cs);                // FST mreal.im
                    cs.IEVoffset1 -= sz2;
                    gen(c, &cs);                // FST mreal.re
                    retregs = mST01;
                }
                else
                {
                    cs.Irm |= modregrm(0, 3, 0);
                    gen(c, &cs);                // FSTP mreal.im
                    cs.IEVoffset1 -= sz2;
                    gen(c, &cs);                // FSTP mreal.re
                    pop87();
                    pop87();
                    retregs = 0;
                }
                goto L3;
            }

        case OPdivass:
            // Complex divide: like multiply, but each loaded half is exchanged
            // with ST(2) before calling CLIBcdiv
            c = push87();
            c = cat(c, push87());
            idxregs = idxregm(&cs);             // mask of index regs used
            if (ty1 == TYcldouble)
            {
                cs.Iop = 0xDB;
                cs.Irm |= modregrm(0, 5, 0);    // FLD tbyte ptr ...
                c = gen(c,&cs);                 // FLD e->E1.re
                genf2(c,0xD9,0xC8 + 2);         // FXCH ST(2)
                cs.IEVoffset1 += sz2;
                gen(c,&cs);                     // FLD e->E1.im
                genf2(c,0xD9,0xC8 + 2);         // FXCH ST(2)
                retregs = mST01;
                c = cat(c,callclib(e, CLIBcdiv, &retregs, idxregs));
                goto L2;
            }
            else
            {
                cs.Iop = (ty1 == TYcfloat) ? 0xD9 : 0xDD;
                cs.Irm |= modregrm(0, 0, 0);    // FLD tbyte ptr ...
                c = gen(c,&cs);                 // FLD e->E1.re
                genf2(c,0xD9,0xC8 + 2);         // FXCH ST(2)
                cs.IEVoffset1 += sz2;
                gen(c,&cs);                     // FLD e->E1.im
                genf2(c,0xD9,0xC8 + 2);         // FXCH ST(2)
                retregs = mST01;
                c = cat(c,callclib(e, CLIBcdiv, &retregs, idxregs));
                if (*pretregs & (mST01 | mPSW))
                {
                    cs.Irm |= modregrm(0, 2, 0);
                    gen(c, &cs);                // FST mreal.im
                    cs.IEVoffset1 -= sz2;
                    gen(c, &cs);                // FST mreal.re
                    retregs = mST01;
                }
                else
                {
                    cs.Irm |= modregrm(0, 3, 0);
                    gen(c, &cs);                // FSTP mreal.im
                    cs.IEVoffset1 -= sz2;
                    gen(c, &cs);                // FSTP mreal.re
                    pop87();
                    pop87();
                    retregs = 0;
                }
                goto L3;
            }

        default:
            assert(0);
    }
    return NULL;
}

/**************************
 * OPnegass
 * Negates the lvalue in place by flipping the sign bit in memory with an
 * integer XOR (both halves for a complex type), then reloads the value
 * onto the 8087 stack if the caller wants a result.
 */

code *cdnegass87(elem *e,regm_t *pretregs)
{   regm_t retregs;
    tym_t tyml;
    unsigned op;
    code *cl,*cr,*c,cs;
    elem *e1;
    int sz;

    //printf("cdnegass87(e = %p, *pretregs = x%x)\n", e, *pretregs);
    e1 = e->E1;
    tyml = tybasic(e1->Ety);            // type of lvalue
    sz = tysize[tyml];

    cl = getlvalue(&cs,e1,0);
    cr = modEA(&cs);
    cs.Irm |= modregrm(0,6,0);
    cs.Iop = 0x80;
    cs.Irex = 0;
    // Point the effective address at the byte holding the sign bit
#if LNGDBLSIZE > 10
    if (tyml == TYldouble || tyml == TYildouble)
        cs.IEVoffset1 += 10 - 1;
    else if (tyml == TYcldouble)
        cs.IEVoffset1 += tysize[TYldouble] + 10 - 1;
    else
#endif
        cs.IEVoffset1 += sz - 1;
    cs.IFL2 = FLconst;
    cs.IEV2.Vuns = 0x80;
    c = gen(NULL,&cs);                  // XOR 7[EA],0x80
    if (tycomplex(tyml))
    {
        cs.IEVoffset1 -= sz / 2;        // sign byte of the other half
        gen(c,&cs);                     // XOR 7[EA],0x80
    }
    c = cat3(cl,cr,c);

    if (*pretregs)
    {
        // NOTE(review): no complex case here, so a complex lvalue with a result
        // requested reaches assert(0) - confirm callers never do that.
        switch (tyml)
        {
            case TYifloat:
            case TYfloat:               cs.Iop = 0xD9;  op = 0; break;
            case TYidouble:
            case TYdouble:
            case TYdouble_alias:        cs.Iop = 0xDD;  op = 0; break;
            case TYildouble:
            case TYldouble:             cs.Iop = 0xDB;  op = 5; break;
            default:
                assert(0);
        }
        NEWREG(cs.Irm,op);
        cs.IEVoffset1 -= sz - 1;
        c = cat(c, push87());
        c = gen(c,&cs);                 // FLD EA
        retregs = mST0;
    }
    else
        retregs = 0;

    freenode(e1);
    return cat(c,fixresult87(e,retregs,pretregs));
}

/************************
 * Take care of OPpostinc and OPpostdec.
 * The caller must want a result (*pretregs must be non-zero).
 */

code *post87(elem *e,regm_t *pretregs)
{
    regm_t retregs;
    code *cl,*cr,*c;
    code cs;
    unsigned op;
    unsigned op1;               // ESC opcode for loading/storing e->E1
    unsigned reg;               // reg field: first for the load, then for the FSTP
    tym_t ty1;

    //printf("post87()\n");
    assert(*pretregs);
    cl = getlvalue(&cs,e->E1,0);
    cs.Iflags |= ADDFWAIT() ? CFwait : 0;
    if (!I16)
        cs.Iflags &= ~CFopsize;
    ty1 = tybasic(e->E1->Ety);
    switch (ty1)
    {   case TYdouble_alias:
        case TYidouble:
        case TYdouble:
        case TYcdouble:     op1 = ESC(MFdouble,1);  reg = 0;        break;
        case TYifloat:
        case TYfloat:
        case TYcfloat:      op1 = ESC(MFfloat,1);   reg = 0;        break;
        case TYildouble:
        case TYldouble:
        case TYcldouble:    op1 = 0xDB;             reg = 5;        break;
        default:
            assert(0);
    }
    NEWREG(cs.Irm, reg);
    // Switch reg from the load encoding to the matching FSTP encoding
    if (reg == 5)
        reg = 7;
    else
        reg = 3;
    cs.Iop = op1;
    cl = cat(cl,push87());
    cl = gen(cl,&cs);           // FLD e->E1
    if (tycomplex(ty1))
    {   unsigned sz = tysize[ty1] / 2;

        cl = cat(cl,push87());
        cs.IEVoffset1 += sz;
        cl = gen(cl,&cs);       // FLD e->E1
        retregs = mST0;         // note kludge to only load real part
        cr = codelem(e->E2,&retregs,FALSE);     // load rvalue
        c = genf2(NULL,0xD8,    // FADD/FSUBR ST,ST2
                (e->Eoper == OPpostinc) ? 0xC0 + 2 : 0xE8 + 2);
        NEWREG(cs.Irm,reg);
        pop87();
        cs.IEVoffset1 -= sz;
        gen(c,&cs);             // FSTP e->E1
        genfwait(c);
        freenode(e->E1);
        return cat4(cl, cr, c, fixresult_complex87(e, mST01, pretregs));
    }

    if (*pretregs & (mST0 | ALLREGS | mBP))
    {   // Want the result in a register
        cl = cat(cl,push87());
        genf2(cl,0xD9,0xC0);    // FLD ST0
    }
    if (*pretregs & mPSW)               /* if result in flags           */
        genftst(cl,e,0);        // FTST ST0
    retregs = mST0;
    cr = codelem(e->E2,&retregs,FALSE); /* load rvalue                  */
    pop87();
    op = (e->Eoper == OPpostinc) ? modregrm(3,0,1) : modregrm(3,5,1);
    c = genf2(NULL,0xDE,op);    // FADDP/FSUBRP ST1
    NEWREG(cs.Irm,reg);
    pop87();
    gen(c,&cs);                 /* FSTP e->E1                           */
    genfwait(c);
    freenode(e->E1);
    return cat4(cl,cr,c,fixresult87(e,mPSW | mST0,pretregs));
}

/************************
 * Do the following opcodes:
 *      OPd_s16
 *      OPd_s32
 *      OPd_u16
 *      OPd_s64
 * The caller must want a result (*pretregs must be non-zero).
 */

code *cnvt87(elem *e,regm_t *pretregs)
{
    regm_t retregs;
    code *c1,*c2;
    unsigned mf,rf,reg;         // mf/rf: opcode and reg field of the FISTP used
    tym_t tym;
    int clib;
    int sz;
    int szoff;                  // offset on the stack of the saved control word

    //printf("cnvt87(e = %p, *pretregs = x%x)\n", e, *pretregs);
    assert(*pretregs);
    tym = e->Ety;
    sz = tysize(tym);
    szoff = sz;
    unsigned grex = I64 ? REX_W << 16 : 0;

    switch (e->Eoper)
    {   case OPd_s16:
            clib = CLIBdblint87;
            mf = ESC(MFword,1);
            rf = 3;
            break;

        case OPd_u16:
            szoff = 4;
            // falls through: converted like OPd_s32
        case OPd_s32:
            clib = CLIBdbllng87;
            mf = ESC(MFlong,1);
            rf = 3;
            break;

        case OPd_s64:
            clib = CLIBdblllng;
            mf = 0xDF;
            rf = 7;
            break;

        default:
            assert(0);
    }

    if (I16)                       // C may change the default control word
    {
        if (clib == CLIBdblllng)
        {   retregs = I32 ? DOUBLEREGS_32 : DOUBLEREGS_16;
            c1 = codelem(e->E1,&retregs,FALSE);
            c2 = callclib(e,clib,pretregs,0);
        }
        else
        {   retregs = mST0; //I32 ? DOUBLEREGS_32 : DOUBLEREGS_16;
            c1 = codelem(e->E1,&retregs,FALSE);
            c2 = callclib(e,clib,pretregs,0);
            pop87();
        }
    }
    else if (1)
    {   //  Generate:
        //  sub     ESP,12
        //  fstcw   8[ESP]
        //  fldcw   roundto0
        //  fistp   long64 ptr [ESP]
        //  fldcw   8[ESP]
        //  pop     lsw
        //  pop     msw
        //  add     ESP,4

        // Room for the integer, the saved control word and, for PIC code,
        // a second word for the new control word; rounded up to REGSIZE
        unsigned szpush = szoff + 2;
        if (config.flags3 & CFG3pic)
            szpush += 2;
        szpush = (szpush + REGSIZE - 1) & ~(REGSIZE - 1);

        retregs = mST0;
        c1 = codelem(e->E1,&retregs,FALSE);

        if (szpush == REGSIZE)
            c1 = gen1(c1,0x50 + AX);                // PUSH EAX
        else
            c1 = genc2(c1,0x81,grex | modregrm(3,5,SP), szpush);   // SUB ESP,12
        c1 = genfwait(c1);
        genc1(c1,0xD9,modregrm(2,7,4) + 256*modregrm(0,4,SP),FLconst,szoff); // FSTCW szoff[ESP]

        c1 = genfwait(c1);

        if (config.flags3 & CFG3pic)
        {
            genc(c1,0xC7,modregrm(2,0,4) + 256*modregrm(0,4,SP),FLconst,szoff+2,FLconst,CW_roundto0); // MOV szoff+2[ESP], CW_roundto0
            code_orflag(c1, CFopsize);
            genc1(c1,0xD9,modregrm(2,5,4) + 256*modregrm(0,4,SP),FLconst,szoff+2); // FLDCW szoff+2[ESP]
        }
        else
            c1 = genrnd(c1, CW_roundto0);   // FLDCW roundto0

        pop87();

        c1 = genfwait(c1);
        gen2sib(c1,mf,grex | modregrm(0,rf,4),modregrm(0,4,SP));                   // FISTP [ESP]

        retregs = *pretregs & (ALLREGS | mBP);
        if (!retregs)
                retregs = ALLREGS;
        c2 = allocreg(&retregs,&reg,tym);

        c2 = genfwait(c2);                                                         // FWAIT
        c2 = genc1(c2,0xD9,grex | modregrm(2,5,4) + 256*modregrm(0,4,SP),FLconst,szoff);   // FLDCW szoff[ESP]

        if (szoff > REGSIZE)
        {   szpush -= REGSIZE;
            c2 = genpop(c2,findreglsw(retregs));       // POP lsw
        }
        szpush -= REGSIZE;
        c2 = genpop(c2,reg);                           // POP reg

        if (szpush)
            genc2(c2,0x81,grex | modregrm(3,0,SP), szpush);        // ADD ESP,4
        c2 = cat(c2,fixresult(e,retregs,pretregs));
    }
    else
    {
        // Not reachable while the branch above is written as "else if (1)"

        // This is incorrect. For -inf and nan, the 8087 returns the largest
        // negative int (0x80000....). For -inf, 0x7FFFF... should be returned,
        // and for nan, 0 should be returned.
        retregs = mST0;
        c1 = codelem(e->E1,&retregs,FALSE);

        c1 = genfwait(c1);
        c1 = genrnd(c1, CW_roundto0);   // FLDCW roundto0

        pop87();
        c1 = genfltreg(c1,mf,rf,0);     // FISTP floatreg
        retregs = *pretregs & (ALLREGS | mBP);
        if (!retregs)
                retregs = ALLREGS;
        c2 = allocreg(&retregs,&reg,tym);

        c2 = genfwait(c2);

        if (sz > REGSIZE)
        {   c2 = genfltreg(c2,0x8B,reg,REGSIZE);        // MOV reg,floatreg + REGSIZE
                                                        // MOV lsreg,floatreg
            genfltreg(c2,0x8B,findreglsw(retregs),0);
        }
        else
            c2 = genfltreg(c2,0x8B,reg,0);      // MOV reg,floatreg
        c2 = genrnd(c2, CW_roundtonearest);     // FLDCW roundtonearest
        c2 = cat(c2,fixresult(e,retregs,pretregs));
    }
    return cat(c1,c2);
}

/************************
 * Do OPrndtol.
3070: */ 3071: 3072: code *cdrndtol(elem *e,regm_t *pretregs) 3073: { 3074: regm_t retregs; 3075: code *c1,*c2; 3076: unsigned reg; 3077: tym_t tym; 3078: unsigned sz; 3079: unsigned char op1,op2; 3080: 3081: if (*pretregs == 0) 3082: return codelem(e->E1,pretregs,FALSE); 3083: tym = e->Ety; 3084: retregs = mST0; 3085: c1 = codelem(e->E1,&retregs,FALSE); 3086: 3087: sz = tysize(tym); 3088: switch (sz) 3089: { case 2: 3090: op1 = 0xDF; 3091: op2 = 3; 3092: break; 3093: case 4: 3094: op1 = 0xDB; 3095: op2 = 3; 3096: break; 3097: case 8: 3098: op1 = 0xDF; 3099: op2 = 7; 3100: break; 3101: default: 3102: assert(0); 3103: } 3104: 3105: pop87(); 3106: c1 = genfltreg(c1,op1,op2,0); // FISTP floatreg 3107: retregs = *pretregs & (ALLREGS | mBP); 3108: if (!retregs) 3109: retregs = ALLREGS; 3110: c2 = allocreg(&retregs,&reg,tym); 3111: c2 = genfwait(c2); // FWAIT 3112: if (tysize(tym) > REGSIZE) 3113: { c2 = genfltreg(c2,0x8B,reg,REGSIZE); // MOV reg,floatreg + REGSIZE 3114: // MOV lsreg,floatreg 3115: genfltreg(c2,0x8B,findreglsw(retregs),0); 3116: } 3117: else 3118: { 3119: c2 = genfltreg(c2,0x8B,reg,0); // MOV reg,floatreg 3120: if (tysize(tym) == 8 && I64) 3121: code_orrex(c2, REX_W); 3122: } 3123: c2 = cat(c2,fixresult(e,retregs,pretregs)); 3124: 3125: return cat(c1,c2); 3126: } 3127: 3128: /************************* 3129: * Do OPscale, OPyl2x, OPyl2xp1. 
 * Both operands are evaluated onto the 8087 stack, e->E1 first, and the
 * single result is left in ST0. The caller must want a result.
 */

code *cdscale(elem *e,regm_t *pretregs)
{
    regm_t retregs;
    code *c1,*c2,*c3;

    assert(*pretregs != 0);

    retregs = mST0;
    c1 = codelem(e->E1,&retregs,FALSE);
    note87(e->E1,0,0);
    c2 = codelem(e->E2,&retregs,FALSE);
    c2 = cat(c2,makesure87(e->E1,0,1,0));       // now have x,y on stack; need y,x
    switch (e->Eoper)
    {
        case OPscale:
            c2 = genf2(c2,0xD9,0xFD);                   // FSCALE
            genf2(c2,0xDD,0xD8 + 1);                    // FSTP ST(1)
            break;

        case OPyl2x:
            c2 = genf2(c2,0xD9,0xF1);                   // FYL2X
            break;

        case OPyl2xp1:
            c2 = genf2(c2,0xD9,0xF9);                   // FYL2XP1
            break;
    }
    pop87();                                    // two operands in, one result out
    c3 = fixresult87(e,mST0,pretregs);
    return cat3(c1,c2,c3);
}


/**********************************
 * Unary -, absolute value, square root, sine, cosine
 * Also OPrint (FRNDINT). The operand is evaluated into ST0 and the single
 * D9 xx instruction selected by e->Eoper is applied to it.
 */

code *neg87(elem *e,regm_t *pretregs)
{
    regm_t retregs;
    code *c1,*c2;
    int op;                     // second opcode byte following 0xD9

    assert(*pretregs);
    switch (e->Eoper)
    {   case OPneg:  op = 0xE0;     break;
        case OPabs:  op = 0xE1;     break;
        case OPsqrt: op = 0xFA;     break;
        case OPsin:  op = 0xFE;     break;
        case OPcos:  op = 0xFF;     break;
        case OPrint: op = 0xFC;     break;  // FRNDINT
        default:
            assert(0);
    }
    retregs = mST0;
    c1 = codelem(e->E1,&retregs,FALSE);
    c1 = genf2(c1,0xD9,op);                 // FCHS/FABS/FSQRT/FSIN/FCOS/FRNDINT
    c2 = fixresult87(e,mST0,pretregs);
    return cat(c1,c2);
}

/**********************************
 * Unary - for complex operands
 * Negates ST0, swaps, negates the other half and swaps back.
 */

code *neg_complex87(elem *e,regm_t *pretregs)
{
    regm_t retregs;
    code *c1,*c2;

    assert(e->Eoper == OPneg);
    retregs = mST01;
    c1 = codelem(e->E1,&retregs,FALSE);
    c1 = genf2(c1,0xD9,0xE0);               // FCHS
    genf2(c1,0xD9,0xC8 + 1);                // FXCH ST(1)
    genf2(c1,0xD9,0xE0);                    // FCHS
    genf2(c1,0xD9,0xC8 + 1);                // FXCH ST(1)
    c2 = fixresult_complex87(e,mST01,pretregs);
    return cat(c1,c2);
}

/*********************************
 * Load the floating point value addressed by e onto the 8087 stack.
 * If no result is wanted, only the addressing code from getlvalue()
 * is generated.
 */

code *cdind87(elem *e,regm_t *pretregs)
{   code *c,*ce,cs;

    //printf("cdind87(e = %p, *pretregs = x%x)\n",e,*pretregs);

    c = getlvalue(&cs,e,0);             // get addressing mode
    if (*pretregs)
    {
        // Select the FLD opcode from the type being loaded
        switch (tybasic(e->Ety))
        {   case TYfloat:
            case TYifloat:
                cs.Iop = 0xD9;
                break;

            case TYidouble:
            case TYdouble:
            case TYdouble_alias:
                cs.Iop = 0xDD;
                break;

            case TYildouble:
            case TYldouble:
                cs.Iop = 0xDB;
                cs.Irm |= modregrm(0,5,0);
                break;

            default:
                assert(0);
        }
        c = cat(c,push87());
        c = gen(c,&cs);                 // FLD EA
        ce = fixresult87(e,mST0,pretregs);
        c = cat(c,ce);
    }
    return c;
}

/************************************
 * Reset statics for another .obj file.
 * Zeroes oldd, so genrnd() will emit its control word constants again.
 */

void cg87_reset()
{
    memset(&oldd,0,sizeof(oldd));
}


/*****************************************
 * Initialize control word constants.
 * Appends to c an FLDCW that loads control word cw.
 * Input:
 *      c       code list to append to
 *      cw      CW_roundto0; any other value selects the round-to-nearest
 *              constant on the non-PIC path
 * Returns:
 *      c with the new code appended
 */

STATIC code *genrnd(code *c, short cw)
{
    if (config.flags3 & CFG3pic)
    {   code *c1;

        // PIC: materialize the control word in floatreg instead of
        // referencing a read-only data symbol
        c1 = genfltreg(NULL, 0xC7, 0, 0);       // MOV floatreg, cw
        c1->IFL2 = FLconst;
        c1->IEV2.Vuns = cw;

        c1 = genfltreg(c1, 0xD9, 5, 0);         // FLDCW floatreg
        c = cat(c, c1);
    }
    else
    {
        if (!oldd.round)                // if not initialized
        {   short cwi;

            oldd.round = 1;

            cwi = CW_roundto0;          // round to 0
            oldd.roundto0 = out_readonly_sym(TYshort,&cwi,2);
            cwi = CW_roundtonearest;            // round to nearest
            oldd.roundtonearest = out_readonly_sym(TYshort,&cwi,2);
        }
        symbol *rnddir = (cw == CW_roundto0) ? oldd.roundto0 : oldd.roundtonearest;
        code cs;
        cs.Iop = 0xD9;
        cs.Iflags = CFoff;
        cs.Irex = 0;
        cs.IEVsym1 = rnddir;
        cs.IFL1 = rnddir->Sfl;
        cs.IEVoffset1 = 0;
        cs.Irm = modregrm(0,5,BPRM);
        c = gen(c,&cs);                 // FLDCW rnddir
    }
    return c;
}

/************************* Complex Numbers *********************/

/***************************
 * Set the PSW based on the state of ST01.
 * Input:
 *      pop     if stack should be popped after test
 * Returns:
 *      start of code appended to c.
 */

STATIC code * genctst(code *c,elem *e,int pop)
#if __DMC__
__in
{
    assert(pop == 0 || pop == 1);
}
__body
#endif
{
    // Generate:
    //  if (pop)
    //          FLDZ
    //          FUCOMPP
    //          FSTSW   AX
    //          SAHF
    //          FLDZ
    //          FUCOMPP
    //          JNE     L1
    //          JP      L1              // if NAN
    //          FSTSW   AX
    //          SAHF
    //  L1:
    //  else
    //          FLDZ
    //          FUCOM
    //          FSTSW   AX
    //          SAHF
    //          FUCOMP  ST(2)
    //          JNE     L1
    //          JP      L1              // if NAN
    //          FSTSW   AX
    //          SAHF
    //  L1:
    // FUCOMP doesn't raise exceptions on QNANs, unlike FTST

    code *cnop;

    cnop = gennop(CNIL);
    c = cat(c,push87());
    c = gen2(c,0xD9,0xEE);                      // FLDZ
    if (pop)
    {
        gen2(c,0xDA,0xE9);                      // FUCOMPP
        pop87();
        pop87();
        cg87_87topsw(c);                        // put 8087 flags in CPU flags
        gen2(c,0xD9,0xEE);                      // FLDZ
        gen2(c,0xDA,0xE9);                      // FUCOMPP
        pop87();
        genjmp(c,JNE,FLcode,(block *) cnop);    // JNE     L1
        genjmp(c,JP, FLcode,(block *) cnop);    // JP      L1
        cg87_87topsw(c);                        // put 8087 flags in CPU flags
    }
    else
    {
        gen2(c,0xDD,0xE1);                      // FUCOM
        cg87_87topsw(c);                        // put 8087 flags in CPU flags
        gen2(c,0xDD,0xEA);                      // FUCOMP ST(2)
        pop87();
        genjmp(c,JNE,FLcode,(block *) cnop);    // JNE     L1
        genjmp(c,JP, FLcode,(block *) cnop);    // JP
L1 3376: cg87_87topsw(c); // put 8087 flags in CPU flags 3377: } 3378: return cat(c, cnop); 3379: } 3380: 3381: /****************************** 3382: * Given the result of an expression is in retregs, 3383: * generate necessary code to return result in *pretregs. 3384: */ 3385: 3386: 3387: code *fixresult_complex87(elem *e,regm_t retregs,regm_t *pretregs) 3388: { 3389: tym_t tym; 3390: code *c1,*c2; 3391: unsigned sz; 3392: 3393: #if 0 3394: printf("fixresult_complex87(e = %p, retregs = %s, *pretregs = %s)\n", 3395: e,regm_str(retregs),regm_str(*pretregs)); 3396: #endif 3397: assert(!*pretregs || retregs); 3398: c1 = CNIL; 3399: c2 = CNIL; 3400: tym = tybasic(e->Ety); 3401: sz = tysize[tym]; 3402: 3403: if (*pretregs == 0 && retregs == mST01) 3404: { 3405: c1 = genf2(c1,0xDD,modregrm(3,3,0)); // FPOP 3406: pop87(); 3407: c1 = genf2(c1,0xDD,modregrm(3,3,0)); // FPOP 3408: pop87(); 3409: } 3410: else if (tym == TYcfloat && *pretregs & (mAX|mDX) && retregs & mST01) 3411: { 3412: if (*pretregs & mPSW && !(retregs & mPSW)) 3413: c1 = genctst(c1,e,0); // FTST 3414: pop87(); 3415: c1 = genfltreg(c1, ESC(MFfloat,1),3,0); // FSTP floatreg 3416: genfwait(c1); 3417: c2 = getregs(mDX|mAX); 3418: c2 = genfltreg(c2, 0x8B, DX, 0); // MOV EDX,floatreg 3419: 3420: pop87(); 3421: c2 = genfltreg(c2, ESC(MFfloat,1),3,0); // FSTP floatreg 3422: genfwait(c2); 3423: c2 = genfltreg(c2, 0x8B, AX, 0); // MOV EAX,floatreg 3424: } 3425: else if (tym == TYcfloat && retregs & (mAX|mDX) && *pretregs & mST01) 3426: { 3427: c1 = push87(); 3428: c1 = genfltreg(c1, 0x89, AX, 0); // MOV floatreg, EAX 3429: genfltreg(c1, 0xD9, 0, 0); // FLD float ptr floatreg 3430: 3431: c2 = push87(); 3432: c2 = genfltreg(c2, 0x89, DX, 0); // MOV floatreg, EDX 3433: genfltreg(c2, 0xD9, 0, 0); // FLD float ptr floatreg 3434: 3435: if (*pretregs & mPSW) 3436: c2 = genctst(c2,e,0); // FTST 3437: } 3438: else if ((tym == TYcfloat || tym == TYcdouble) && 3439: *pretregs & (mXMM0|mXMM1) && retregs & mST01) 3440: { 3441: if 
(*pretregs & mPSW && !(retregs & mPSW)) 3442: c1 = genctst(c1,e,0); // FTST 3443: pop87(); 3444: c1 = genfltreg(c1, ESC(MFdouble,1),3,0); // FSTP floatreg 3445: genfwait(c1); 3446: c2 = getregs(mXMM0|mXMM1); 3447: c2 = genfltreg(c2, 0xF20F10, XMM1 - XMM0, 0); // MOVD XMM1,floatreg 3448: 3449: pop87(); 3450: c2 = genfltreg(c2, ESC(MFdouble,1),3,0); // FSTP floatreg 3451: genfwait(c2); 3452: c2 = genfltreg(c2, 0xF20F10, XMM0 - XMM0, 0); // MOVD XMM0,floatreg 3453: } 3454: else if ((tym == TYcfloat || tym == TYcdouble) && 3455: retregs & (mXMM0|mXMM1) && *pretregs & mST01) 3456: { 3457: c1 = push87(); 3458: c1 = genfltreg(c1, 0xF20F11, XMM0-XMM0, 0); // MOVD floatreg, XMM0 3459: genfltreg(c1, 0xDD, 0, 0); // FLD double ptr floatreg 3460: 3461: c2 = push87(); 3462: c2 = genfltreg(c2, 0xF20F11, XMM1-XMM0, 0); // MOV floatreg, XMM1 3463: genfltreg(c2, 0xDD, 0, 0); // FLD double ptr floatreg 3464: 3465: if (*pretregs & mPSW) 3466: c2 = genctst(c2,e,0); // FTST 3467: } 3468: else 3469: { if (*pretregs & mPSW) 3470: { if (!(retregs & mPSW)) 3471: { assert(retregs & mST01); 3472: c1 = genctst(c1,e,!(*pretregs & mST01)); // FTST 3473: } 3474: } 3475: assert(!(*pretregs & mST01) || (retregs & mST01)); 3476: } 3477: if (*pretregs & mST01) 3478: { note87(e,0,1); 3479: note87(e,sz/2,0); 3480: } 3481: return cat(c1,c2); 3482: } 3483: 3484: /***************************************** 3485: * Operators OPc_r and OPc_i 3486: */ 3487: 3488: code *cdconvt87(elem *e, regm_t *pretregs) 3489: { 3490: regm_t retregs; 3491: code *c; 3492: 3493: retregs = mST01; 3494: c = codelem(e->E1, &retregs, FALSE); 3495: switch (e->Eoper) 3496: { 3497: case OPc_r: 3498: c = genf2(c,0xDD,0xD8 + 0); // FPOP 3499: pop87(); 3500: break; 3501: 3502: case OPc_i: 3503: c = genf2(c,0xDD,0xD8 + 1); // FSTP ST(1) 3504: pop87(); 3505: break; 3506: 3507: default: 3508: assert(0); 3509: } 3510: retregs = mST0; 3511: c = cat(c, fixresult87(e, retregs, pretregs)); 3512: return c; 3513: } 3514: 3515: 
/************************************** 3516: * Load complex operand into ST01 or flags or both. 3517: */ 3518: 3519: code *cload87(elem *e, regm_t *pretregs) 3520: #if __DMC__ 3521: __in 3522: { 3523: assert(I32 && config.inline8087); 3524: elem_debug(e); 3525: assert(*pretregs & (mST01 | mPSW)); 3526: assert(!(*pretregs & ~(mST01 | mPSW))); 3527: } 3528: __out (result) 3529: { 3530: } 3531: __body 3532: #endif 3533: { 3534: tym_t ty = tybasic(e->Ety); 3535: code *c = NULL; 3536: code *cpush = NULL; 3537: code cs; 3538: unsigned mf; 3539: unsigned sz; 3540: unsigned char ldop; 3541: regm_t retregs; 3542: int i; 3543: 3544: //printf("cload87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs)); 3545: sz = tysize[ty] / 2; 3546: memset(&cs, 0, sizeof(cs)); 3547: if (ADDFWAIT()) 3548: cs.Iflags = CFwait; 3549: switch (ty) 3550: { 3551: case TYcfloat: mf = MFfloat; break; 3552: case TYcdouble: mf = MFdouble; break; 3553: case TYcldouble: break; 3554: default: assert(0); 3555: } 3556: switch (e->Eoper) 3557: { 3558: case OPvar: 3559: notreg(e); // never enregister this variable 3560: case OPind: 3561: cpush = cat(push87(), push87()); 3562: switch (ty) 3563: { 3564: case TYcfloat: 3565: case TYcdouble: 3566: c = loadea(e,&cs,ESC(mf,1),0,0,0,0); // FLD var 3567: cs.IEVoffset1 += sz; 3568: c = gen(c, &cs); 3569: break; 3570: 3571: case TYcldouble: 3572: c = loadea(e,&cs,0xDB,5,0,0,0); // FLD var 3573: cs.IEVoffset1 += sz; 3574: c = gen(c, &cs); 3575: break; 3576: 3577: default: 3578: assert(0); 3579: } 3580: retregs = mST01; 3581: break; 3582: 3583: case OPd_ld: 3584: case OPld_d: 3585: case OPf_d: 3586: case OPd_f: 3587: c = cload87(e->E1, pretregs); 3588: freenode(e->E1); 3589: return c; 3590: 3591: case OPconst: 3592: cpush = cat(push87(), push87()); 3593: for (i = 0; i < 2; i++) 3594: { 3595: ldop = loadconst(e, i); 3596: if (ldop) 3597: { 3598: c = genf2(c,0xD9,ldop); // FLDx 3599: } 3600: else 3601: { 3602: assert(0); 3603: } 3604: } 3605: retregs = mST01; 3606: 
break; 3607: 3608: default: 3609: #ifdef DEBUG 3610: elem_print(e); 3611: #endif 3612: assert(0); 3613: } 3614: return cat4(cpush,c,fixresult_complex87(e, retregs, pretregs), NULL); 3615: } 3616: 3617: #endif // !SPP 3618: