1: // Copyright (C) 1995-1998 by Symantec
   2: // Copyright (C) 2000-2009 by Digital Mars
   3: // All Rights Reserved
   4: // http://www.digitalmars.com
   5: // Written by Walter Bright
   6: /*
   7:  * This source file is made available for personal use
   8:  * only. The license is in /dmd/src/dmd/backendlicense.txt
   9:  * or /dm/src/dmd/backendlicense.txt
  10:  * For any other uses, please contact Digital Mars.
  11:  */
  12: 
  13: #if !SPP
  14: 
  15: #include        <stdio.h>
  16: #include        <string.h>
  17: #include        <time.h>
  18: 
  19: #include        "cc.h"
  20: #include        "el.h"
  21: #include        "code.h"
  22: #include        "oper.h"
  23: #include        "global.h"
  24: #include        "type.h"
  25: #include        "exh.h"
  26: #include        "list.h"
  27: 
  28: static char __file__[] = __FILE__;      /* for tassert.h                */
  29: #include        "tassert.h"
  30: 
// PRO is nonzero when the Pentium Pro scheduler is to be used.
// The active definition tests config.target_cpu; the disabled (#if 0)
// alternative would test config.target_scheduler instead.
#if 0
#define PRO     (config.target_scheduler >= TARGET_PentiumPro)
#else
#define PRO     (config.target_cpu >= TARGET_PentiumPro)
#endif
  37: 
// Struct where we gather information about an instruction.
// The FP* and CIFL* macros are defined next to the field they belong to;
// being preprocessor macros they are nevertheless visible file-wide.
struct Cinfo
{
    code *c;            // the instruction
    unsigned char pair; // pairing information
    unsigned char sz;   // operand size
    unsigned char isz;  // instruction size

    // For floating point scheduling
    // NOTE(review): fxch_pre/fxch_post presumably describe FXCH handling
    // before/after the instruction -- confirm against the scheduler code.
    unsigned char fxch_pre;
    unsigned char fxch_post;
    unsigned char fp_op;        // one of the FP* values below
        #define FPfstp  1       // FSTP mem
        #define FPfld   2       // FLD mem
        #define FPfop   3       // Fop ST0,mem or Fop ST0

    unsigned char flags;        // OR of the CIFL* bits below
#define CIFLarraybounds 1       // this instruction is a jmp to array bounds
#define CIFLea          2       // this instruction has a memory-referencing
                                // modregrm EA byte
#define CIFLnostage     4       // don't stage these instructions
#define CIFLpush        8       // it's a push we can swap around

    unsigned r;         // read mask
    unsigned w;         // write mask
    unsigned a;         // registers used in addressing mode
    unsigned char reg;  // reg field of modregrm byte
    unsigned char uops; // Pentium Pro micro-ops
    unsigned sibmodrm;  // (sib << 8) + mod__rm byte
    unsigned spadjust;  // if !=0, then amount ESP changes as a result of this
                        // instruction being executed
    int fpuadjust;      // if !=0, then amount FPU stack changes as a result
                        // of this instruction being executed
#if DEBUG
    void print();       // pretty-printer (declared in DEBUG builds only)
#endif
};
  75: 
// Forward declarations of the passes driven by cgsched_pentium().
// Each takes a code pointer plus a scratch register mask and returns the
// code pointer that the caller stores back in place of the one passed in.
code *simpleops(code *c,regm_t scratch);
code *schedule(code *c,regm_t scratch);
code *peephole(code *c,regm_t scratch);
  79: 
  80: /*****************************************
  81:  * Do Pentium optimizations.
  82:  * Input:
  83:  *      scratch         scratch registers we can use
  84:  */
  85: 
  86: void cgsched_pentium(code **pc,regm_t scratch)
  87: {
  88:     //printf("scratch = x%02x\n",scratch);
  89:     if (config.target_scheduler >= TARGET_80486)
  90:     {
  91:         if (!I64)
  92:             *pc = peephole(*pc,0);
  93:         if (I32)                        // forget about 16 bit code
  94:         {
  95:             if (config.target_cpu == TARGET_Pentium ||
  96:                 config.target_cpu == TARGET_PentiumMMX)
  97:                 *pc = simpleops(*pc,scratch);
  98:             *pc = schedule(*pc,0);
  99:         }
 100:     }
 101: }
 102: 
// Pairing attribute bits stored in pentcycl[] below.
// PU and PV are independent bits (UV is both); PE, PF and FX are
// additional modifier bits OR'ed onto them.
#define NP      0       // not pairable
#define PU      1       // pairable in U only, never executed in V
#define PV      2       // pairable in V only
#define UV      (PU|PV) // pairable in both U and V
#define PE      4       // register contention exception
#define PF      8       // flags contention exception
#define FX      0x10    // pairable with FXCH instruction

// Pairing attributes, one entry per one-byte opcode (8 opcodes per row;
// the trailing comment on each row is the hex opcode of its first entry).
static unsigned char pentcycl[256] =
{
        UV,UV,UV,UV,    UV,UV,NP,NP,    // 0
        UV,UV,UV,UV,    UV,UV,NP,NP,    // 8
        PU,PU,PU,PU,    PU,PU,NP,NP,    // 10
        PU,PU,PU,PU,    PU,PU,NP,NP,    // 18
        UV,UV,UV,UV,    UV,UV,NP,NP,    // 20
        UV,UV,UV,UV,    UV,UV,NP,NP,    // 28
        UV,UV,UV,UV,    UV,UV,NP,NP,    // 30
        UV,UV,UV,UV,    UV,UV,NP,NP,    // 38

        UV,UV,UV,UV,    UV,UV,UV,UV,    // 40
        UV,UV,UV,UV,    UV,UV,UV,UV,    // 48
        PE|UV,PE|UV,PE|UV,PE|UV,        PE|UV,PE|UV,PE|UV,PE|UV, // 50  PUSH reg
        PE|UV,PE|UV,PE|UV,PE|UV,        PE|UV,PE|UV,PE|UV,PE|UV, // 58  POP reg
        NP,NP,NP,NP,    NP,NP,NP,NP,    // 60
        PE|UV,NP,PE|UV,NP,      NP,NP,NP,NP,    // 68
        PV|PF,PV|PF,PV|PF,PV|PF,        PV|PF,PV|PF,PV|PF,PV|PF,        // 70   Jcc rel8
        PV|PF,PV|PF,PV|PF,PV|PF,        PV|PF,PV|PF,PV|PF,PV|PF,        // 78   Jcc rel8

        NP,NP,NP,NP,    NP,NP,NP,NP,    // 80
        UV,UV,UV,UV,    NP,UV,NP,NP,    // 88
        NP,NP,NP,NP,    NP,NP,NP,NP,    // 90
        NP,NP,NP,NP,    NP,NP,NP,NP,    // 98
        UV,UV,UV,UV,    NP,NP,NP,NP,    // A0
        UV,UV,NP,NP,    NP,NP,NP,NP,    // A8
        UV,UV,UV,UV,    UV,UV,UV,UV,    // B0
        UV,UV,UV,UV,    UV,UV,UV,UV,    // B8

        NP,NP,NP,NP,    NP,NP,NP,NP,    // C0
        NP,NP,NP,NP,    NP,NP,NP,NP,    // C8
        PU,PU,NP,NP,    NP,NP,NP,NP,    // D0
        FX,NP,FX,FX,    NP,NP,FX,NP,    // D8   all floating point
        NP,NP,NP,NP,    NP,NP,NP,NP,    // E0
        PE|PV,PV,NP,PV, NP,NP,NP,NP,    // E8
        NP,NP,NP,NP,    NP,NP,NP,NP,    // F0
        NP,NP,NP,NP,    NP,NP,NP,NP,    // F8
};
 149: 
/********************************************
 * For each opcode, determine read [0] and written [1] masks.
 * The table is indexed by the one-byte opcode; each row is the pair
 * { what the instruction reads, what it writes }. An entry is an OR of
 * register masks (mAX, mCX, ...) and of the pseudo-resource flags
 * defined just below.
 * NOTE(review): several 8-bit forms (e.g. TEST/XCHG at 0x84/0x86 and the
 * shifts at 0xC0/0xD0/0xD2) carry no B flag while others (e.g. MOV at
 * 0x88) do -- confirm whether that is intentional.
 */

#define EA      0x100000        // operand given by the modregrm EA
                                // (NOTE(review): inferred from the table rows)
#define R       0x200000        // register (reg of modregrm field)
#define N       0x400000        // other things modified, not swappable
#define B       0x800000        // it's a byte operation
#define C       0x1000000       // floating point flags
#define mMEM    0x2000000       // memory
#define S       0x4000000       // floating point stack
#define F       0x8000000       // flags

static unsigned oprw[256][2] =
{
        // 00
        EA|R|B, F|EA|B,         // ADD
        EA|R,   F|EA,
        EA|R|B, F|R|B,
        EA|R,   F|R,
        mAX,    F|mAX,
        mAX,    F|mAX,
        N,      N,              // PUSH ES
        N,      N,              // POP  ES

        // 08
        EA|R|B, F|EA|B,         // OR
        EA|R,   F|EA,
        EA|R|B, F|R|B,
        EA|R,   F|R,
        mAX,    F|mAX,
        mAX,    F|mAX,
        N,      N,              // PUSH CS
        N,      N,              // 2 byte escape

        // 10
        F|EA|R|B,F|EA|B,        // ADC
        F|EA|R, F|EA,
        F|EA|R|B,F|R|B,
        F|EA|R, F|R,
        F|mAX,  F|mAX,
        F|mAX,  F|mAX,
        N,      N,              // PUSH SS
        N,      N,              // POP  SS

        // 18
        F|EA|R|B,F|EA|B,        // SBB
        F|EA|R, F|EA,
        F|EA|R|B,F|R|B,
        F|EA|R, F|R,
        F|mAX,  F|mAX,
        F|mAX,  F|mAX,
        N,      N,              // PUSH DS
        N,      N,              // POP  DS

        // 20
        EA|R|B, F|EA|B,         // AND
        EA|R,   F|EA,
        EA|R|B, F|R|B,
        EA|R,   F|R,
        mAX,    F|mAX,
        mAX,    F|mAX,
        N,      N,              // SEG ES
        F|mAX,  F|mAX,          // DAA

        // 28
        EA|R|B, F|EA|B,         // SUB
        EA|R,   F|EA,
        EA|R|B, F|R|B,
        EA|R,   F|R,
        mAX,    F|mAX,
        mAX,    F|mAX,
        N,      N,              // SEG CS
        F|mAX,  F|mAX,          // DAS

        // 30
        EA|R|B, F|EA|B,         // XOR
        EA|R,   F|EA,
        EA|R|B, F|R|B,
        EA|R,   F|R,
        mAX,    F|mAX,
        mAX,    F|mAX,
        N,      N,              // SEG SS
        F|mAX,  F|mAX,          // AAA

        // 38
        EA|R|B, F,              // CMP
        EA|R,   F,
        EA|R|B, F,
        EA|R,   F,
        mAX,    F,              // CMP AL,imm8
        mAX,    F,              // CMP EAX,imm16/32
        N,      N,              // SEG DS
        N,      N,              // AAS

        // 40
        mAX,    F|mAX,          // INC EAX
        mCX,    F|mCX,
        mDX,    F|mDX,
        mBX,    F|mBX,
        mSP,    F|mSP,
        mBP,    F|mBP,
        mSI,    F|mSI,
        mDI,    F|mDI,

        // 48
        mAX,    F|mAX,          // DEC EAX
        mCX,    F|mCX,
        mDX,    F|mDX,
        mBX,    F|mBX,
        mSP,    F|mSP,
        mBP,    F|mBP,
        mSI,    F|mSI,
        mDI,    F|mDI,

        // 50
        mAX|mSP,        mSP|mMEM,               // PUSH EAX
        mCX|mSP,        mSP|mMEM,
        mDX|mSP,        mSP|mMEM,
        mBX|mSP,        mSP|mMEM,
        mSP|mSP,        mSP|mMEM,
        mBP|mSP,        mSP|mMEM,
        mSI|mSP,        mSP|mMEM,
        mDI|mSP,        mSP|mMEM,

        // 58
        mSP|mMEM,       mAX|mSP,                // POP EAX
        mSP|mMEM,       mCX|mSP,
        mSP|mMEM,       mDX|mSP,
        mSP|mMEM,       mBX|mSP,
        mSP|mMEM,       mSP|mSP,
        mSP|mMEM,       mBP|mSP,
        mSP|mMEM,       mSI|mSP,
        mSP|mMEM,       mDI|mSP,

        // 60
        N,      N,              // PUSHA
        N,      N,              // POPA
        N,      N,              // BOUND Gv,Ma
        N,      N,              // ARPL  Ew,Rw
        N,      N,              // SEG FS
        N,      N,              // SEG GS
        N,      N,              // operand size prefix
        N,      N,              // address size prefix

        // 68
        mSP,    mSP|mMEM,       // PUSH immed16/32
        EA,     F|R,            // IMUL Gv,Ev,lv
        mSP,    mSP|mMEM,       // PUSH immed8
        EA,     F|R,            // IMUL Gv,Ev,lb
        N,      N,              // INSB Yb,DX
        N,      N,              // INSW/D Yv,DX
        N,      N,              // OUTSB DX,Xb
        N,      N,              // OUTSW/D DX,Xv

        // 70
        F|N,    N,
        F|N,    N,
        F|N,    N,
        F|N,    N,
        F|N,    N,
        F|N,    N,
        F|N,    N,
        F|N,    N,

        // 78
        F|N,    N,
        F|N,    N,
        F|N,    N,
        F|N,    N,
        F|N,    N,
        F|N,    N,
        F|N,    N,
        F|N,    N,

        // 80
        N,      N,
        N,      N,
        N,      N,
        N,      N,
        EA|R,   F,              // TEST EA,r8
        EA|R,   F,              // TEST EA,r16/32
        EA|R,   EA|R,           // XCHG EA,r8
        EA|R,   EA|R,           // XCHG EA,r16/32

        // 88
        R|B,    EA|B,           // MOV EA8,r8
        R,      EA,             // MOV EA,r16/32
        EA|B,   R|B,            // MOV r8,EA8
        EA,     R,              // MOV r16/32,EA
        N,      N,              // MOV EA,segreg
        EA,     R,              // LEA r16/32,EA
        N,      N,              // MOV segreg,EA
        mSP|mMEM, EA|mSP,       // POP mem16/32

        // 90
        0,              0,              // NOP
        mAX|mCX,        mAX|mCX,
        mAX|mDX,        mAX|mDX,
        mAX|mBX,        mAX|mBX,
        mAX|mSP,        mAX|mSP,
        mAX|mBP,        mAX|mBP,
        mAX|mSI,        mAX|mSI,
        mAX|mDI,        mAX|mDI,

        // 98
        mAX,            mAX,            // CBW
        mAX,            mDX,            // CWD
        N,              N|F,            // CALL far ptr
        N,              N,              // WAIT
        F|mSP,          mSP|mMEM,       // PUSHF
        mSP|mMEM,       F|mSP,          // POPF
        mAX,            F,              // SAHF
        F,              mAX,            // LAHF

        // A0
        mMEM,           mAX,            // MOV AL,moffs8
        mMEM,           mAX,            // MOV EAX,moffs32
        mAX,            mMEM,           // MOV moffs8,AL
        mAX,            mMEM,           // MOV moffs32,EAX
        N,              N,              // MOVSB
        N,              N,              // MOVSW/D
        N,              N,              // CMPSB
        N,              N,              // CMPSW/D

        // A8
        mAX,    F,                      // TEST AL,imm8
        mAX,    F,                      // TEST AX,imm16
        N,      N,                      // STOSB
        N,      N,                      // STOSW/D
        N,      N,                      // LODSB
        N,      N,                      // LODSW/D
        N,      N,                      // SCASB
        N,      N,                      // SCASW/D

        // B0
        0,      mAX,                    // MOV AL,imm8
        0,      mCX,
        0,      mDX,
        0,      mBX,
        0,      mAX,
        0,      mCX,
        0,      mDX,
        0,      mBX,

        // B8
        0,      mAX,                    // MOV AX,imm16
        0,      mCX,
        0,      mDX,
        0,      mBX,
        0,      mSP,
        0,      mBP,
        0,      mSI,
        0,      mDI,

        // C0
        EA,     F|EA,           // Shift Eb,Ib
        EA,     F|EA,
        N,      N,
        N,      N,
        N,      N,
        N,      N,
        0,      EA|B,           // MOV EA8,imm8
        0,      EA,             // MOV EA,imm16

        // C8
        N,      N,              // ENTER
        N,      N,              // LEAVE
        N,      N,              // RETF lw
        N,      N,              // RETF
        N,      N,              // INT 3
        N,      N,              // INT lb
        N,      N,              // INTO
        N,      N,              // IRET

        // D0
        EA,             F|EA,           // Shift EA,1
        EA,             F|EA,
        EA|mCX,         F|EA,           // Shift EA,CL
        EA|mCX,         F|EA,
        mAX,            F|mAX,          // AAM
        mAX,            F|mAX,          // AAD
        N,              N,              // reserved
        mAX|mBX|mMEM,   mAX,            // XLAT

        // D8
        N,      N,
        N,      N,
        N,      N,
        N,      N,
        N,      N,
        N,      N,
        N,      N,
        N,      N,

        // E0
        F|mCX|N,mCX|N,          // LOOPNE jb
        F|mCX|N,mCX|N,          // LOOPE  jb
        mCX|N,  mCX|N,          // LOOP   jb
        mCX|N,  N,              // JCXZ   jb
        N,      N,              // IN AL,lb
        N,      N,              // IN EAX,lb
        N,      N,              // OUT lb,AL
        N,      N,              // OUT lb,EAX

        // E8
        N,      N|F,            // CALL jv
        N,      N,              // JMP Jv
        N,      N,              // JMP Ab
        N,      N,              // JMP jb
        N|mDX,  N|mAX,          // IN AL,DX
        N|mDX,  N|mAX,          // IN AX,DX
        N|mAX|mDX,N,            // OUT DX,AL
        N|mAX|mDX,N,            // OUT DX,AX

        // F0
        N,      N,              // LOCK
        N,      N,              // reserved
        N,      N,              // REPNE
        N,      N,              // REP,REPE
        N,      N,              // HLT
        F,      F,              // CMC
        N,      N,
        N,      N,

        // F8
        0,      F,              // CLC
        0,      F,              // STC
        N,      N,              // CLI
        N,      N,              // STI
        N,      N,              // CLD
        N,      N,              // STD
        EA,     F|EA,           // INC/DEC
        N,      N,
};
 485: 
/****************************************
 * Same thing, but for groups: { read, write } mask pairs in the same form
 * as oprw, eight pairs per group.
 * Only the first four of the eight group slots have initializers
 * (Grp 1, Grp 3, Grp 5, and the byte version of Grp 3, in that order);
 * the remaining slots are implicitly zero-initialized.
 * NOTE(review): the position within a group presumably corresponds to the
 * reg field of the modregrm byte -- confirm against the code that indexes
 * this table.
 */

static unsigned grprw[8][8][2] =
{
        // Grp 1
        EA,     F|EA,           // ADD
        EA,     F|EA,           // OR
        F|EA,   F|EA,           // ADC
        F|EA,   F|EA,           // SBB
        EA,     F|EA,           // AND
        EA,     F|EA,           // SUB
        EA,     F|EA,           // XOR
        EA,     F,              // CMP

        // Grp 3
        EA,     F,              // TEST EA,imm
        N,      N,              // reserved
        EA,     EA,             // NOT
        EA,     F|EA,           // NEG
        mAX|EA, F|mAX|mDX,      // MUL
        mAX|EA, F|mAX|mDX,      // IMUL
        mAX|mDX|EA,     F|mAX|mDX,      // DIV
#if 0
        // Could generate an exception we want to catch
        mAX|mDX|EA|N,   F|mAX|mDX|N,    // IDIV
#else
        mAX|mDX|EA,     F|mAX|mDX,      // IDIV
#endif

        // Grp 5
        EA,     F|EA,           // INC Ev
        EA,     F|EA,           // DEC Ev
        N|EA,   N,              // CALL Ev
        N|EA,   N,              // CALL eP
        N|EA,   N,              // JMP Ev
        N|EA,   N,              // JMP Ep
        mSP|EA, mSP|mMEM,       // PUSH Ev
        N,      N,              // reserved

        // Grp 3, byte version
        EA|B,   F,              // TEST EA,imm
        N,      N,              // reserved
        EA|B,   EA|B,           // NOT
        EA|B,   F|EA|B,         // NEG
        mAX|EA, F|mAX,          // MUL
        mAX|EA, F|mAX,          // IMUL
        mAX|EA, F|mAX,          // DIV
#if 0
        // Could generate an exception we want to catch
        mAX|EA|N,       F|mAX|N,        // IDIV
#else
        mAX|EA, F|mAX,          // IDIV
#endif

};
 543: 
/********************************************
 * For floating point opcodes 0xD8..0xDF, with Irm < 0xC0.
 *      [][][0] = read
 *          [1] = write
 * One group of eight { read, write } pairs per opcode, in opcode order
 * (0xD8 first). Rows with an empty trailing comment are encodings the
 * table does not name; they are marked N (not swappable).
 * NOTE(review): in this table "short"/"long"/"long long" appear to denote
 * 16/32/64-bit integer operands; the FISTTP rows are labeled "int" (0xDB)
 * and "long" (0xDD), which does not follow that pattern -- confirm the
 * operand widths against the instruction reference.
 */

static unsigned grpf1[8][8][2] =
{
        // 0xD8
        EA|S,   S|C,    // FADD  float
        EA|S,   S|C,    // FMUL  float
        EA|S,   C,      // FCOM  float
        EA|S,   S|C,    // FCOMP float
        EA|S,   S|C,    // FSUB  float
        EA|S,   S|C,    // FSUBR float
        EA|S,   S|C,    // FDIV  float
        EA|S,   S|C,    // FDIVR float

        // 0xD9
        EA,     S|C,    // FLD  float
        N,      N,      //
        S,      EA|C,   // FST  float
        S,      EA|S|C, // FSTP float
        N,      N,      // FLDENV
        N,      N,      // FLDCW
        N,      N,      // FSTENV
        N,      N,      // FSTCW

        // 0xDA
        EA|S,   S|C,    // FIADD  long
        EA|S,   S|C,    // FIMUL  long
        EA|S,   C,      // FICOM  long
        EA|S,   S|C,    // FICOMP long
        EA|S,   S|C,    // FISUB  long
        EA|S,   S|C,    // FISUBR long
        EA|S,   S|C,    // FIDIV  long
        EA|S,   S|C,    // FIDIVR long

        // 0xDB
        EA,     S|C,    // FILD long
        S,      EA|S|C, // FISTTP int
        S,      EA|C,   // FIST long
        S,      EA|S|C, // FISTP long
        N,      N,      //
        EA,     S|C,    // FLD real80
        N,      N,      //
        S,      EA|S|C, // FSTP real80

        // 0xDC
        EA|S,   S|C,    // FADD  double
        EA|S,   S|C,    // FMUL  double
        EA|S,   C,      // FCOM  double
        EA|S,   S|C,    // FCOMP double
        EA|S,   S|C,    // FSUB  double
        EA|S,   S|C,    // FSUBR double
        EA|S,   S|C,    // FDIV  double
        EA|S,   S|C,    // FDIVR double

        // 0xDD
        EA,     S|C,    // FLD double
        S,      EA|S|C, // FISTTP long
        S,      EA|C,   // FST double
        S,      EA|S|C, // FSTP double
        N,      N,      // FRSTOR
        N,      N,      //
        N,      N,      // FSAVE
        C,      EA,     // FSTSW

        // 0xDE
        EA|S,   S|C,    // FIADD  short
        EA|S,   S|C,    // FIMUL  short
        EA|S,   C,      // FICOM  short
        EA|S,   S|C,    // FICOMP short
        EA|S,   S|C,    // FISUB  short
        EA|S,   S|C,    // FISUBR short
        EA|S,   S|C,    // FIDIV  short
        EA|S,   S|C,    // FIDIVR short

        // 0xDF
        EA,     S|C,    // FILD short
        S,      EA|S|C, // FISTTP short
        S,      EA|C,   // FIST short
        S,      EA|S|C, // FISTP short
        EA,     S|C,    // FBLD packed BCD
        EA,     S|C,    // FILD long long
        S,      EA|S|C, // FBSTP packed BCD
        S,      EA|S|C, // FISTP long long
};
 632: 
 633: 
/********************************************
 * Micro-ops for floating point opcodes 0xD8..0xDF, with Irm < 0xC0.
 * Laid out like grpf1: eight counts per opcode, in opcode order
 * (0xD8 first).
 * NOTE(review): rows with an empty comment are 0. That includes entry 1 of
 * 0xDB, 0xDD and 0xDF, which grpf1 describes as FISTTP -- confirm whether
 * a count is missing for those, and how a 0 from this table is treated
 * by its reader.
 */

static unsigned char uopsgrpf1[8][8] =
{
        // 0xD8
        2,              // FADD  float
        2,              // FMUL  float
        2,              // FCOM  float
        2,              // FCOMP float
        2,              // FSUB  float
        2,              // FSUBR float
        2,              // FDIV  float
        2,              // FDIVR float

        // 0xD9
        1,              // FLD  float
        0,              //
        2,              // FST  float
        2,              // FSTP float
        5,              // FLDENV
        3,              // FLDCW
        5,              // FSTENV
        5,              // FSTCW

        // 0xDA
        5,              // FIADD  long
        5,              // FIMUL  long
        5,              // FICOM  long
        5,              // FICOMP long
        5,              // FISUB  long
        5,              // FISUBR long
        5,              // FIDIV  long
        5,              // FIDIVR long

        // 0xDB
        4,              // FILD long
        0,              //
        4,              // FIST long
        4,              // FISTP long
        0,              //
        4,              // FLD real80
        0,              //
        5,              // FSTP real80

        // 0xDC
        2,              // FADD  double
        2,              // FMUL  double
        2,              // FCOM  double
        2,              // FCOMP double
        2,              // FSUB  double
        2,              // FSUBR double
        2,              // FDIV  double
        2,              // FDIVR double

        // 0xDD
        1,              // FLD double
        0,              //
        2,              // FST double
        2,              // FSTP double
        5,              // FRSTOR
        0,              //
        5,              // FSAVE
        5,              // FSTSW

        // 0xDE
        5,              // FIADD  short
        5,              // FIMUL  short
        5,              // FICOM  short
        5,              // FICOMP short
        5,              // FISUB  short
        5,              // FISUBR short
        5,              // FIDIV  short
        5,              // FIDIVR short

        // 0xDF
        4,              // FILD short
        0,              //
        4,              // FIST short
        4,              // FISTP short
        5,              // FBLD packed BCD
        4,              // FILD long long
        5,              // FBSTP packed BCD
        4,              // FISTP long long
};
 720: 
 721: /**************************************************
 722:  * Determine number of micro-ops for Pentium Pro and Pentium II processors.
 723:  * 0 means special case,
 724:  * 5 means 'complex'
 725:  */
 726: 
 727: static const unsigned char insuops[256] =
 728: {       0,0,0,0,        1,1,4,5,                /* 00 */
 729:         0,0,0,0,        1,1,4,0,                /* 08 */
 730:         0,0,0,0,        2,2,4,5,                /* 10 */
 731:         0,0,0,0,        2,2,4,5,                /* 18 */
 732:         0,0,0,0,        1,1,0,1,                /* 20 */
 733:         0,0,0,0,        1,1,0,1,                /* 28 */
 734:         0,0,0,0,        1,1,0,1,                /* 30 */
 735:         0,0,0,0,        1,1,0,1,                /* 38 */
 736:         1,1,1,1,        1,1,1,1,                /* 40 */
 737:         1,1,1,1,        1,1,1,1,                /* 48 */
 738:         3,3,3,3,        3,3,3,3,                /* 50 */
 739:         2,2,2,2,        3,2,2,2,                /* 58 */
 740:         5,5,5,5,        0,0,0,0,                /* 60 */
 741:         3,3,0,0,        5,5,5,5,                /* 68 */
 742:         1,1,1,1,        1,1,1,1,                /* 70 */
 743:         1,1,1,1,        1,1,1,1,                /* 78 */
 744:         0,0,0,0,        0,0,0,0,                /* 80 */
 745:         0,0,0,0,        0,1,4,0,                /* 88 */
 746:         1,3,3,3,        3,3,3,3,                /* 90 */
 747:         1,1,5,0,        5,5,1,1,                /* 98 */
 748:         1,1,2,2,        5,5,5,5,                /* A0 */
 749:         1,1,3,3,        2,2,3,3,                /* A8 */
 750:         1,1,1,1,        1,1,1,1,                /* B0 */
 751:         1,1,1,1,        1,1,1,1,                /* B8 */
 752:         0,0,5,4,        0,0,0,0,                /* C0 */
 753:         5,3,5,5,        5,3,5,5,                /* C8 */
 754:         0,0,0,0,        4,3,0,2,                /* D0 */
 755:         0,0,0,0,        0,0,0,0,                /* D8 */
 756:         4,4,4,2,        5,5,5,5,                /* E0 */
 757:         4,1,5,1,        5,5,5,5,                /* E8 */
 758:         0,0,5,5,        5,1,0,0,                /* F0 */
 759:         1,1,5,5,        4,4,0,0,                /* F8 */
 760: };
 761: 
// Eight micro-op counts. NOTE(review): the code that indexes this table is
// not in view here; presumably it is selected by a 3-bit field -- confirm.
static unsigned char uopsx[8] = { 1,1,2,5,1,1,1,5 };
 763: 
 764: /************************************************
 765:  * Determine number of micro-ops for Pentium Pro and Pentium II processors.
 766:  * 5 means 'complex'.
 767:  * Doesn't currently handle:
 768:  *      floating point
 769:  *      MMX
 770:  *      0F opcodes
 771:  *      prefix bytes
 772:  */
 773: 
 774: STATIC int uops(code *c)
 775: {   int n;
 776:     int op;
 777:     int op2;
 778: 
 779:     op = c->Iop & 0xFF;
 780:     if ((c->Iop & 0xFF00) == 0x0F00)
 781:         op = 0x0F;
 782:     n = insuops[op];
 783:     if (!n)                             // if special case
 784:     {   unsigned char irm,mod,reg,rm;
 785: 
 786:         irm = c->Irm;
 787:         mod = (irm >> 6) & 3;
 788:         reg = (irm >> 3) & 7;
 789:         rm = irm & 7;
 790: 
 791:         switch (op)
 792:         {
 793:             case 0x10:
 794:             case 0x11:                  // ADC rm,r
 795:             case 0x18:
 796:             case 0x19:                  // SBB rm,r
 797:                 n = (mod == 3) ? 2 : 4;
 798:                 break;
 799: 
 800:             case 0x12:
 801:             case 0x13:                  // ADC r,rm
 802:             case 0x1A:
 803:             case 0x1B:                  // SBB r,rm
 804:                 n = (mod == 3) ? 2 : 3;
 805:                 break;
 806: 
 807:             case 0x00:
 808:             case 0x01:                  // ADD rm,r
 809:             case 0x08:
 810:             case 0x09:                  // OR rm,r
 811:             case 0x20:
 812:             case 0x21:                  // AND rm,r
 813:             case 0x28:
 814:             case 0x29:                  // SUB rm,r
 815:             case 0x30:
 816:             case 0x31:                  // XOR rm,r
 817:                 n = (mod == 3) ? 1 : 4;
 818:                 break;
 819: 
 820:             case 0x02:
 821:             case 0x03:                  // ADD r,rm
 822:             case 0x0A:
 823:             case 0x0B:                  // OR r,rm
 824:             case 0x22:
 825:             case 0x23:                  // AND r,rm
 826:             case 0x2A:
 827:             case 0x2B:                  // SUB r,rm
 828:             case 0x32:
 829:             case 0x33:                  // XOR r,rm
 830:             case 0x38:
 831:             case 0x39:                  // CMP rm,r
 832:             case 0x3A:
 833:             case 0x3B:                  // CMP r,rm
 834:             case 0x69:                  // IMUL rm,r,imm
 835:             case 0x6B:                  // IMUL rm,r,imm8
 836:             case 0x84:
 837:             case 0x85:                  // TEST rm,r
 838:                 n = (mod == 3) ? 1 : 2;
 839:                 break;
 840: 
 841:             case 0x80:
 842:             case 0x81:
 843:             case 0x82:
 844:             case 0x83:
 845:                 if (reg == 2 || reg == 3)       // ADC/SBB rm,imm
 846:                     n = (mod == 3) ? 2 : 4;
 847:                 else if (reg == 7)              // CMP rm,imm
 848:                     n = (mod == 3) ? 1 : 2;
 849:                 else
 850:                     n = (mod == 3) ? 1 : 4;
 851:                 break;
 852: 
 853:             case 0x86:
 854:             case 0x87:                          // XCHG rm,r
 855:                 n = (mod == 3) ? 3 : 5;
 856:                 break;
 857: 
 858:             case 0x88:
 859:             case 0x89:                          // MOV rm,r
 860:                 n = (mod == 3) ? 1 : 2;
 861:                 break;
 862: 
 863:             case 0x8A:
 864:             case 0x8B:                          // MOV r,rm
 865:                 n = 1;
 866:                 break;
 867: 
 868:             case 0x8C:                          // MOV Sreg,rm
 869:                 n = (mod == 3) ? 1 : 3;
 870:                 break;
 871: 
 872:             case 0x8F:
 873:                 if (reg == 0)                   // POP m
 874:                     n = 5;
 875:                 break;
 876: 
 877:             case 0xC6:
 878:             case 0xC7:
 879:                 if (reg == 0)                   // MOV rm,imm
 880:                     n = (mod == 3) ? 1 : 2;
 881:                 break;
 882: 
 883:             case 0xD0:
 884:             case 0xD1:
 885:                 if (reg == 2 || reg == 3)       // RCL/RCR rm,1
 886:                     n = (mod == 3) ? 2 : 4;
 887:                 else
 888:                     n = (mod == 3) ? 1 : 4;
 889:                 break;
 890: 
 891:             case 0xC0:
 892:             case 0xC1:                          // RCL/RCR rm,imm8
 893:             case 0xD2:
 894:             case 0xD3:
 895:                 if (reg == 2 || reg == 3)       // RCL/RCR rm,CL
 896:                     n = 5;
 897:                 else
 898:                     n = (mod == 3) ? 1 : 4;
 899:                 break;
 900: 
 901:             case 0xD8:
 902:             case 0xD9:
 903:             case 0xDA:
 904:             case 0xDB:
 905:             case 0xDC:
 906:             case 0xDD:
 907:             case 0xDE:
 908:             case 0xDF:
 909:                 // Floating point opcodes
 910:                 if (irm < 0xC0)
 911:                 {   n = uopsgrpf1[op - 0xD8][reg];
 912:                     break;
 913:                 }
 914:                 n = uopsx[op - 0xD8];
 915:                 switch (op)
 916:                 {
 917:                     case 0xD9:
 918:                         switch (irm)
 919:                         {
 920:                             case 0xE0:          // FCHS
 921:                                 n = 3;
 922:                                 break;
 923:                             case 0xE8:
 924:                             case 0xE9:
 925:                             case 0xEA:
 926:                             case 0xEB:
 927:                             case 0xEC:
 928:                             case 0xED:
 929:                                 n = 2;
 930:                                 break;
 931:                             case 0xF0:
 932:                             case 0xF1:
 933:                             case 0xF2:
 934:                             case 0xF3:
 935:                             case 0xF4:
 936:                             case 0xF5:
 937:                             case 0xF8:
 938:                             case 0xF9:
 939:                             case 0xFB:
 940:                             case 0xFC:
 941:                             case 0xFD:
 942:                             case 0xFE:
 943:                             case 0xFF:
 944:                                 n = 5;
 945:                                 break;
 946:                         }
 947:                         break;
 948:                     case 0xDE:
 949:                         if (irm == 0xD9)        // FCOMPP
 950:                             n = 2;
 951:                         break;
 952:                 }
 953:                 break;
 954: 
 955:             case 0xF6:
 956:                 if (reg == 6 || reg == 7)       // DIV AL,rm8
 957:                     n = (mod == 3) ? 3 : 4;
 958:                 else if (reg == 4 || reg == 5 || reg == 0)      // MUL/IMUL/TEST rm8
 959:                     n = (mod == 3) ? 1 : 2;
 960:                 else if (reg == 2 || reg == 3)  // NOT/NEG rm
 961:                     n = (mod == 3) ? 1 : 4;
 962:                 break;
 963: 
 964:             case 0xF7:
 965:                 if (reg == 6 || reg == 7)       // DIV EAX,rm
 966:                     n = 4;
 967:                 else if (reg == 4 || reg == 5)  // MUL/IMUL rm
 968:                     n = (mod == 3) ? 3 : 4;
 969:                 else if (reg == 2 || reg == 3)  // NOT/NEG rm
 970:                     n = (mod == 3) ? 1 : 4;
 971:                 break;
 972: 
 973:             case 0xFF:
 974:                 if (reg == 2 || reg == 3 ||     // CALL rm, CALL m,rm
 975:                     reg == 5)                   // JMP seg:offset
 976:                     n = 5;
 977:                 else if (reg == 4)
 978:                     n = (mod == 3) ? 1 : 2;
 979:                 else if (reg == 0 || reg == 1)  // INC/DEC rm
 980:                     n = (mod == 3) ? 1 : 4;
 981:                 else if (reg == 6)              // PUSH rm
 982:                     n = (mod == 3) ? 3 : 4;
 983:                 break;
 984: 
 985:             case 0x0F:
 986:                 op2 = c->Iop & 0xFF;
 987:                 if ((op2 & 0xF0) == 0x80)       // Jcc
 988:                 {   n = 1;
 989:                     break;
 990:                 }
 991:                 if ((op2 & 0xF0) == 0x90)       // SETcc
 992:                 {   n = (mod == 3) ? 1 : 3;
 993:                     break;
 994:                 }
 995:                 if (op2 == 0xB6 || op2 == 0xB7 ||       // MOVZX
 996:                     op2 == 0xBE || op2 == 0xBF)         // MOVSX
 997:                 {   n = 1;
 998:                     break;
 999:                 }
1000:                 if (op2 == 0xAF)                        // IMUL r,m
1001:                 {   n = (mod == 3) ? 1 : 2;
1002:                     break;
1003:                 }
1004:                 break;
1005:         }
1006:     }
1007:     if (n == 0)
1008:         n = 5;                                  // copout for now
1009:     return n;
1010: }
1011: 
1012: /******************************************
1013:  * Determine pairing classification.
1014:  * Don't deal with floating point, just assume they are all NP (Not Pairable).
1015:  * Returns:
1016:  *      NP,UV,PU,PV optionally OR'd with PE
1017:  */
1018: 
1019: STATIC int pair_class(code *c)
1020: {   unsigned char op;
1021:     unsigned char irm,mod,reg,rm;
1022:     unsigned a32;
1023:     int pc;
1024: 
1025:     // Of course, with Intel this is *never* simple, and Intel's
1026:     // documentation is vague about the specifics.
1027: 
1028:     op = c->Iop & 0xFF;
1029:     if ((c->Iop & 0xFF00) == 0x0F00)
1030:         op = 0x0F;
1031:     pc = pentcycl[op];
1032:     a32 = I32;
1033:     if (c->Iflags & CFaddrsize)
1034:         a32 ^= 1;
1035:     irm = c->Irm;
1036:     mod = (irm >> 6) & 3;
1037:     reg = (irm >> 3) & 7;
1038:     rm = irm & 7;
1039:     switch (op)
1040:     {
1041:         case 0x0F:                              // 2 byte opcode
1042:             if ((c->Iop & 0xF0) == 0x80)        // if Jcc
1043:                 pc = PV | PF;
1044:             break;
1045: 
1046:         case 0x80:
1047:         case 0x81:
1048:         case 0x83:
1049:             if (reg == 2 ||                     // ADC EA,immed
1050:                 reg == 3)                       // SBB EA,immed
1051:             {   pc = PU;
1052:                 goto L2;
1053:             }
1054:             goto L1;                            // AND/OR/XOR/ADD/SUB/CMP EA,immed
1055: 
1056:         case 0x84:
1057:         case 0x85:                              // TEST EA,reg
1058:             if (mod == 3)                       // TEST reg,reg
1059:                 pc = UV;
1060:             break;
1061: 
1062:         case 0xC0:
1063:         case 0xC1:
1064:             if (reg >= 4)
1065:                 pc = PU;
1066:             break;
1067: 
1068:         case 0xC6:
1069:         case 0xC7:
1070:             if (reg == 0)                       // MOV EA,immed
1071:             {
1072:         L1:
1073:                 pc = UV;
1074:         L2:
1075:                 // if EA contains a displacement then
1076:                 // can't execute in V, or pair in U
1077:                 switch (mod)
1078:                 {   case 0:
1079:                         if (a32)
1080:                         {   if (rm == 5 ||
1081:                                 (rm == 4 && (c->Isib & 7) == 5)
1082:                                )
1083:                                 pc = NP;
1084:                         }
1085:                         else if (rm == 6)
1086:                             pc = NP;
1087:                         break;
1088:                     case 1:
1089:                     case 2:
1090:                         pc = NP;
1091:                         break;
1092:                 }
1093:             }
1094:             break;
1095: 
1096:         case 0xD9:
1097:             if (irm < 0xC0)
1098:             {
1099:                 if (reg == 0)
1100:                     pc = FX;
1101:             }
1102:             else if (irm < 0xC8)
1103:                 pc = FX;
1104:             else if (irm < 0xD0)
1105:                 pc = PV;
1106:             else
1107:             {
1108:                 switch (irm)
1109:                 {
1110:                     case 0xE0:
1111:                     case 0xE1:
1112:                     case 0xE4:
1113:                         pc = FX;
1114:                         break;
1115:                 }
1116:             }
1117:             break;
1118: 
1119:         case 0xDB:
1120:             if (irm < 0xC0 && (reg == 0 || reg == 5))
1121:                 pc = FX;
1122:             break;
1123: 
1124:         case 0xDD:
1125:             if (irm < 0xC0)
1126:             {
1127:                 if (reg == 0)
1128:                     pc = FX;
1129:             }
1130:             else if (irm >= 0xE0 && irm < 0xF0)
1131:                 pc = FX;
1132:             break;
1133: 
1134:         case 0xDF:
1135:             if (irm < 0xC0 && (reg == 0 || reg == 5))
1136:                 pc = FX;
1137:             break;
1138: 
1139:         case 0xFE:
1140:             if (reg == 0 || reg == 1)           // INC/DEC EA
1141:                 pc = UV;
1142:             break;
1143:         case 0xFF:
1144:             if (reg == 0 || reg == 1)           // INC/DEC EA
1145:                 pc = UV;
1146:             else if (reg == 2 || reg == 4)      // CALL/JMP near ptr EA
1147:                 pc = PE|PV;
1148:             else if (reg == 6 && mod == 3)      // PUSH reg
1149:                 pc = PE | UV;
1150:             break;
1151:     }
1152:     if (c->Iflags & CFPREFIX && pc == UV)       // if prefix byte
1153:         pc = PU;
1154:     return pc;
1155: }
1156: 
1157: /******************************************
1158:  * For an instruction, determine what is read
1159:  * and what is written, and what is used for addressing.
1160:  * Determine operand size if EA (larger is ok).
1161:  */
1162: 
1163: STATIC void getinfo(Cinfo *ci,code *c)
1164: {
1165:     memset(ci,0,sizeof(Cinfo));
1166:     if (!c)
1167:         return;
1168:     ci->c = c;
1169: 
1170:     if (PRO)
1171:     {
1172:         ci->uops = uops(c);
1173:         ci->isz = calccodsize(c);
1174:     }
1175:     else
1176:         ci->pair = pair_class(c);
1177: 
1178:     unsigned char op;
1179:     unsigned char op2;
1180:     unsigned char irm,mod,reg,rm;
1181:     unsigned a32;
1182:     int pc;
1183:     unsigned r,w;
1184:     int sz = I32 ? 4 : 2;
1185: 
1186:     ci->r = 0;
1187:     ci->w = 0;
1188:     ci->a = 0;
1189:     op = c->Iop & 0xFF;
1190:     if ((c->Iop & 0xFF00) == 0x0F00)
1191:         op = 0x0F;
1192:     //printf("\tgetinfo %x, op %x \n",c,op);
1193:     pc = pentcycl[op];
1194:     a32 = I32;
1195:     if (c->Iflags & CFaddrsize)
1196:         a32 ^= 1;
1197:     if (c->Iflags & CFopsize)
1198:         sz ^= 2 | 4;
1199:     irm = c->Irm;
1200:     mod = (irm >> 6) & 3;
1201:     reg = (irm >> 3) & 7;
1202:     rm = irm & 7;
1203: 
1204:     r = oprw[op][0];
1205:     w = oprw[op][1];
1206: 
1207:     switch (op)
1208:     {
1209:         case 0x50:
1210:         case 0x51:
1211:         case 0x52:
1212:         case 0x53:
1213:         case 0x55:
1214:         case 0x56:
1215:         case 0x57:                              // PUSH reg
1216:             ci->flags |= CIFLpush;
1217:         case 0x54:                              // PUSH ESP
1218:         case 0x6A:                              // PUSH imm8
1219:         case 0x68:                              // PUSH imm
1220:         case 0x0E:
1221:         case 0x16:
1222:         case 0x1E:
1223:         case 0x06:
1224:         case 0x9C:
1225:         Lpush:
1226:             ci->spadjust = -sz;
1227:             ci->a |= mSP;
1228:             break;
1229: 
1230:         case 0x58:
1231:         case 0x59:
1232:         case 0x5A:
1233:         case 0x5B:
1234:         case 0x5C:
1235:         case 0x5D:
1236:         case 0x5E:
1237:         case 0x5F:                              // POP reg
1238:         case 0x1F:
1239:         case 0x07:
1240:         case 0x17:
1241:         case 0x9D:                              // POPF
1242:         Lpop:
1243:             ci->spadjust = sz;
1244:             ci->a |= mSP;
1245:             break;
1246: 
1247:         case 0x80:
1248:             if (reg == 7)                       // CMP
1249:                 c->Iflags |= CFpsw;
1250:             r = B | grprw[0][reg][0];           // Grp 1 (byte)
1251:             w = B | grprw[0][reg][1];
1252:             break;
1253: 
1254:         case 0x81:
1255:         case 0x83:
1256:             if (reg == 7)                       // CMP
1257:                 c->Iflags |= CFpsw;
1258:             else if (irm == modregrm(3,0,SP))   // ADD ESP,imm
1259:             {
1260:                 assert(c->IFL2 == FLconst);
1261:                 ci->spadjust = (op == 0x81) ? c->IEV2.Vint : (signed char)c->IEV2.Vint;
1262:             }
1263:             else if (irm == modregrm(3,5,SP))   // SUB ESP,imm
1264:             {
1265:                 assert(c->IFL2 == FLconst);
1266:                 ci->spadjust = (op == 0x81) ? -c->IEV2.Vint : -(signed char)c->IEV2.Vint;
1267:             }
1268:             r = grprw[0][reg][0];               // Grp 1
1269:             w = grprw[0][reg][1];
1270:             break;
1271: 
1272:         case 0x8F:
1273:             if (reg == 0)                       // POP rm
1274:                 goto Lpop;
1275:             break;
1276: 
1277:         case 0xA0:
1278:         case 0xA1:
1279:         case 0xA2:
1280:         case 0xA3:
1281:             // Fake having an EA to simplify code in conflict()
1282:             ci->flags |= CIFLea;
1283:             ci->reg = 0;
1284:             ci->sibmodrm = a32 ? modregrm(0,0,5) : modregrm(0,0,6);
1285:             c->IFL1 = c->IFL2;
1286:             c->IEV1 = c->IEV2;
1287:             break;
1288: 
1289:         case 0xC2:
1290:         case 0xC3:
1291:         case 0xCA:
1292:         case 0xCB:                              // RET
1293:             ci->a |= mSP;
1294:             break;
1295: 
1296:         case 0xE8:
1297:             if (c->Iflags & CFclassinit)        // call to __j_classinit
1298:             {   r = 0;
1299:                 w = F;
1300: #if CLASSINIT2
1301:                 ci->pair = UV;                  // it is patched to CMP EAX,0
1302: #else
1303:                 ci->pair = NP;
1304: #endif
1305:             }
1306:             break;
1307: 
1308:         case 0xF6:
1309:             r = grprw[3][reg][0];               // Grp 3, byte version
1310:             w = grprw[3][reg][1];
1311:             break;
1312: 
1313:         case 0xF7:
1314:             r = grprw[1][reg][0];               // Grp 3
1315:             w = grprw[1][reg][1];
1316:             break;
1317: 
1318:         case 0x0F:
1319:             op2 = c->Iop & 0xFF;
1320:             if ((op2 & 0xF0) == 0x80)           // if Jxx instructions
1321:             {
1322:                 ci->r = F | N;
1323:                 ci->w = N;
1324:                 goto Lret;
1325:             }
1326:             ci->r = N;
1327:             ci->w = N;          // copout for now
1328:             goto Lret;
1329: 
1330:         case 0xD7:                              // XLAT
1331:             ci->a = mAX | mBX;
1332:             break;
1333: 
1334:         case 0xFF:
1335:             r = grprw[2][reg][0];               // Grp 5
1336:             w = grprw[2][reg][1];
1337:             if (reg == 6)                       // PUSH rm
1338:                 goto Lpush;
1339:             break;
1340: 
1341:         case 0x38:
1342:         case 0x39:
1343:         case 0x3A:
1344:         case 0x3B:
1345:         case 0x3C:                              // CMP AL,imm8
1346:         case 0x3D:                              // CMP EAX,imm32
1347:             // For CMP opcodes, always test for flags
1348:             c->Iflags |= CFpsw;
1349:             break;
1350: 
1351:         case 0xD0:
1352:         case 0xD1:
1353:         case 0xD2:
1354:         case 0xD3:
1355:         case 0xC0:
1356:         case 0xC1:
1357:             if (reg == 2 || reg == 3)           // if RCL or RCR
1358:                 c->Iflags |= CFpsw;             // always test for flags
1359:             break;
1360: 
1361:         case 0xD8:
1362:         case 0xD9:
1363:         case 0xDA:
1364:         case 0xDB:
1365:         case 0xDC:
1366:         case 0xDD:
1367:         case 0xDE:
1368:         case 0xDF:
1369:             if (irm < 0xC0)
1370:             {   r = grpf1[op - 0xD8][reg][0];
1371:                 w = grpf1[op - 0xD8][reg][1];
1372:                 switch (op)
1373:                 {
1374:                     case 0xD8:
1375:                         if (reg == 3)           // if FCOMP
1376:                             ci->fpuadjust = -1;
1377:                         else
1378:                             ci->fp_op = FPfop;
1379:                         break;
1380: 
1381:                     case 0xD9:
1382:                         if (reg == 0)           // if FLD float
1383:                         {   ci->fpuadjust = 1;
1384:                             ci->fp_op = FPfld;
1385:                         }
1386:                         else if (reg == 3)      // if FSTP float
1387:                         {   ci->fpuadjust = -1;
1388:                             ci->fp_op = FPfstp;
1389:                         }
1390:                         else if (reg == 5 || reg == 7)
1391:                             sz = 2;
1392:                         else if (reg == 4 || reg == 6)
1393:                             sz = 28;
1394:                         break;
1395:                     case 0xDA:
1396:                         if (reg == 3)           // if FICOMP
1397:                             ci->fpuadjust = -1;
1398:                         break;
1399:                     case 0xDB:
1400:                         if (reg == 0 || reg == 5)
1401:                         {   ci->fpuadjust = 1;
1402:                             ci->fp_op = FPfld;  // FILD / FLD long double
1403:                         }
1404:                         if (reg == 3 || reg == 7)
1405:                             ci->fpuadjust = -1;
1406:                         if (reg == 7)
1407:                             ci->fp_op = FPfstp; // FSTP long double
1408:                         if (reg == 5 || reg == 7)
1409:                             sz = 10;
1410:                         break;
1411:                     case 0xDC:
1412:                         sz = 8;
1413:                         if (reg == 3)           // if FCOMP
1414:                             ci->fpuadjust = -1;
1415:                         else
1416:                             ci->fp_op = FPfop;
1417:                         break;
1418:                     case 0xDD:
1419:                         if (reg == 0)           // if FLD double
1420:                         {   ci->fpuadjust = 1;
1421:                             ci->fp_op = FPfld;
1422:                         }
1423:                         if (reg == 3)           // if FSTP double
1424:                         {   ci->fpuadjust = -1;
1425:                             ci->fp_op = FPfstp;
1426:                         }
1427:                         if (reg == 7)
1428:                             sz = 2;
1429:                         else if (reg == 4 || reg == 6)
1430:                             sz = 108;
1431:                         else
1432:                             sz = 8;
1433:                         break;
1434:                     case 0xDE:
1435:                         sz = 2;
1436:                         if (reg == 3)           // if FICOMP
1437:                             ci->fpuadjust = -1;
1438:                         break;
1439:                     case 0xDF:
1440:                         sz = 2;
1441:                         if (reg == 4 || reg == 6)
1442:                             sz = 10;
1443:                         else if (reg == 5 || reg == 7)
1444:                             sz = 8;
1445:                         if (reg == 0 || reg == 4 || reg == 5)
1446:                             ci->fpuadjust = 1;
1447:                         else if (reg == 3 || reg == 6 || reg == 7)
1448:                             ci->fpuadjust = -1;
1449:                         break;
1450:                 }
1451:                 break;
1452:             }
1453:             else if (op == 0xDE)
1454:             {   ci->fpuadjust = -1;             // pop versions of Fop's
1455:                 if (irm == 0xD9)
1456:                     ci->fpuadjust = -2;         // FCOMPP
1457:             }
1458: 
1459:             // Most floating point opcodes aren't staged, but are
1460:             // sent right through, in order to make use of the large
1461:             // latencies with floating point instructions.
1462:             if (ci->fp_op == FPfld ||
1463:                 (op == 0xD9 && (irm & 0xF8) == 0xC0))
1464:                 ;                               // FLD ST(i)
1465:             else
1466:                 ci->flags |= CIFLnostage;
1467: 
1468:             switch (op)
1469:             {
1470:                 case 0xD8:
1471:                     r = S;
1472:                     w = C;
1473:                     if ((irm & ~7) == 0xD0)
1474:                         w |= S;
1475:                     break;
1476:                 case 0xD9:
1477:                     // FCHS or FABS or FSQRT
1478:                     if (irm == 0xE0 || irm == 0xE1 || irm == 0xFA)
1479:                         ci->fp_op = FPfop;
1480:                     r = S;
1481:                     w = S|C;
1482:                     break;
1483:                 case 0xDA:
1484:                     if (irm == 0xE9)    // FUCOMPP
1485:                     {   r = S;
1486:                         w = S|C;
1487:                         break;
1488:                     }
1489:                     break;
1490:                 case 0xDB:
1491:                     if (irm == 0xE2)    // FCLEX
1492:                     {   r = 0;
1493:                         w = C;
1494:                         break;
1495:                     }
1496:                     if (irm == 0xE3)    // FINIT
1497:                     {   r = 0;
1498:                         w = S|C;
1499:                         break;
1500:                     }
1501:                     break;
1502:                 case 0xDC:
1503:                 case 0xDE:
1504:                     if ((irm & 0xF0) != 0xD0)
1505:                     {   r = S;
1506:                         w = S|C;
1507:                         break;
1508:                     }
1509:                     break;
1510:                 case 0xDD:
1511:                     // Not entirely correct, but conservative
1512:                     r = S;
1513:                     w = S|C;
1514:                     break;
1515:                 case 0xDF:
1516:                     if (irm == 0xE0)    // FSTSW AX
1517:                     {   r = C;
1518:                         w = mAX;
1519:                         break;
1520:                     }
1521:                     break;
1522:             }
1523:             break;
1524: #if DEBUG
1525:         default:
1526:             //printf("\t\tNo special case\n");
1527:             break;
1528: #endif
1529:     }
1530: 
1531:     if ((r | w) & B)                            // if byte operation
1532:         sz = 1;                                 // operand size is 1
1533: 
1534:     ci->r = r & ~(R | EA);
1535:     ci->w = w & ~(R | EA);
1536:     if (r & R)
1537:         ci->r |= mask[(r & B) ? (reg & 3) : reg];
1538:     if (w & R)
1539:         ci->w |= mask[(w & B) ? (reg & 3) : reg];
1540: 
1541:     // OR in bits for EA addressing mode
1542:     if ((r | w) & EA)
1543:     {   unsigned char sib;
1544: 
1545:         sib = 0;
1546:         switch (mod)
1547:         {
1548:             case 0:
1549:                 if (a32)
1550:                 {
1551:                     if (rm == 4)
1552:                     {   sib = c->Isib;
1553:                         if ((sib & modregrm(0,7,0)) != modregrm(0,4,0))
1554:                             ci->a |= mask[(sib >> 3) & 7];      // index register
1555:                         if ((sib & 7) != 5)
1556:                             ci->a |= mask[sib & 7];             // base register
1557:                     }
1558:                     else if (rm != 5)
1559:                         ci->a |= mask[rm];
1560:                 }
1561:                 else
1562:                 {   static unsigned char ea16[8] = {mBX|mSI,mBX|mDI,mBP|mSI,mBP|mDI,mSI,mDI,0,mBX};
1563:                     ci->a |= ea16[rm];
1564:                 }
1565:                 goto Lmem;
1566: 
1567:             case 1:
1568:             case 2:
1569:                 if (a32)
1570:                 {
1571:                     if (rm == 4)
1572:                     {   sib = c->Isib;
1573:                         if ((sib & modregrm(0,7,0)) != modregrm(0,4,0))
1574:                             ci->a |= mask[(sib >> 3) & 7];      // index register
1575:                         ci->a |= mask[sib & 7];                 // base register
1576:                     }
1577:                     else
1578:                         ci->a |= mask[rm];
1579:                 }
1580:                 else
1581:                 {   static unsigned char ea16[8] = {mBX|mSI,mBX|mDI,mBP|mSI,mBP|mDI,mSI,mDI,mBP,mBX};
1582:                     ci->a |= ea16[rm];
1583:                 }
1584: 
1585:             Lmem:
1586:                 if (r & EA)
1587:                     ci->r |= mMEM;
1588:                 if (w & EA)
1589:                     ci->w |= mMEM;
1590:                 ci->flags |= CIFLea;
1591:                 break;
1592: 
1593:             case 3:
1594:                 if (r & EA)
1595:                     ci->r |= mask[(r & B) ? (rm & 3) : rm];
1596:                 if (w & EA)
1597:                     ci->w |= mask[(w & B) ? (rm & 3) : rm];
1598:                 break;
1599:         }
1600:         // Adjust sibmodrm so that addressing modes can be compared simply
1601:         irm &= modregrm(3,0,7);
1602:         if (a32)
1603:         {
1604:             if (irm != modregrm(0,0,5))
1605:             {
1606:                 switch (mod)
1607:                 {   case 0:
1608:                         if ((sib & 7) != 5)     // if not disp32[index]
1609:                         {   c->IFL1 = FLconst;
1610:                             c->IEVpointer1 = 0;
1611:                             irm |= 0x80;
1612:                         }
1613:                         break;
1614:                     case 1:
1615:                         c->IEVpointer1 = (signed char) c->IEVpointer1;
1616:                         irm = modregrm(2,0,rm);
1617:                         break;
1618:                 }
1619:             }
1620:         }
1621:         else
1622:         {
1623:             if (irm != modregrm(0,0,6))
1624:             {
1625:                 switch (mod)
1626:                 {   case 0:
1627:                         c->IFL1 = FLconst;
1628:                         c->IEVpointer1 = 0;
1629:                         irm |= 0x80;
1630:                         break;
1631:                     case 1:
1632:                         c->IEVpointer1 = (signed char) c->IEVpointer1;
1633:                         irm = modregrm(2,0,rm);
1634:                         break;
1635:                 }
1636:             }
1637:         }
1638: 
1639:         ci->r |= ci->a;
1640:         ci->reg = reg;
1641:         ci->sibmodrm = (sib << 8) | irm;
1642:     }
1643: Lret:
1644:     if (ci->w & mSP)                    // if stack pointer is modified
1645:         ci->w |= mMEM;                  // then we are implicitly writing to memory
1646:     if (op == 0x8D)                     // if LEA
1647:         ci->r &= ~mMEM;                 // memory is not actually read
1648:     ci->sz = sz;
1649: #if DEBUG
1650:     //printf("\t\t"); ci->print();
1651: #endif
1652: }
1653: 
1654: /******************************************
1655:  * Determine if two instructions can pair.
1656:  * Assume that in general, cu can pair in the U pipe and cv in the V.
1657:  * Look for things like register contentions.
1658:  * Input:
1659:  *      cu      instruction for U pipe
1660:  *      cv      instruction for V pipe
1661:  * Returns:
1662:  *      !=0 if they can pair
1663:  */
1664: 
1665: STATIC int pair_test(Cinfo *cu,Cinfo *cv)
1666: {   unsigned pcu;
1667:     unsigned pcv;
1668:     unsigned r1,w1;
1669:     unsigned r2,w2;
1670:     unsigned x;
1671: 
1672:     pcu = cu->pair;
1673:     if (!(pcu & PU))
1674:     {
1675:         // See if pairs with FXCH and cv is FXCH
1676:         if (pcu & FX && cv->c->Iop == 0xD9 && (cv->c->Irm & ~7) == 0xC8)
1677:             goto Lpair;
1678:         goto Lnopair;
1679:     }
1680:     pcv = cv->pair;
1681:     if (!(pcv & PV))
1682:         goto Lnopair;
1683: 
1684:     r1 = cu->r;
1685:     w1 = cu->w;
1686:     r2 = cv->r;
1687:     w2 = cv->w;
1688: 
1689:     x = w1 & (r2 | w2) & ~(F|mMEM);     // register contention
1690:     if (x &&                            // if register contention
1691:         !(x == mSP && pcu & pcv & PE)   // and not exception
1692:        )
1693:         goto Lnopair;
1694: 
1695:     // Look for flags contention
1696:     if (w1 & r2 & F && !(pcv & PF))
1697:         goto Lnopair;
1698: 
1699: Lpair:
1700:     return 1;
1701: 
1702: Lnopair:
1703:     return 0;
1704: }
1705: 
1706: /******************************************
1707:  * Determine if two instructions have an AGI or register contention.
1708:  * Returns:
1709:  *      !=0 if they have an AGI
1710:  */
1711: 
1712: STATIC int pair_agi(Cinfo *c1,Cinfo *c2)
1713: {   unsigned x;
1714: 
1715:     x = c1->w & c2->a;
1716:     return x && !(x == mSP && c1->pair & c2->pair & PE);
1717: }
1718: 
1719: /********************************************
1720:  * Determine if three instructions can decode simultaneously
1721:  * in Pentium Pro and Pentium II.
1722:  * Input:
1723:  *      c0,c1,c2        candidates for decoders 0,1,2
1724:  *                      c2 can be NULL
1725:  * Returns:
1726:  *      !=0 if they can decode simultaneously
1727:  */
1728: 
1729: STATIC int triple_test(Cinfo *c0,Cinfo *c1,Cinfo *c2)
1730: {   int c2isz;
1731: 
1732:     assert(c0);
1733:     if (!c1)
1734:         goto Lnopair;
1735:     c2isz = c2 ? c2->isz : 0;
1736:     if (c0->isz > 7 || c1->isz > 7 || c2isz > 7 ||
1737:         c0->isz + c1->isz + c2isz > 16)
1738:         goto Lnopair;
1739: 
1740:     // 4-1-1 decode
1741:     if (c1->uops > 1 ||
1742:         (c2 && c2->uops > 1))
1743:         goto Lnopair;
1744: 
1745: Lpair:
warning C4102: 'Lpair' : unreferenced label
1746: return 1; 1747: 1748: Lnopair: 1749: return 0; 1750: } 1751: 1752: /******************************************** 1753: * Get next instruction worth looking at for scheduling. 1754: * Returns: 1755: * NULL no more instructions 1756: */ 1757: 1758: STATIC code * cnext(code *c) 1759: { 1760: while (1) 1761: { 1762: c = code_next(c); 1763: if (!c) 1764: break; 1765: if (c->Iflags & (CFtarg | CFtarg2)) 1766: break; 1767: if (!(c->Iop == NOP || 1768: c->Iop == (ESCAPE | ESClinnum))) 1769: break; 1770: } 1771: return c; 1772: } 1773: 1774: /****************************************** 1775: * Instruction scheduler. 1776: * Input: 1777: * c list of instructions to schedule 1778: * scratch scratch registers we can use 1779: * Returns: 1780: * revised list of scheduled instructions 1781: */ 1782: 1783: /////////////////////////////////// 1784: // Determine if c1 and c2 are swappable. 1785: // c1 comes before c2. 1786: // If they do not conflict 1787: // return 0 1788: // If they do conflict 1789: // return 0x100 + delay_clocks 1790: // Input: 1791: // fpsched if 1, then adjust fxch_pre and fxch_post to swap, 1792: // then return 0 1793: // if 2, then adjust ci1 as well as ci2 1794: 1795: STATIC int conflict(Cinfo *ci1,Cinfo *ci2,int fpsched) 1796: { 1797: code *c1; 1798: code *c2; 1799: unsigned r1,w1,a1; 1800: unsigned r2,w2,a2; 1801: int sz1,sz2; 1802: int i = 0; 1803: int delay_clocks; 1804: 1805: c1 = ci1->c; 1806: c2 = ci2->c; 1807: 1808: //printf("conflict %x %x\n",c1,c2); 1809: 1810: r1 = ci1->r; 1811: w1 = ci1->w; 1812: a1 = ci1->a; 1813: sz1 = ci1->sz; 1814: 1815: r2 = ci2->r; 1816: w2 = ci2->w; 1817: a2 = ci2->a; 1818: sz2 = ci2->sz; 1819: 1820: //printf("r1 %lx w1 %lx a1 %lx sz1 %x\n",r1,w1,a1,sz1); 1821: //printf("r2 %lx w2 %lx a2 %lx sz2 %x\n",r2,w2,a2,sz2); 1822: 1823: if ((c1->Iflags | c2->Iflags) & CFvolatile) 1824: goto Lconflict; 1825: 1826: // Determine if we should handle FPU register conflicts separately 1827: //if (fpsched) printf("fp_op 
%d,%d:\n",ci1->fp_op,ci2->fp_op); 1828: if (fpsched && ci1->fp_op && ci2->fp_op) 1829: { 1830: w1 &= ~(S|C); 1831: r1 &= ~(S|C); 1832: w2 &= ~(S|C); 1833: r2 &= ~(S|C); 1834: } 1835: else 1836: fpsched = 0; 1837: 1838: if ((r1 | r2) & N) 1839: { 1840: goto Lconflict; 1841: } 1842: 1843: #if 0 1844: if (c1->Iop == 0xFF && c2->Iop == 0x8B) 1845: { c1->print(); c2->print(); i = 1; 1846: printf("r1=%lx, w1=%lx, a1=%lx, sz1=%d, r2=%lx, w2=%lx, a2=%lx, sz2=%d\n",r1,w1,a1,sz1,r2,w2,a2,sz2); 1847: } 1848: #endif 1849: L1: 1850: if (w1 & r2 || (r1 | w1) & w2) 1851: { unsigned char ifl1,ifl2; 1852: 1853: if (i) printf("test\n"); 1854: 1855: #if 0 1856: if (c1->IFL1 != c2->IFL1) printf("t1\n"); 1857: if ((c1->Irm & modregrm(3,0,7)) != (c2->Irm & modregrm(3,0,7))) printf("t2\n"); 1858: if ((issib(c1->Irm) && c1->Isib != c2->Isib)) printf("t3\n"); 1859: if (c1->IEVpointer1 + sz1 <= c2->IEVpointer1) printf("t4\n"); 1860: if (c2->IEVpointer1 + sz2 <= c1->IEVpointer1) printf("t5\n"); 1861: #endif 1862: 1863: #if 1 // make sure CFpsw is reliably set 1864: if (w1 & w2 & F && // if both instructions write to flags 1865: w1 != F && 1866: w2 != F && 1867: !((r1 | r2) & F) && // but neither instruction reads them 1868: !((c1->Iflags | c2->Iflags) & CFpsw)) // and we don't care about flags 1869: { 1870: w1 &= ~F; 1871: w2 &= ~F; // remove conflict 1872: goto L1; // and try again 1873: } 1874: #endif 1875: // If other than the memory reference is a conflict 1876: if (w1 & r2 & ~mMEM || (r1 | w1) & w2 & ~mMEM) 1877: { if (i) printf("\t1\n"); 1878: if (i) printf("r1=%x, w1=%x, a1=%x, sz1=%d, r2=%x, w2=%x, a2=%x, sz2=%d\n",r1,w1,a1,sz1,r2,w2,a2,sz2); 1879: goto Lconflict; 1880: } 1881: 1882: // If referring to distinct types, then no dependency 1883: if (c1->Irex && c2->Irex && c1->Irex != c2->Irex) 1884: goto Lswap; 1885: 1886: ifl1 = c1->IFL1; 1887: ifl2 = c2->IFL1; 1888: 1889: // Special case: Allow indexed references using registers other than 1890: // ESP and EBP to be swapped with PUSH 
instructions 1891: if (((c1->Iop & ~7) == 0x50 || // PUSH reg 1892: c1->Iop == 0x6A || // PUSH imm8 1893: c1->Iop == 0x68 || // PUSH imm16/imm32 1894: (c1->Iop == 0xFF && ci1->reg == 6) // PUSH EA 1895: ) && 1896: ci2->flags & CIFLea && !(a2 & mSP) && 1897: !(a2 & mBP && (long)c2->IEVpointer1 < 0) 1898: ) 1899: { 1900: if (c1->Iop == 0xFF) 1901: { 1902: if (!(w2 & mMEM)) 1903: goto Lswap; 1904: } 1905: else 1906: goto Lswap; 1907: } 1908: 1909: // Special case: Allow indexed references using registers other than 1910: // ESP and EBP to be swapped with PUSH instructions 1911: if (((c2->Iop & ~7) == 0x50 || // PUSH reg 1912: c2->Iop == 0x6A || // PUSH imm8 1913: c2->Iop == 0x68 || // PUSH imm16/imm32 1914: (c2->Iop == 0xFF && ci2->reg == 6) // PUSH EA 1915: ) && 1916: ci1->flags & CIFLea && !(a1 & mSP) && 1917: !(a2 & mBP && (long)c2->IEVpointer1 < 0) 1918: ) 1919: { 1920: if (c2->Iop == 0xFF) 1921: { 1922: if (!(w1 & mMEM)) 1923: goto Lswap; 1924: } 1925: else 1926: goto Lswap; 1927: } 1928: 1929: // If not both an EA addressing mode, conflict 1930: if (!(ci1->flags & ci2->flags & CIFLea)) 1931: { if (i) printf("\t2\n"); 1932: goto Lconflict; 1933: } 1934: 1935: if (ci1->sibmodrm == ci2->sibmodrm) 1936: { if (ifl1 != ifl2) 1937: goto Lswap; 1938: switch (ifl1) 1939: { 1940: case FLconst: 1941: if (c1->IEV1.Vint != c2->IEV1.Vint && 1942: (c1->IEV1.Vint + sz1 <= c2->IEV1.Vint || 1943: c2->IEV1.Vint + sz2 <= c1->IEV1.Vint)) 1944: goto Lswap; 1945: break; 1946: case FLdatseg: 1947: if (c1->IEVseg1 != c2->IEVseg1 || 1948: c1->IEV1.Vint + sz1 <= c2->IEV1.Vint || 1949: c2->IEV1.Vint + sz2 <= c1->IEV1.Vint) 1950: goto Lswap; 1951: break; 1952: } 1953: } 1954: 1955: if ((c1->Iflags | c2->Iflags) & CFunambig && 1956: (ifl1 != ifl2 || 1957: ci1->sibmodrm != ci2->sibmodrm || 1958: (c1->IEV1.Vint != c2->IEV1.Vint && 1959: (c1->IEV1.Vint + sz1 <= c2->IEV1.Vint || 1960: c2->IEV1.Vint + sz2 <= c1->IEV1.Vint) 1961: ) 1962: ) 1963: ) 1964: { 1965: // Assume that [EBP] and [ESP] can 
point to the same location 1966: if (((a1 | a2) & (mBP | mSP)) == (mBP | mSP)) 1967: goto Lconflict; 1968: goto Lswap; 1969: } 1970: 1971: if (i) printf("\t3\n"); 1972: goto Lconflict; 1973: } 1974: 1975: Lswap: 1976: if (fpsched) 1977: { unsigned char a1,b1;
warning C6246: Local declaration of 'a1' hides declaration of the same name in outer scope. For additional information, see previous declaration at line '1799' of 'c:\projects\extern\d\dmd\src\backend\cgsched.c': Lines: 1799
1978: unsigned char a2,b2;
warning C6246: Local declaration of 'a2' hides declaration of the same name in outer scope. For additional information, see previous declaration at line '1800' of 'c:\projects\extern\d\dmd\src\backend\cgsched.c': Lines: 1800
1979: 1980: //printf("\tfpsched %d,%d:\n",ci1->fp_op,ci2->fp_op); 1981: a1 = ci1->fxch_pre; 1982: b1 = ci1->fxch_post; 1983: a2 = ci2->fxch_pre; 1984: b2 = ci2->fxch_post; 1985: 1986: #define X(a,b) ((a << 8) | b) 1987: switch (X(ci1->fp_op,ci2->fp_op)) 1988: { 1989: case X(FPfstp,FPfld): 1990: if (a1 || b1) 1991: goto Lconflict; 1992: if (a2) 1993: goto Lconflict; 1994: if (b2 == 0) 1995: ci2->fxch_post++; 1996: else if (b2 == 1) 1997: { 1998: ci2->fxch_pre++; 1999: ci2->fxch_post++; 2000: } 2001: else 2002: { 2003: goto Lconflict; 2004: } 2005: break; 2006: 2007: case X(FPfstp,FPfop): 2008: if (a1 || b1) 2009: goto Lconflict; 2010: ci2->fxch_pre++; 2011: ci2->fxch_post++; 2012: break; 2013: 2014: case X(FPfop,FPfop): 2015: if (a1 == 0 && b1 == 1 && a2 == 0 && b2 == 0) 2016: { ci2->fxch_pre = 1; 2017: ci2->fxch_post = 1; 2018: break; 2019: } 2020: if (a1 == 0 && b1 == 0 && a2 == 1 && b2 == 1) 2021: break; 2022: goto Lconflict; 2023: 2024: case X(FPfop,FPfld): 2025: if (a1 || b1) 2026: goto Lconflict; 2027: if (a2) 2028: goto Lconflict; 2029: if (b2) 2030: break; 2031: else if (fpsched == 2) 2032: ci1->fxch_post = 1; 2033: ci2->fxch_post = 1; 2034: break; 2035: 2036: default: 2037: goto Lconflict; 2038: } 2039: #undef X 2040: //printf("\tpre = %d, post = %d\n",ci2->fxch_pre,ci2->fxch_post); 2041: } 2042: 2043: //printf("w1 = x%x, w2 = x%x\n",w1,w2); 2044: if (i) printf("no conflict\n\n"); 2045: return 0; 2046: 2047: Lconflict: 2048: //printf("r1=%x, w1=%x, r2=%x, w2=%x\n",r1,w1,r2,w2); 2049: delay_clocks = 0; 2050: 2051: // Determine if AGI 2052: if (!PRO && pair_agi(ci1,ci2)) 2053: delay_clocks = 1; 2054: 2055: // Special delays for floating point 2056: if (fpsched) 2057: { if (ci1->fp_op == FPfld && ci2->fp_op == FPfstp) 2058: delay_clocks = 1; 2059: else if (ci1->fp_op == FPfop && ci2->fp_op == FPfstp) 2060: delay_clocks = 3; 2061: else if (ci1->fp_op == FPfop && ci2->fp_op == FPfop) 2062: delay_clocks = 2; 2063: } 2064: else if (PRO) 2065: { 2066: // Look for 
partial register write stalls 2067: if (w1 & r2 & ALLREGS && sz1 < sz2) 2068: delay_clocks = 7; 2069: } 2070: else if ((w1 | r1) & (w2 | r2) & (C | S)) 2071: { int reg; 2072: int op; 2073: 2074: op = c1->Iop; 2075: reg = c1->Irm & modregrm(0,7,0); 2076: if (ci1->fp_op == FPfld || 2077: (op == 0xD9 && (c1->Irm & 0xF8) == 0xC0) 2078: ) 2079: ; // FLD 2080: else if (op == 0xD9 && (c1->Irm & 0xF8) == 0xC8) 2081: ; // FXCH 2082: else if (c2->Iop == 0xD9 && (c2->Irm & 0xF8) == 0xC8) 2083: ; // FXCH 2084: else 2085: delay_clocks = 3; 2086: } 2087: 2088: if (i) printf("conflict %d\n\n",delay_clocks); 2089: return 0x100 + delay_clocks; 2090: } 2091: 2092: struct Schedule 2093: { 2094: #define TBLMAX (2*3*20) // must be divisible by both 2 and 3 2095: // (U,V pipe in Pentium, 3 decode units 2096: // in Pentium Pro) 2097: 2098: Cinfo *tbl[TBLMAX]; // even numbers are U pipe, odd numbers are V 2099: int tblmax; // max number of slots used 2100: 2101: Cinfo cinfo[TBLMAX]; 2102: int cinfomax; 2103: 2104: list_t stagelist; // list of instructions in staging area 2105: 2106: int fpustackused; // number of slots in FPU stack that are used 2107: 2108: void initialize(int fpustackinit); // initialize scheduler 2109: int stage(code *c); // stage instruction 2110: int insert(Cinfo *ci); // insert c into schedule 2111: code **assemble(code **pc); // reassemble scheduled instructions 2112: }; 2113: 2114: /****************************** 2115: */ 2116: 2117: void Schedule::initialize(int fpustackinit) 2118: { 2119: //printf("Schedule::initialize(fpustackinit = %d)\n", fpustackinit); 2120: memset(this,0,sizeof(Schedule)); 2121: fpustackused = fpustackinit; 2122: } 2123: 2124: /****************************** 2125: */ 2126: 2127: code **Schedule::assemble(code **pc) 2128: { int i; 2129: list_t l; 2130: code *c; 2131: 2132: #ifdef DEBUG 2133: if (debugs) printf("assemble:\n"); 2134: #endif 2135: assert(!*pc); 2136: 2137: // Try to insert the rest of the staged instructions 2138: for (l = 
stagelist; l; l = list_next(l)) 2139: { Cinfo *ci; 2140: 2141: ci = (Cinfo *)list_ptr(l); 2142: if (!insert(ci)) 2143: break; 2144: } 2145: 2146: // Get the instructions out of the schedule table 2147: assert((unsigned)tblmax <= TBLMAX); 2148: for (i = 0; i < tblmax; i++) 2149: { Cinfo *ci; 2150: 2151: ci = tbl[i]; 2152: #ifdef DEBUG 2153: if (debugs) 2154: { 2155: if (PRO) 2156: { static char tbl[3][4] = { "0 "," 1 "," 2" }; 2157: 2158: if (ci) 2159: printf("%s %d ",tbl[i - ((i / 3) * 3)],ci->uops); 2160: else 2161: printf("%s ",tbl[i - ((i / 3) * 3)]); 2162: } 2163: else 2164: { 2165: printf((i & 1) ? " V " : "U "); 2166: } 2167: if (ci) 2168: ci->c->print(); 2169: else 2170: printf("\n"); 2171: } 2172: #endif 2173: if (!ci) 2174: continue; 2175: fpustackused += ci->fpuadjust; 2176: //printf("stage()1: fpustackused = %d\n", fpustackused); 2177: c = ci->c; 2178: if (i == 0) 2179: c->Iflags |= CFtarg; // by definition, first is always a jump target 2180: else 2181: c->Iflags &= ~CFtarg; // the rest are not 2182: 2183: // Put in any FXCH prefix 2184: if (ci->fxch_pre) 2185: { code *cf; 2186: assert(i); 2187: cf = gen2(NULL,0xD9,0xC8 + ci->fxch_pre); 2188: *pc = cf; 2189: pc = &code_next(cf); 2190: } 2191: 2192: *pc = c; 2193: do 2194: { 2195: assert(*pc != code_next(*pc)); 2196: pc = &code_next(*pc); 2197: } while (*pc); 2198: 2199: // Put in any FXCH postfix 2200: if (ci->fxch_post) 2201: { int j; 2202: 2203: for (j = i + 1; j < tblmax; j++) 2204: { if (tbl[j]) 2205: { if (tbl[j]->fxch_pre == ci->fxch_post) 2206: { 2207: tbl[j]->fxch_pre = 0; // they cancel each other out 2208: goto L1; 2209: } 2210: break; 2211: } 2212: } 2213: { code *cf; 2214: cf = gen2(NULL,0xD9,0xC8 + ci->fxch_post); 2215: *pc = cf; 2216: pc = &code_next(cf); 2217: } 2218: } 2219: L1: ; 2220: } 2221: 2222: // Just append any instructions left in the staging area 2223: for (; l; l = list_next(l)) 2224: { Cinfo *ci = (Cinfo *)list_ptr(l); 2225: code *c = ci->c;
warning C6246: Local declaration of 'c' hides declaration of the same name in outer scope. For additional information, see previous declaration at line '2130' of 'c:\projects\extern\d\dmd\src\backend\cgsched.c': Lines: 2130
2226: 2227: #ifdef DEBUG 2228: if (debugs) { printf("appending: "); c->print(); } 2229: #endif 2230: *pc = c; 2231: do 2232: { 2233: pc = &code_next(*pc); 2234: 2235: } while (*pc); 2236: fpustackused += ci->fpuadjust; 2237: //printf("stage()2: fpustackused = %d\n", fpustackused); 2238: } 2239: list_free(&stagelist); 2240: 2241: return pc; 2242: } 2243: 2244: /****************************** 2245: * Insert c into scheduling table. 2246: * Returns: 2247: * 0 could not be scheduled; have to start a new one 2248: */ 2249: 2250: int Schedule::insert(Cinfo *ci) 2251: { code *c; 2252: int clocks; 2253: int i; 2254: int ic = 0; 2255: int imin; 2256: targ_size_t offset; 2257: targ_size_t vpointer; 2258: int movesp = 0; 2259: int reg2 = -1; // avoid "may be uninitialized" warning 2260: 2261: //printf("insert "); ci->c->print(); 2262: //printf("insert() %d\n", fpustackused); 2263: c = ci->c; 2264: //printf("\tc->Iop %x\n",c->Iop); 2265: vpointer = c->IEVpointer1; 2266: assert((unsigned)tblmax <= TBLMAX); 2267: if (tblmax == TBLMAX) // if out of space 2268: goto Lnoinsert; 2269: if (tblmax == 0) // if table is empty 2270: { // Just stuff it in the first slot 2271: i = tblmax; 2272: goto Linsert; 2273: } 2274: else if (c->Iflags & (CFtarg | CFtarg2)) 2275: // Jump targets can only be first in the scheduler 2276: goto Lnoinsert; 2277: 2278: // Special case of: 2279: // PUSH reg1 2280: // MOV reg2,x[ESP] 2281: if (c->Iop == 0x8B && 2282: (c->Irm & modregrm(3,0,7)) == modregrm(1,0,4) && 2283: c->Isib == modregrm(0,4,SP) && 2284: c->IFL1 == FLconst && 2285: ((signed char)c->IEVpointer1) >= REGSIZE 2286: ) 2287: { 2288: movesp = 1; // this is a MOV reg2,offset[ESP] 2289: offset = (signed char)c->IEVpointer1; 2290: reg2 = (c->Irm >> 3) & 7; 2291: } 2292: 2293: 2294: // Start at tblmax, and back up until we get a conflict 2295: ic = -1; 2296: imin = 0; 2297: for (i = tblmax; i >= 0; i--) 2298: { Cinfo *cit; 2299: 2300: cit = tbl[i]; 2301: if (!cit) 2302: continue; 2303: 2304: // Look 
for special case swap 2305: if (movesp && 2306: (cit->c->Iop & ~7) == 0x50 && // if PUSH reg1 2307: (cit->c->Iop & 7) != reg2 && // if reg1 != reg2 2308: ((signed char)c->IEVpointer1) >= -cit->spadjust 2309: )
warning C4146: unary minus operator applied to unsigned type, result still unsigned
warning C4018: '>=' : signed/unsigned mismatch
2310: { 2311: c->IEVpointer1 += cit->spadjust; 2312: //printf("\t1, spadjust = %d, ptr = x%x\n",cit->spadjust,c->IEVpointer1); 2313: continue; 2314: } 2315: 2316: if (movesp && 2317: cit->c->Iop == 0x83 && 2318: cit->c->Irm == modregrm(3,5,SP) && // if SUB ESP,offset 2319: cit->c->IFL2 == FLconst && 2320: ((signed char)c->IEVpointer1) >= -cit->spadjust 2321: )
warning C4146: unary minus operator applied to unsigned type, result still unsigned
warning C4018: '>=' : signed/unsigned mismatch
2322: { 2323: //printf("\t2, spadjust = %d\n",cit->spadjust); 2324: c->IEVpointer1 += cit->spadjust; 2325: continue; 2326: } 2327: 2328: clocks = conflict(cit,ci,1); 2329: if (clocks) 2330: { int j; 2331: 2332: ic = i; // where the conflict occurred 2333: clocks &= 0xFF; // convert to delay count 2334: 2335: // Move forward the delay clocks 2336: if (clocks == 0) 2337: j = i + 1; 2338: else if (PRO) 2339: j = (((i + 3) / 3) * 3) + clocks * 3; 2340: else 2341: { j = ((i + 2) & ~1) + clocks * 2; 2342: 2343: // It's possible we skipped over some AGI generating 2344: // instructions due to movesp. 2345: int k; 2346: for (k = i + 1; k < j; k++) 2347: { 2348: if (k >= TBLMAX) 2349: goto Lnoinsert; 2350: if (tbl[k] && pair_agi(tbl[k],ci)) 2351: { 2352: k = ((k + 2) & ~1) + 1; 2353: } 2354: } 2355: j = k; 2356: } 2357: 2358: if (j >= TBLMAX) // exceed table size? 2359: goto Lnoinsert; 2360: imin = j; // first possible slot c can go in 2361: break; 2362: } 2363: } 2364: 2365: 2366: // Scan forward looking for a hole to put it in 2367: for (i = imin; i < TBLMAX; i++) 2368: { 2369: if (tbl[i]) 2370: { 2371: // In case, due to movesp, we skipped over some AGI instructions 2372: if (!PRO && pair_agi(tbl[i],ci)) 2373: { 2374: i = ((i + 2) & ~1) + 1; 2375: if (i >= TBLMAX) 2376: goto Lnoinsert; 2377: } 2378: } 2379: else 2380: { 2381: if (PRO) 2382: { int i0 = (i / 3) * 3; // index of decode unit 0 2383: Cinfo *ci0; 2384: 2385: assert(((TBLMAX / 3) * 3) == TBLMAX); 2386: switch (i - i0) 2387: { 2388: case 0: // i0 can handle any instruction 2389: goto Linsert; 2390: case 1: 2391: ci0 = tbl[i0]; 2392: if (ci->uops > 1) 2393: { 2394: if (i0 >= imin && ci0->uops == 1) 2395: goto L1; 2396: i++; 2397: break; 2398: } 2399: if (triple_test(ci0,ci,tbl[i0 + 2])) 2400: goto Linsert; 2401: break; 2402: case 2: 2403: ci0 = tbl[i0]; 2404: if (ci->uops > 1) 2405: { 2406: if (i0 >= imin && ci0->uops == 1) 2407: { 2408: if (i >= tblmax) 2409: { if (i + 1 >= TBLMAX) 2410: goto Lnoinsert; 2411: 
tblmax = i + 1; 2412: } 2413: tbl[i0 + 2] = tbl[i0 + 1]; 2414: tbl[i0 + 1] = ci0; 2415: i = i0; 2416: goto Linsert; 2417: } 2418: break; 2419: } 2420: if (triple_test(ci0,tbl[i0 + 1],ci)) 2421: goto Linsert; 2422: break; 2423: default: 2424: assert(0); 2425: } 2426: } 2427: else 2428: { 2429: assert((TBLMAX & 1) == 0); 2430: if (i & 1) // if V pipe 2431: { 2432: if (pair_test(tbl[i - 1],ci)) 2433: { 2434: goto Linsert; 2435: } 2436: else if (i > imin && pair_test(ci,tbl[i - 1])) 2437: { 2438: L1: 2439: tbl[i] = tbl[i - 1]; 2440: if (i >= tblmax) 2441: tblmax = i + 1; 2442: i--; 2443: //printf("\tswapping with x%02x\n",tbl[i + 1]->c->Iop); 2444: goto Linsert; 2445: } 2446: } 2447: else // will always fit in U pipe 2448: { 2449: assert(!tbl[i + 1]); // because V pipe should be empty
warning C6201: Index '120' is out of valid index range '0' to '119' for possibly stack allocated buffer 'tbl'
2450: goto Linsert; 2451: } 2452: } 2453: } 2454: } 2455: 2456: Lnoinsert: 2457: //printf("\tnoinsert\n"); 2458: c->IEVpointer1 = vpointer; // reset to original value 2459: return 0; 2460: 2461: Linsert: 2462: // Insert at location i 2463: assert(i < TBLMAX); 2464: assert(tblmax <= TBLMAX); 2465: tbl[i] = ci; 2466: //printf("\tinsert at location %d\n",i); 2467: 2468: // If it's a scheduled floating point code, we have to adjust 2469: // the FXCH values 2470: if (ci->fp_op) 2471: { int j; 2472: 2473: ci->fxch_pre = 0; 2474: ci->fxch_post = 0; // start over again 2475: 2476: int fpu = fpustackused; 2477: for (j = 0; j < tblmax; j++) 2478: { 2479: if (tbl[j]) 2480: { 2481: fpu += tbl[j]->fpuadjust; 2482: if (fpu >= 8) // if FPU stack overflow 2483: { tbl[i] = NULL; 2484: //printf("fpu stack overflow\n"); 2485: goto Lnoinsert; 2486: } 2487: } 2488: } 2489: 2490: for (j = tblmax; j > i; j--) 2491: { 2492: if (j < TBLMAX && tbl[j]) 2493: conflict(tbl[j],ci,2); 2494: } 2495: } 2496: 2497: if (movesp) 2498: { // Adjust [ESP] offsets 2499: int j; 2500: 2501: //printf("\tic = %d, inserting at %d\n",ic,i); 2502: assert((unsigned)tblmax <= TBLMAX); 2503: for (j = ic + 1; j < i; j++) 2504: { Cinfo *cit; 2505: 2506: cit = tbl[j]; 2507: if (cit) 2508: { 2509: c->IEVpointer1 -= cit->spadjust; 2510: //printf("\t3, spadjust = %d, ptr = x%x\n",cit->spadjust,c->IEVpointer1); 2511: } 2512: } 2513: } 2514: if (i >= tblmax) 2515: tblmax = i + 1; 2516: 2517: // Now do a hack. Look back at immediately preceding instructions, 2518: // and see if we can swap with a push. 
2519: if (0 && movesp) 2520: { int j; 2521: 2522: while (1) 2523: { 2524: for (j = 1; i > j; j++) 2525: if (tbl[i - j]) 2526: break; 2527: 2528: if (i >= j && tbl[i - j] && 2529: (tbl[i - j]->c->Iop & ~7) == 0x50 && // if PUSH reg1 2530: (tbl[i - j]->c->Iop & 7) != reg2 && // if reg1 != reg2 2531: (signed char)c->IEVpointer1 >= REGSIZE) 2532: { 2533: //printf("\t-4 prec, i-j=%d, i=%d\n",i-j,i); 2534: assert((unsigned)i < TBLMAX); 2535: assert((unsigned)(i - j) < TBLMAX); 2536: tbl[i] = tbl[i - j]; 2537: tbl[i - j] = ci; 2538: i -= j; 2539: c->IEVpointer1 -= REGSIZE; 2540: } 2541: else 2542: break; 2543: } 2544: } 2545: 2546: //printf("\tinsert\n"); 2547: return 1; 2548: } 2549: 2550: 2551: /****************************** 2552: * Insert c into staging area. 2553: * Returns: 2554: * 0 could not be scheduled; have to start a new one 2555: */ 2556: 2557: int Schedule::stage(code *c) 2558: { Cinfo *ci; 2559: list_t l; 2560: list_t ln; 2561: int agi; 2562: 2563: //printf("stage: "); c->print(); 2564: if (cinfomax == TBLMAX) // if out of space 2565: goto Lnostage; 2566: ci = &cinfo[cinfomax++]; 2567: getinfo(ci,c); 2568: 2569: if (c->Iflags & (CFtarg | CFtarg2 | CFvolatile)) 2570: { 2571: // Insert anything in stagelist 2572: for (l = stagelist; l; l = ln) 2573: { Cinfo *cs; 2574: 2575: ln = list_next(l); 2576: cs = (Cinfo *)list_ptr(l); 2577: if (!insert(cs)) 2578: return 0; 2579: list_subtract(&stagelist,cs); 2580: } 2581: return insert(ci); 2582: } 2583: 2584: // Look through stagelist, and insert any AGI conflicting instructions 2585: agi = 0; 2586: for (l = stagelist; l; l = ln) 2587: { Cinfo *cs; 2588: 2589: ln = list_next(l); 2590: cs = (Cinfo *)list_ptr(l); 2591: if (pair_agi(cs,ci)) 2592: { 2593: if (!insert(cs)) 2594: goto Lnostage; 2595: list_subtract(&stagelist,cs); 2596: agi = 1; // we put out an AGI 2597: } 2598: } 2599: 2600: // Look through stagelist, and insert any other conflicting instructions 2601: for (l = stagelist; l; l = ln) 2602: { Cinfo *cs; 
2603: 2604: ln = list_next(l); 2605: cs = (Cinfo *)list_ptr(l); 2606: if (conflict(cs,ci,0) && // if conflict 2607: !(cs->flags & ci->flags & CIFLpush)) 2608: { 2609: if (cs->spadjust) 2610: { 2611: // We need to insert all previous adjustments to ESP 2612: list_t la,lan; 2613: 2614: for (la = stagelist; la != l; la = lan) 2615: { Cinfo *ca; 2616: 2617: lan = list_next(la); 2618: ca = (Cinfo *)list_ptr(la); 2619: if (ca->spadjust) 2620: { if (!insert(ca)) 2621: goto Lnostage; 2622: list_subtract(&stagelist,ca); 2623: } 2624: } 2625: } 2626: 2627: if (!insert(cs)) 2628: goto Lnostage; 2629: list_subtract(&stagelist,cs); 2630: } 2631: } 2632: 2633: // If floating point opcode, don't stage it, send it right out 2634: if (!agi && ci->flags & CIFLnostage) 2635: { 2636: if (!insert(ci)) 2637: goto Lnostage; 2638: return 1; 2639: } 2640: 2641: list_append(&stagelist,ci); // append to staging list 2642: return 1; 2643: 2644: Lnostage: 2645: return 0; 2646: } 2647: 2648: /******************************************** 2649: * Snip off tail of instruction sequence. 2650: * Returns: 2651: * next instruction (the tail) or 2652: * NULL for no more instructions 2653: */ 2654: 2655: STATIC code * csnip(code *c) 2656: { code **pc; 2657: unsigned iflags; 2658: 2659: if (c) 2660: { iflags = c->Iflags & CFclassinit; 2661: while (1) 2662: { 2663: pc = &code_next(c); 2664: c = *pc; 2665: if (!c) 2666: break; 2667: if (c->Iflags & (CFtarg | CFtarg2)) 2668: break; 2669: if (!(c->Iop == NOP || 2670: c->Iop == (ESCAPE | ESClinnum) || 2671: c->Iflags & iflags)) 2672: break; 2673: } 2674: *pc = NULL; 2675: } 2676: return c; 2677: } 2678: 2679: 2680: /****************************** 2681: * Schedule Pentium instructions, 2682: * based on Steve Russell's algorithm. 
2683: */ 2684: 2685: code *schedule(code *c,regm_t scratch) 2686: { 2687: code *cresult = NULL; 2688: code **pctail = &cresult; 2689: Schedule sch; 2690: 2691: sch.initialize(0); // initialize scheduling table 2692: while (c) 2693: { 2694: if ((c->Iop == NOP || (c->Iop & 0xFF) == ESCAPE || c->Iflags & CFclassinit) && 2695: !(c->Iflags & (CFtarg | CFtarg2))) 2696: { code *cn; 2697: 2698: // Just append this instruction to pctail and go to the next one 2699: *pctail = c; 2700: cn = code_next(c); 2701: code_next(c) = NULL; 2702: pctail = &code_next(c); 2703: c = cn; 2704: continue; 2705: } 2706: 2707: //printf("init\n"); 2708: sch.initialize(sch.fpustackused); // initialize scheduling table 2709: 2710: while (c) 2711: { 2712: //printf("insert %p\n",c); 2713: if (!sch.stage(c)) // store c in scheduling table 2714: break; 2715: c = csnip(c); 2716: } 2717: 2718: //printf("assem %d\n",sch.tblmax); 2719: pctail = sch.assemble(pctail); // reassemble instruction stream 2720: } 2721: 2722: return cresult; 2723: } 2724: 2725: /**************************************************************************/ 2726: 2727: /******************************************** 2728: * Replace any occurrence of r1 in EA with r2. 
2729: */ 2730: 2731: STATIC void repEA(code *c,unsigned r1,unsigned r2) 2732: { 2733: unsigned mod,reg,rm; 2734: unsigned rmn; 2735: 2736: rmn = c->Irm; 2737: mod = rmn & 0xC0; 2738: reg = rmn & modregrm(0,7,0); 2739: rm = rmn & 7; 2740: 2741: if (mod == 0xC0 && rm == r1) 2742: ; //c->Irm = mod | reg | r2; 2743: else if (is32bitaddr(I32,c->Iflags) && 2744: // If not disp32 2745: (rmn & modregrm(3,0,7)) != modregrm(0,0,5)) 2746: { 2747: if (rm == 4) 2748: { // SIB byte addressing 2749: unsigned sib; 2750: unsigned base; 2751: unsigned index; 2752: 2753: sib = c->Isib; 2754: base = sib & 7; 2755: index = (sib >> 3) & 7; 2756: if (base == r1 && 2757: !(r1 == 5 && mod == 0) && 2758: !(r2 == 5 && mod == 0) 2759: ) 2760: base = r2; 2761: if (index == r1) 2762: index = r2; 2763: c->Isib = (sib & 0xC0) | (index << 3) | base; 2764: } 2765: else if (rm == r1) 2766: { 2767: if (r1 == BP && r2 == SP) 2768: { // Replace [EBP] with [ESP] 2769: c->Irm = mod | reg | 4; 2770: c->Isib = modregrm(0,4,SP); 2771: } 2772: else if (r2 == BP && mod == 0) 2773: { 2774: c->Irm = modregrm(1,0,0) | reg | r2; 2775: c->IFL1 = FLconst; 2776: c->IEV1.Vint = 0; 2777: } 2778: else 2779: c->Irm = mod | reg | r2; 2780: } 2781: } 2782: } 2783: 2784: /****************************************** 2785: * Instruction scheduler. 2786: * Input: 2787: * c list of instructions to schedule 2788: * scratch scratch registers we can use 2789: * Returns: 2790: * revised list of scheduled instructions 2791: */ 2792: 2793: /****************************************** 2794: * Swap c1 and c2. 2795: * c1 comes before c2. 
2796: * Swap in place to not disturb addresses of jmp targets 2797: */ 2798: 2799: STATIC void code_swap(code *c1,code *c2) 2800: { code cs; 2801: 2802: // Special case of: 2803: // PUSH reg1 2804: // MOV reg2,x[ESP] 2805: //printf("code_swap(%x, %x)\n",c1,c2); 2806: if ((c1->Iop & ~7) == 0x50 && 2807: c2->Iop == 0x8B && 2808: (c2->Irm & modregrm(3,0,7)) == modregrm(1,0,4) && 2809: c2->Isib == modregrm(0,4,SP) && 2810: c2->IFL1 == FLconst && 2811: ((signed char)c2->IEVpointer1) >= REGSIZE && 2812: (c1->Iop & 7) != ((c2->Irm >> 3) & 7) 2813: ) 2814: c2->IEVpointer1 -= REGSIZE; 2815: 2816: 2817: cs = *c2; 2818: *c2 = *c1; 2819: *c1 = cs; 2820: // Retain original CFtarg 2821: c1->Iflags = (c1->Iflags & ~(CFtarg | CFtarg2)) | (c2->Iflags & (CFtarg | CFtarg2)); 2822: c2->Iflags = (c2->Iflags & ~(CFtarg | CFtarg2)) | (cs.Iflags & (CFtarg | CFtarg2)); 2823: 2824: c1->next = c2->next; 2825: c2->next = cs.next; 2826: } 2827: 2828: code *peephole(code *cstart,regm_t scratch) 2829: { 2830: // Look for cases of: 2831: // MOV r1,r2 2832: // OP ?,r1 2833: // we can replace with: 2834: // MOV r1,r2 2835: // OP ?,r2 2836: // to improve pairing 2837: code *c; 2838: code *c1; 2839: unsigned r1,r2; 2840: unsigned mod,reg,rm; 2841: 2842: //printf("peephole\n"); 2843: for (c = cstart; c; c = c1) 2844: { unsigned char rmi; 2845: unsigned char rmn; 2846: 2847: //c->print(); 2848: c1 = cnext(c); 2849: Ln: 2850: if (!c1) 2851: break; 2852: if (c1->Iflags & (CFtarg | CFtarg2)) 2853: continue; 2854: 2855: // Do: 2856: // PUSH reg 2857: if (I32 && (c->Iop & ~7) == 0x50) 2858: { unsigned reg = c->Iop & 7;
warning C6246: Local declaration of 'reg' hides declaration of the same name in outer scope. For additional information, see previous declaration at line '2840' of 'c:\projects\extern\d\dmd\src\backend\cgsched.c': Lines: 2840
2859: 2860: // MOV [ESP],reg => NOP 2861: if (c1->Iop == 0x8B && 2862: c1->Irm == modregrm(0,reg,4) && 2863: c1->Isib == modregrm(0,4,SP)) 2864: { c1->Iop = NOP; 2865: continue; 2866: } 2867: 2868: // PUSH [ESP] => PUSH reg 2869: if (c1->Iop == 0xFF && 2870: c1->Irm == modregrm(0,6,4) && 2871: c1->Isib == modregrm(0,4,SP)) 2872: { c1->Iop = 0x50 + reg; 2873: continue; 2874: } 2875: 2876: // CMP [ESP],imm => CMP reg,i,, 2877: if (c1->Iop == 0x83 && 2878: c1->Irm == modregrm(0,7,4) && 2879: c1->Isib == modregrm(0,4,SP)) 2880: { c1->Irm = modregrm(3,7,reg); 2881: if (c1->IFL2 == FLconst && (signed char)c1->IEV2.Vuns == 0) 2882: { // to TEST reg,reg 2883: c1->Iop = (c1->Iop & 1) | 0x84; 2884: c1->Irm = modregrm(3,reg,reg); 2885: } 2886: continue; 2887: } 2888: 2889: } 2890: 2891: rmi = c->Irm; 2892: 2893: // Do: 2894: // MOV reg,[ESP] => PUSH reg 2895: // ADD ESP,4 => NOP 2896: if (I32 && c->Iop == 0x8B && (rmi & 0xC7) == modregrm(0,0,4) && 2897: c->Isib == modregrm(0,4,SP) && 2898: c1->Iop == 0x83 && (c1->Irm & 0xC7) == modregrm(3,0,SP) && 2899: !(c1->Iflags & CFpsw) && c1->IFL2 == FLconst && c1->IEV2.Vint == 4) 2900: { unsigned reg = (rmi >> 3) & 7;
warning C6246: Local declaration of 'reg' hides declaration of the same name in outer scope. For additional information, see previous declaration at line '2840' of 'c:\projects\extern\d\dmd\src\backend\cgsched.c': Lines: 2840
2901: c->Iop = 0x58 + reg; 2902: c1->Iop = NOP; 2903: continue; 2904: } 2905: 2906: if ((rmi & 0xC0) != 0xC0) 2907: { 2908: continue; 2909: } 2910: 2911: // Combine two SUBs of the same register 2912: if (c->Iop == c1->Iop && 2913: c->Iop == 0x83 && 2914: (rmi & modregrm(3,0,7)) == (c1->Irm & modregrm(3,0,7)) && 2915: !(c1->Iflags & CFpsw) && 2916: c->IFL2 == FLconst && c1->IFL2 == FLconst 2917: ) 2918: { int i = (signed char)c->IEV2.Vint; 2919: int i1 = (signed char)c1->IEV2.Vint; 2920: switch ((rmi & modregrm(0,7,0)) | ((c1->Irm & modregrm(0,7,0)) >> 3)) 2921: { 2922: case (0 << 3) | 0: // ADD, ADD 2923: case (5 << 3) | 5: // SUB, SUB 2924: i += i1; 2925: goto Laa; 2926: case (0 << 3) | 5: // ADD, SUB 2927: case (5 << 3) | 0: // SUB, ADD 2928: i -= i1; 2929: goto Laa; 2930: Laa: 2931: if ((signed char)i != i) 2932: c->Iop &= ~2; 2933: c->IEV2.Vint = i; 2934: c1->Iop = NOP; 2935: if (i == 0) 2936: c->Iop = NOP; 2937: continue; 2938: } 2939: } 2940: 2941: if (c->Iop == 0x8B) // MOV r1,EA 2942: { r1 = (rmi >> 3) & 7; 2943: r2 = rmi & 7; 2944: } 2945: else if (c->Iop == 0x89) // MOV EA,r2 2946: { r1 = rmi & 7; 2947: r2 = (rmi >> 3) & 7; 2948: } 2949: else 2950: { 2951: continue; 2952: } 2953: 2954: rmn = c1->Irm; 2955: mod = rmn & 0xC0; 2956: reg = rmn & modregrm(0,7,0); 2957: rm = rmn & 7; 2958: if (cod3_EA(c1)) 2959: repEA(c1,r1,r2); 2960: switch (c1->Iop) 2961: { 2962: case 0x50: 2963: case 0x51: 2964: case 0x52: 2965: case 0x53: 2966: case 0x54: 2967: case 0x55: 2968: case 0x56: 2969: case 0x57: // PUSH reg 2970: if ((c1->Iop & 7) == r1) 2971: { c1->Iop = 0x50 | r2; 2972: //printf("schedule PUSH reg\n"); 2973: } 2974: break; 2975: 2976: case 0x81: 2977: case 0x83: 2978: // Look for CMP EA,imm 2979: if (reg == modregrm(0,7,0)) 2980: { 2981: if (mod == 0xC0 && rm == r1) 2982: c1->Irm = mod | reg | r2; 2983: } 2984: break; 2985: 2986: case 0x84: // TEST reg,byte ptr EA 2987: if (r1 >= 4 || r2 >= 4) // if not a byte register 2988: break; 2989: if ((rmn & 0xC0) == 
0xC0) 2990: { 2991: if ((rmn & 3) == r1) 2992: { c1->Irm = rmn = (rmn & modregrm(3,7,4)) | r2; 2993: //printf("schedule 1\n"); 2994: } 2995: } 2996: if ((rmn & modregrm(0,3,0)) == modregrm(0,r1,0)) 2997: { c1->Irm = (rmn & modregrm(3,4,7)) | modregrm(0,r2,0); 2998: //printf("schedule 2\n"); 2999: } 3000: break; 3001: case 0x85: // TEST reg,word ptr EA 3002: if ((rmn & 0xC0) == 0xC0) 3003: { 3004: if ((rmn & 7) == r1) 3005: { c1->Irm = rmn = (rmn & modregrm(3,7,0)) | r2; 3006: //printf("schedule 3\n"); 3007: } 3008: } 3009: if ((rmn & modregrm(0,7,0)) == modregrm(0,r1,0)) 3010: { c1->Irm = (rmn & modregrm(3,0,7)) | modregrm(0,r2,0); 3011: //printf("schedule 4\n"); 3012: } 3013: break; 3014: 3015: case 0x89: // MOV EA,reg 3016: if ((rmn & modregrm(0,7,0)) == modregrm(0,r1,0)) 3017: { c1->Irm = (rmn & modregrm(3,0,7)) | modregrm(0,r2,0); 3018: //printf("schedule 5\n"); 3019: if (c1->Irm == modregrm(3,r2,r2)) 3020: goto Lnop; 3021: } 3022: break; 3023: 3024: case 0x8B: // MOV reg,EA 3025: if ((rmn & 0xC0) == 0xC0 && 3026: (rmn & 7) == r1) // if EA == r1 3027: { c1->Irm = (rmn & modregrm(3,7,0)) | r2; 3028: //printf("schedule 6\n"); 3029: if (c1->Irm == modregrm(3,r2,r2)) 3030: goto Lnop; 3031: } 3032: break; 3033: 3034: case 0x3C: // CMP AL,imm8 3035: if (r1 == AX && r2 < 4) 3036: { c1->Iop = 0x80; 3037: c1->Irm = modregrm(3,7,r2); 3038: //printf("schedule 7, r2 = %d\n", r2); 3039: } 3040: break; 3041: 3042: case 0x3D: // CMP AX,imm16 3043: if (r1 == AX) 3044: { c1->Iop = 0x81; 3045: c1->Irm = modregrm(3,7,r2); 3046: if (c1->IFL2 == FLconst && 3047: c1->IEV2.Vuns == (signed char)c1->IEV2.Vuns) 3048: c1->Iop = 0x83; 3049: //printf("schedule 8\n"); 3050: } 3051: break; 3052: } 3053: continue; 3054: Lnop: 3055: c1->Iop = NOP; 3056: c1 = cnext(c1); 3057: goto Ln; 3058: } 3059: L1: ;
warning C4102: 'L1' : unreferenced label
3060: return cstart; 3061: } 3062: 3063: /*****************************************************************/ 3064: 3065: /********************************************** 3066: * Replace complex instructions with simple ones more conducive 3067: * to scheduling. 3068: */ 3069: 3070: code *simpleops(code *c,regm_t scratch) 3071: { code *cstart; 3072: code **pc; 3073: unsigned reg; 3074: code *c2; 3075: 3076: // Worry about using registers not saved yet by prolog 3077: scratch &= ~fregsaved; 3078: 3079: if (!(scratch & (scratch - 1))) // if 0 or 1 registers 3080: return c; 3081: 3082: reg = findreg(scratch); 3083: 3084: cstart = c; 3085: for (pc = &cstart; *pc; pc = &code_next(*pc)) 3086: { 3087: c = *pc; 3088: if (c->Iflags & (CFtarg | CFtarg2 | CFopsize)) 3089: continue; 3090: if (c->Iop == 0x83 && 3091: (c->Irm & modregrm(0,7,0)) == modregrm(0,7,0) && 3092: (c->Irm & modregrm(3,0,0)) != modregrm(3,0,0) 3093: ) 3094: { // Replace CMP mem,imm with: 3095: // MOV reg,mem 3096: // CMP reg,imm 3097: targ_long imm; 3098: 3099: //printf("replacing CMP\n"); 3100: c->Iop = 0x8B; 3101: c->Irm = (c->Irm & modregrm(3,0,7)) | modregrm(0,reg,0); 3102: 3103: c2 = code_calloc(); 3104: if (reg == AX) 3105: c2->Iop = 0x3D; 3106: else 3107: { c2->Iop = 0x83; 3108: c2->Irm = modregrm(3,7,reg); 3109: } 3110: c2->IFL2 = c->IFL2; 3111: c2->IEV2 = c->IEV2; 3112: 3113: // See if c2 should be replaced by a TEST 3114: imm = c2->IEV2.Vuns; 3115: if (!(c2->Iop & 1)) 3116: imm &= 0xFF; 3117: else if (I32 ? 
c->Iflags & CFopsize : !(c->Iflags & CFopsize)) 3118: imm = (short) imm; 3119: if (imm == 0) 3120: { 3121: c2->Iop = 0x85; // TEST reg,reg 3122: c2->Irm = modregrm(3,reg,reg); 3123: } 3124: goto L1; 3125: } 3126: else if (c->Iop == 0xFF && 3127: (c->Irm & modregrm(0,7,0)) == modregrm(0,6,0) && 3128: (c->Irm & modregrm(3,0,0)) != modregrm(3,0,0) 3129: ) 3130: { // Replace PUSH mem with: 3131: // MOV reg,mem 3132: // PUSH reg 3133: 3134: // printf("replacing PUSH\n"); 3135: c->Iop = 0x8B; 3136: c->Irm = (c->Irm & modregrm(3,0,7)) | modregrm(0,reg,0); 3137: 3138: c2 = gen1(NULL,0x50 + reg); 3139: L1: 3140: //c->print(); 3141: //c2->print(); 3142: c2->next = c->next; 3143: c->next = c2; 3144: 3145: // Switch to another reg 3146: if (scratch & ~mask[reg]) 3147: reg = findreg(scratch & ~mask[reg]); 3148: } 3149: } 3150: return cstart; 3151: } 3152: 3153: #if DEBUG 3154: static const char *fpops[] = {"fstp","fld","fop"}; 3155: void Cinfo::print() 3156: { 3157: Cinfo *ci = this; 3158: 3159: if (ci == NULL) 3160: { 3161: printf("Cinfo 0\n"); 3162: return; 3163: } 3164: 3165: printf("Cinfo %p: c %p, pair %x, sz %d, isz %d, flags - ", 3166: ci,c,pair,sz,isz); 3167: if (ci->flags & CIFLarraybounds) 3168: printf("arraybounds,"); 3169: if (ci->flags & CIFLea) 3170: printf("ea,"); 3171: if (ci->flags & CIFLnostage) 3172: printf("nostage,"); 3173: if (ci->flags & CIFLpush) 3174: printf("push,"); 3175: if (ci->flags & ~(CIFLarraybounds|CIFLnostage|CIFLpush|CIFLea)) 3176: printf("bad flag,"); 3177: printf("\n\tr %lx w %lx a %lx reg %x uops %x sibmodrm %x spadjust %ld\n", 3178: (long)r,(long)w,(long)a,reg,uops,sibmodrm,(long)spadjust); 3179: if (ci->fp_op) 3180: printf("\tfp_op %s, fxch_pre %x, fxch_post %x\n", 3181: fpops[fp_op-1],fxch_pre,fxch_post); 3182: } 3183: #endif 3184: #endif 3185: