1: // Copyright (C) 1995-1998 by Symantec
   2: // Copyright (C) 2000-2009 by Digital Mars
   3: // All Rights Reserved
   4: // http://www.digitalmars.com
   5: // Written by Walter Bright
   6: /*
   7:  * This source file is made available for personal use
   8:  * only. The license is in /dmd/src/dmd/backendlicense.txt
   9:  * or /dm/src/dmd/backendlicense.txt
  10:  * For any other uses, please contact Digital Mars.
  11:  */
  12: 
  13: #if !SPP
  14: 
  15: #include        <stdio.h>
  16: #include        <string.h>
  17: #include        <time.h>
  18: 
  19: #include        "cc.h"
  20: #include        "el.h"
  21: #include        "code.h"
  22: #include        "oper.h"
  23: #include        "global.h"
  24: #include        "type.h"
  25: #include        "exh.h"
  26: #include        "list.h"
  27: 
  28: static char __file__[] = __FILE__;      /* for tassert.h                */
  29: #include        "tassert.h"
  30: 
// If we use Pentium Pro scheduler
// PRO is true when config.target_cpu is at or above TARGET_PentiumPro.
// The alternative definition, which tests config.target_scheduler instead,
// is compiled out by the #if 0 below.
#if 0
#define PRO     (config.target_scheduler >= TARGET_PentiumPro)
#else
#define PRO     (config.target_cpu >= TARGET_PentiumPro)
#endif
  37: 
// Struct where we gather information about an instruction.
// One Cinfo describes a single instruction: the instruction itself plus
// the pairing, size, floating point, register usage and stack adjustment
// data recorded for it.
struct Cinfo
{
    code *c;            // the instruction
    unsigned char pair; // pairing information
                        // NOTE(review): presumably an OR of the NP/PU/PV/UV/PE/PF/FX
                        // pairing bits defined further down - confirm
    unsigned char sz;   // operand size
    unsigned char isz;  // instruction size

    // For floating point scheduling
    // NOTE(review): fxch_pre/fxch_post look like FXCH bookkeeping for before
    // and after the instruction - confirm against the code that sets them
    unsigned char fxch_pre;
    unsigned char fxch_post;
    unsigned char fp_op;        // presumably one of the FP* values below - confirm
        #define FPfstp  1       // FSTP mem
        #define FPfld   2       // FLD mem
        #define FPfop   3       // Fop ST0,mem or Fop ST0

    unsigned char flags;        // the CIFL* values below are distinct bits,
                                // so presumably an OR of them - confirm
#define CIFLarraybounds 1       // this instruction is a jmp to array bounds
#define CIFLea          2       // this instruction has a memory-referencing
                                // modregrm EA byte
#define CIFLnostage     4       // don't stage these instructions
#define CIFLpush        8       // it's a push we can swap around

    unsigned r;         // read mask
    unsigned w;         // write mask
    unsigned a;         // registers used in addressing mode
    unsigned char reg;  // reg field of modregrm byte
    unsigned char uops; // Pentium Pro micro-ops
    unsigned sibmodrm;  // (sib << 8) + mod__rm byte
    unsigned spadjust;  // if !=0, then amount ESP changes as a result of this
                        // instruction being executed
    int fpuadjust;      // if !=0, then amount FPU stack changes as a result
                        // of this instruction being executed
#if DEBUG
    void print();       // pretty-printer
#endif
};
  75: 
// Passes invoked by cgsched_pentium(); their definitions are not in this
// part of the file. Each receives a code pointer and a register mask, and
// its return value replaces the caller's code pointer.
code *simpleops(code *c,regm_t scratch);
code *schedule(code *c,regm_t scratch);
code *peephole(code *c,regm_t scratch);
  79: 
  80: /*****************************************
  81:  * Do Pentium optimizations.
  82:  * Input:
  83:  *      scratch         scratch registers we can use
  84:  */
  85: 
  86: void cgsched_pentium(code **pc,regm_t scratch)
  87: {
  88:     //printf("scratch = x%02x\n",scratch);
  89:     if (config.target_scheduler >= TARGET_80486)
  90:     {
  91:         if (!I64)
  92:             *pc = peephole(*pc,0);
  93:         if (I32)                        // forget about 16 bit code
  94:         {
  95:             if (config.target_cpu == TARGET_Pentium ||
  96:                 config.target_cpu == TARGET_PentiumMMX)
  97:                 *pc = simpleops(*pc,scratch);
  98:             *pc = schedule(*pc,0);
  99:         }
 100:     }
 101: }
 102: 
 103: #define NP      0       // not pairable
 104: #define PU      1       // pairable in U only, never executed in V
 105: #define PV      2       // pairable in V only
 106: #define UV      (PU|PV) // pairable in both U and V
 107: #define PE      4       // register contention exception
 108: #define PF      8       // flags contention exception
 109: #define FX      0x10    // pairable with FXCH instruction
 110: 
 111: static unsigned char pentcycl[256] =
 112: {
 113:         UV,UV,UV,UV,    UV,UV,NP,NP,    // 0
 114:         UV,UV,UV,UV,    UV,UV,NP,NP,    // 8
 115:         PU,PU,PU,PU,    PU,PU,NP,NP,    // 10
 116:         PU,PU,PU,PU,    PU,PU,NP,NP,    // 18
 117:         UV,UV,UV,UV,    UV,UV,NP,NP,    // 20
 118:         UV,UV,UV,UV,    UV,UV,NP,NP,    // 28
 119:         UV,UV,UV,UV,    UV,UV,NP,NP,    // 30
 120:         UV,UV,UV,UV,    UV,UV,NP,NP,    // 38
 121: 
 122:         UV,UV,UV,UV,    UV,UV,UV,UV,    // 40
 123:         UV,UV,UV,UV,    UV,UV,UV,UV,    // 48
 124:         PE|UV,PE|UV,PE|UV,PE|UV,        PE|UV,PE|UV,PE|UV,PE|UV, // 50  PUSH reg
 125:         PE|UV,PE|UV,PE|UV,PE|UV,        PE|UV,PE|UV,PE|UV,PE|UV, // 58  POP reg
 126:         NP,NP,NP,NP,    NP,NP,NP,NP,    // 60
 127:         PE|UV,NP,PE|UV,NP,      NP,NP,NP,NP,    // 68
 128:         PV|PF,PV|PF,PV|PF,PV|PF,        PV|PF,PV|PF,PV|PF,PV|PF,        // 70   Jcc rel8
 129:         PV|PF,PV|PF,PV|PF,PV|PF,        PV|PF,PV|PF,PV|PF,PV|PF,        // 78   Jcc rel8
 130: 
 131:         NP,NP,NP,NP,    NP,NP,NP,NP,    // 80
 132:         UV,UV,UV,UV,    NP,UV,NP,NP,    // 88
 133:         NP,NP,NP,NP,    NP,NP,NP,NP,    // 90
 134:         NP,NP,NP,NP,    NP,NP,NP,NP,    // 98
 135:         UV,UV,UV,UV,    NP,NP,NP,NP,    // A0
 136:         UV,UV,NP,NP,    NP,NP,NP,NP,    // A8
 137:         UV,UV,UV,UV,    UV,UV,UV,UV,    // B0
 138:         UV,UV,UV,UV,    UV,UV,UV,UV,    // B8
 139: 
 140:         NP,NP,NP,NP,    NP,NP,NP,NP,    // C0
 141:         NP,NP,NP,NP,    NP,NP,NP,NP,    // C8
 142:         PU,PU,NP,NP,    NP,NP,NP,NP,    // D0
 143:         FX,NP,FX,FX,    NP,NP,FX,NP,    // D8   all floating point
 144:         NP,NP,NP,NP,    NP,NP,NP,NP,    // E0
 145:         PE|PV,PV,NP,PV, NP,NP,NP,NP,    // E8
 146:         NP,NP,NP,NP,    NP,NP,NP,NP,    // F0
 147:         NP,NP,NP,NP,    NP,NP,NP,NP,    // F8
 148: };
 149: 
 150: /********************************************
 151:  * For each opcode, determine read [0] and written [1] masks.
 152:  */
 153: 
 154: #define EA      0x100000
 155: #define R       0x200000        // register (reg of modregrm field)
 156: #define N       0x400000        // other things modified, not swappable
 157: #define B       0x800000        // it's a byte operation
 158: #define C       0x1000000       // floating point flags
 159: #define mMEM    0x2000000       // memory
 160: #define S       0x4000000       // floating point stack
 161: #define F       0x8000000       // flags
 162: 
 163: static unsigned oprw[256][2] =
 164: {
 165:         // 00
 166:         EA|R|B, F|EA|B,         // ADD
 167:         EA|R,   F|EA,
 168:         EA|R|B, F|R|B,
 169:         EA|R,   F|R,
 170:         mAX,    F|mAX,
 171:         mAX,    F|mAX,
 172:         N,      N,              // PUSH ES
 173:         N,      N,              // POP  ES
 174: 
 175:         // 08
 176:         EA|R|B, F|EA|B,         // OR
 177:         EA|R,   F|EA,
 178:         EA|R|B, F|R|B,
 179:         EA|R,   F|R,
 180:         mAX,    F|mAX,
 181:         mAX,    F|mAX,
 182:         N,      N,              // PUSH CS
 183:         N,      N,              // 2 byte escape
 184: 
 185:         // 10
 186:         F|EA|R|B,F|EA|B,        // ADC
 187:         F|EA|R, F|EA,
 188:         F|EA|R|B,F|R|B,
 189:         F|EA|R, F|R,
 190:         F|mAX,  F|mAX,
 191:         F|mAX,  F|mAX,
 192:         N,      N,              // PUSH SS
 193:         N,      N,              // POP  SS
 194: 
 195:         // 18
 196:         F|EA|R|B,F|EA|B,        // SBB
 197:         F|EA|R, F|EA,
 198:         F|EA|R|B,F|R|B,
 199:         F|EA|R, F|R,
 200:         F|mAX,  F|mAX,
 201:         F|mAX,  F|mAX,
 202:         N,      N,              // PUSH DS
 203:         N,      N,              // POP  DS
 204: 
 205:         // 20
 206:         EA|R|B, F|EA|B,         // AND
 207:         EA|R,   F|EA,
 208:         EA|R|B, F|R|B,
 209:         EA|R,   F|R,
 210:         mAX,    F|mAX,
 211:         mAX,    F|mAX,
 212:         N,      N,              // SEG ES
 213:         F|mAX,  F|mAX,          // DAA
 214: 
 215:         // 28
 216:         EA|R|B, F|EA|B,         // SUB
 217:         EA|R,   F|EA,
 218:         EA|R|B, F|R|B,
 219:         EA|R,   F|R,
 220:         mAX,    F|mAX,
 221:         mAX,    F|mAX,
 222:         N,      N,              // SEG CS
 223:         F|mAX,  F|mAX,          // DAS
 224: 
 225:         // 30
 226:         EA|R|B, F|EA|B,         // XOR
 227:         EA|R,   F|EA,
 228:         EA|R|B, F|R|B,
 229:         EA|R,   F|R,
 230:         mAX,    F|mAX,
 231:         mAX,    F|mAX,
 232:         N,      N,              // SEG SS
 233:         F|mAX,  F|mAX,          // AAA
 234: 
 235:         // 38
 236:         EA|R|B, F,              // CMP
 237:         EA|R,   F,
 238:         EA|R|B, F,
 239:         EA|R,   F,
 240:         mAX,    F,              // CMP AL,imm8
 241:         mAX,    F,              // CMP EAX,imm16/32
 242:         N,      N,              // SEG DS
 243:         N,      N,              // AAS
 244: 
 245:         // 40
 246:         mAX,    F|mAX,          // INC EAX
 247:         mCX,    F|mCX,
 248:         mDX,    F|mDX,
 249:         mBX,    F|mBX,
 250:         mSP,    F|mSP,
 251:         mBP,    F|mBP,
 252:         mSI,    F|mSI,
 253:         mDI,    F|mDI,
 254: 
 255:         // 48
 256:         mAX,    F|mAX,          // DEC EAX
 257:         mCX,    F|mCX,
 258:         mDX,    F|mDX,
 259:         mBX,    F|mBX,
 260:         mSP,    F|mSP,
 261:         mBP,    F|mBP,
 262:         mSI,    F|mSI,
 263:         mDI,    F|mDI,
 264: 
 265:         // 50
 266:         mAX|mSP,        mSP|mMEM,               // PUSH EAX
 267:         mCX|mSP,        mSP|mMEM,
 268:         mDX|mSP,        mSP|mMEM,
 269:         mBX|mSP,        mSP|mMEM,
 270:         mSP|mSP,        mSP|mMEM,
 271:         mBP|mSP,        mSP|mMEM,
 272:         mSI|mSP,        mSP|mMEM,
 273:         mDI|mSP,        mSP|mMEM,
 274: 
 275:         // 58
 276:         mSP|mMEM,       mAX|mSP,                // POP EAX
 277:         mSP|mMEM,       mCX|mSP,
 278:         mSP|mMEM,       mDX|mSP,
 279:         mSP|mMEM,       mBX|mSP,
 280:         mSP|mMEM,       mSP|mSP,
 281:         mSP|mMEM,       mBP|mSP,
 282:         mSP|mMEM,       mSI|mSP,
 283:         mSP|mMEM,       mDI|mSP,
 284: 
 285:         // 60
 286:         N,      N,              // PUSHA
 287:         N,      N,              // POPA
 288:         N,      N,              // BOUND Gv,Ma
 289:         N,      N,              // ARPL  Ew,Rw
 290:         N,      N,              // SEG FS
 291:         N,      N,              // SEG GS
 292:         N,      N,              // operand size prefix
 293:         N,      N,              // address size prefix
 294: 
 295:         // 68
 296:         mSP,    mSP|mMEM,       // PUSH immed16/32
 297:         EA,     F|R,            // IMUL Gv,Ev,lv
 298:         mSP,    mSP|mMEM,       // PUSH immed8
 299:         EA,     F|R,            // IMUL Gv,Ev,lb
 300:         N,      N,              // INSB Yb,DX
 301:         N,      N,              // INSW/D Yv,DX
 302:         N,      N,              // OUTSB DX,Xb
 303:         N,      N,              // OUTSW/D DX,Xv
 304: 
 305:         // 70
 306:         F|N,    N,
 307:         F|N,    N,
 308:         F|N,    N,
 309:         F|N,    N,
 310:         F|N,    N,
 311:         F|N,    N,
 312:         F|N,    N,
 313:         F|N,    N,
 314: 
 315:         // 78
 316:         F|N,    N,
 317:         F|N,    N,
 318:         F|N,    N,
 319:         F|N,    N,
 320:         F|N,    N,
 321:         F|N,    N,
 322:         F|N,    N,
 323:         F|N,    N,
 324: 
 325:         // 80
 326:         N,      N,
 327:         N,      N,
 328:         N,      N,
 329:         N,      N,
 330:         EA|R,   F,              // TEST EA,r8
 331:         EA|R,   F,              // TEST EA,r16/32
 332:         EA|R,   EA|R,           // XCHG EA,r8
 333:         EA|R,   EA|R,           // XCHG EA,r16/32
 334: 
 335:         // 88
 336:         R|B,    EA|B,           // MOV EA8,r8
 337:         R,      EA,             // MOV EA,r16/32
 338:         EA|B,   R|B,            // MOV r8,EA8
 339:         EA,     R,              // MOV r16/32,EA
 340:         N,      N,              // MOV EA,segreg
 341:         EA,     R,              // LEA r16/32,EA
 342:         N,      N,              // MOV segreg,EA
 343:         mSP|mMEM, EA|mSP,       // POP mem16/32
 344: 
 345:         // 90
 346:         0,              0,              // NOP
 347:         mAX|mCX,        mAX|mCX,
 348:         mAX|mDX,        mAX|mDX,
 349:         mAX|mBX,        mAX|mBX,
 350:         mAX|mSP,        mAX|mSP,
 351:         mAX|mBP,        mAX|mBP,
 352:         mAX|mSI,        mAX|mSI,
 353:         mAX|mDI,        mAX|mDI,
 354: 
 355:         // 98
 356:         mAX,            mAX,            // CBW
 357:         mAX,            mDX,            // CWD
 358:         N,              N|F,            // CALL far ptr
 359:         N,              N,              // WAIT
 360:         F|mSP,          mSP|mMEM,       // PUSHF
 361:         mSP|mMEM,       F|mSP,          // POPF
 362:         mAX,            F,              // SAHF
 363:         F,              mAX,            // LAHF
 364: 
 365:         // A0
 366:         mMEM,           mAX,            // MOV AL,moffs8
 367:         mMEM,           mAX,            // MOV EAX,moffs32
 368:         mAX,            mMEM,           // MOV moffs8,AL
 369:         mAX,            mMEM,           // MOV moffs32,EAX
 370:         N,              N,              // MOVSB
 371:         N,              N,              // MOVSW/D
 372:         N,              N,              // CMPSB
 373:         N,              N,              // CMPSW/D
 374: 
 375:         // A8
 376:         mAX,    F,                      // TEST AL,imm8
 377:         mAX,    F,                      // TEST AX,imm16
 378:         N,      N,                      // STOSB
 379:         N,      N,                      // STOSW/D
 380:         N,      N,                      // LODSB
 381:         N,      N,                      // LODSW/D
 382:         N,      N,                      // SCASB
 383:         N,      N,                      // SCASW/D
 384: 
 385:         // B0
 386:         0,      mAX,                    // MOV AL,imm8
 387:         0,      mCX,
 388:         0,      mDX,
 389:         0,      mBX,
 390:         0,      mAX,
 391:         0,      mCX,
 392:         0,      mDX,
 393:         0,      mBX,
 394: 
 395:         // B8
 396:         0,      mAX,                    // MOV AX,imm16
 397:         0,      mCX,
 398:         0,      mDX,
 399:         0,      mBX,
 400:         0,      mSP,
 401:         0,      mBP,
 402:         0,      mSI,
 403:         0,      mDI,
 404: 
 405:         // C0
 406:         EA,     F|EA,           // Shift Eb,Ib
 407:         EA,     F|EA,
 408:         N,      N,
 409:         N,      N,
 410:         N,      N,
 411:         N,      N,
 412:         0,      EA|B,           // MOV EA8,imm8
 413:         0,      EA,             // MOV EA,imm16
 414: 
 415:         // C8
 416:         N,      N,              // ENTER
 417:         N,      N,              // LEAVE
 418:         N,      N,              // RETF lw
 419:         N,      N,              // RETF
 420:         N,      N,              // INT 3
 421:         N,      N,              // INT lb
 422:         N,      N,              // INTO
 423:         N,      N,              // IRET
 424: 
 425:         // D0
 426:         EA,             F|EA,           // Shift EA,1
 427:         EA,             F|EA,
 428:         EA|mCX,         F|EA,           // Shift EA,CL
 429:         EA|mCX,         F|EA,
 430:         mAX,            F|mAX,          // AAM
 431:         mAX,            F|mAX,          // AAD
 432:         N,              N,              // reserved
 433:         mAX|mBX|mMEM,   mAX,            // XLAT
 434: 
 435:         // D8
 436:         N,      N,
 437:         N,      N,
 438:         N,      N,
 439:         N,      N,
 440:         N,      N,
 441:         N,      N,
 442:         N,      N,
 443:         N,      N,
 444: 
 445:         // E0
 446:         F|mCX|N,mCX|N,          // LOOPNE jb
 447:         F|mCX|N,mCX|N,          // LOOPE  jb
 448:         mCX|N,  mCX|N,          // LOOP   jb
 449:         mCX|N,  N,              // JCXZ   jb
 450:         N,      N,              // IN AL,lb
 451:         N,      N,              // IN EAX,lb
 452:         N,      N,              // OUT lb,AL
 453:         N,      N,              // OUT lb,EAX
 454: 
 455:         // E8
 456:         N,      N|F,            // CALL jv
 457:         N,      N,              // JMP Jv
 458:         N,      N,              // JMP Ab
 459:         N,      N,              // JMP jb
 460:         N|mDX,  N|mAX,          // IN AL,DX
 461:         N|mDX,  N|mAX,          // IN AX,DX
 462:         N|mAX|mDX,N,            // OUT DX,AL
 463:         N|mAX|mDX,N,            // OUT DX,AX
 464: 
 465:         // F0
 466:         N,      N,              // LOCK
 467:         N,      N,              // reserved
 468:         N,      N,              // REPNE
 469:         N,      N,              // REP,REPE
 470:         N,      N,              // HLT
 471:         F,      F,              // CMC
 472:         N,      N,
 473:         N,      N,
 474: 
 475:         // F8
 476:         0,      F,              // CLC
 477:         0,      F,              // STC
 478:         N,      N,              // CLI
 479:         N,      N,              // STI
 480:         N,      N,              // CLD
 481:         N,      N,              // STD
 482:         EA,     F|EA,           // INC/DEC
 483:         N,      N,
 484: };
 485: 
 486: /****************************************
 487:  * Same thing, but for groups.
 488:  */
 489: 
 490: static unsigned grprw[8][8][2] =
 491: {
 492:         // Grp 1
 493:         EA,     F|EA,           // ADD
 494:         EA,     F|EA,           // OR
 495:         F|EA,   F|EA,           // ADC
 496:         F|EA,   F|EA,           // SBB
 497:         EA,     F|EA,           // AND
 498:         EA,     F|EA,           // SUB
 499:         EA,     F|EA,           // XOR
 500:         EA,     F,              // CMP
 501: 
 502:         // Grp 3
 503:         EA,     F,              // TEST EA,imm
 504:         N,      N,              // reserved
 505:         EA,     EA,             // NOT
 506:         EA,     F|EA,           // NEG
 507:         mAX|EA, F|mAX|mDX,      // MUL
 508:         mAX|EA, F|mAX|mDX,      // IMUL
 509:         mAX|mDX|EA,     F|mAX|mDX,      // DIV
 510: #if 0
 511:         // Could generate an exception we want to catch
 512:         mAX|mDX|EA|N,   F|mAX|mDX|N,    // IDIV
 513: #else
 514:         mAX|mDX|EA,     F|mAX|mDX,      // IDIV
 515: #endif
 516: 
 517:         // Grp 5
 518:         EA,     F|EA,           // INC Ev
 519:         EA,     F|EA,           // DEC Ev
 520:         N|EA,   N,              // CALL Ev
 521:         N|EA,   N,              // CALL eP
 522:         N|EA,   N,              // JMP Ev
 523:         N|EA,   N,              // JMP Ep
 524:         mSP|EA, mSP|mMEM,       // PUSH Ev
 525:         N,      N,              // reserved
 526: 
 527:         // Grp 3, byte version
 528:         EA|B,   F,              // TEST EA,imm
 529:         N,      N,              // reserved
 530:         EA|B,   EA|B,           // NOT
 531:         EA|B,   F|EA|B,         // NEG
 532:         mAX|EA, F|mAX,          // MUL
 533:         mAX|EA, F|mAX,          // IMUL
 534:         mAX|EA, F|mAX,          // DIV
 535: #if 0
 536:         // Could generate an exception we want to catch
 537:         mAX|EA|N,       F|mAX|N,        // IDIV
 538: #else
 539:         mAX|EA, F|mAX,          // IDIV
 540: #endif
 541: 
 542: };
 543: 
 544: /********************************************
 545:  * For floating point opcodes 0xD8..0xDF, with Irm < 0xC0.
 546:  *      [][][0] = read
 547:  *          [1] = write
 548:  */
 549: 
 550: static unsigned grpf1[8][8][2] =
 551: {
 552:         // 0xD8
 553:         EA|S,   S|C,    // FADD  float
 554:         EA|S,   S|C,    // FMUL  float
 555:         EA|S,   C,      // FCOM  float
 556:         EA|S,   S|C,    // FCOMP float
 557:         EA|S,   S|C,    // FSUB  float
 558:         EA|S,   S|C,    // FSUBR float
 559:         EA|S,   S|C,    // FDIV  float
 560:         EA|S,   S|C,    // FDIVR float
 561: 
 562:         // 0xD9
 563:         EA,     S|C,    // FLD  float
 564:         N,      N,      //
 565:         S,      EA|C,   // FST  float
 566:         S,      EA|S|C, // FSTP float
 567:         N,      N,      // FLDENV
 568:         N,      N,      // FLDCW
 569:         N,      N,      // FSTENV
 570:         N,      N,      // FSTCW
 571: 
 572:         // 0xDA
 573:         EA|S,   S|C,    // FIADD  long
 574:         EA|S,   S|C,    // FIMUL  long
 575:         EA|S,   C,      // FICOM  long
 576:         EA|S,   S|C,    // FICOMP long
 577:         EA|S,   S|C,    // FISUB  long
 578:         EA|S,   S|C,    // FISUBR long
 579:         EA|S,   S|C,    // FIDIV  long
 580:         EA|S,   S|C,    // FIDIVR long
 581: 
 582:         // 0xDB
 583:         EA,     S|C,    // FILD long
 584:         S,      EA|S|C, // FISTTP int
 585:         S,      EA|C,   // FIST long
 586:         S,      EA|S|C, // FISTP long
 587:         N,      N,      //
 588:         EA,     S|C,    // FLD real80
 589:         N,      N,      //
 590:         S,      EA|S|C, // FSTP real80
 591: 
 592:         // 0xDC
 593:         EA|S,   S|C,    // FADD  double
 594:         EA|S,   S|C,    // FMUL  double
 595:         EA|S,   C,      // FCOM  double
 596:         EA|S,   S|C,    // FCOMP double
 597:         EA|S,   S|C,    // FSUB  double
 598:         EA|S,   S|C,    // FSUBR double
 599:         EA|S,   S|C,    // FDIV  double
 600:         EA|S,   S|C,    // FDIVR double
 601: 
 602:         // 0xDD
 603:         EA,     S|C,    // FLD double
 604:         S,      EA|S|C, // FISTTP long
 605:         S,      EA|C,   // FST double
 606:         S,      EA|S|C, // FSTP double
 607:         N,      N,      // FRSTOR
 608:         N,      N,      //
 609:         N,      N,      // FSAVE
 610:         C,      EA,     // FSTSW
 611: 
 612:         // 0xDE
 613:         EA|S,   S|C,    // FIADD  short
 614:         EA|S,   S|C,    // FIMUL  short
 615:         EA|S,   C,      // FICOM  short
 616:         EA|S,   S|C,    // FICOMP short
 617:         EA|S,   S|C,    // FISUB  short
 618:         EA|S,   S|C,    // FISUBR short
 619:         EA|S,   S|C,    // FIDIV  short
 620:         EA|S,   S|C,    // FIDIVR short
 621: 
 622:         // 0xDF
 623:         EA,     S|C,    // FILD short
 624:         S,      EA|S|C, // FISTTP short
 625:         S,      EA|C,   // FIST short
 626:         S,      EA|S|C, // FISTP short
 627:         EA,     S|C,    // FBLD packed BCD
 628:         EA,     S|C,    // FILD long long
 629:         S,      EA|S|C, // FBSTP packed BCD
 630:         S,      EA|S|C, // FISTP long long
 631: };
 632: 
 633: 
 634: /********************************************
 635:  * Micro-ops for floating point opcodes 0xD8..0xDF, with Irm < 0xC0.
 636:  */
 637: 
 638: static unsigned char uopsgrpf1[8][8] =
 639: {
 640:         // 0xD8
 641:         2,              // FADD  float
 642:         2,              // FMUL  float
 643:         2,              // FCOM  float
 644:         2,              // FCOMP float
 645:         2,              // FSUB  float
 646:         2,              // FSUBR float
 647:         2,              // FDIV  float
 648:         2,              // FDIVR float
 649: 
 650:         // 0xD9
 651:         1,              // FLD  float
 652:         0,              //
 653:         2,              // FST  float
 654:         2,              // FSTP float
 655:         5,              // FLDENV
 656:         3,              // FLDCW
 657:         5,              // FSTENV
 658:         5,              // FSTCW
 659: 
 660:         // 0xDA
 661:         5,              // FIADD  long
 662:         5,              // FIMUL  long
 663:         5,              // FICOM  long
 664:         5,              // FICOMP long
 665:         5,              // FISUB  long
 666:         5,              // FISUBR long
 667:         5,              // FIDIV  long
 668:         5,              // FIDIVR long
 669: 
 670:         // 0xDB
 671:         4,              // FILD long
 672:         0,              //
 673:         4,              // FIST long
 674:         4,              // FISTP long
 675:         0,              //
 676:         4,              // FLD real80
 677:         0,              //
 678:         5,              // FSTP real80
 679: 
 680:         // 0xDC
 681:         2,              // FADD  double
 682:         2,              // FMUL  double
 683:         2,              // FCOM  double
 684:         2,              // FCOMP double
 685:         2,              // FSUB  double
 686:         2,              // FSUBR double
 687:         2,              // FDIV  double
 688:         2,              // FDIVR double
 689: 
 690:         // 0xDD
 691:         1,              // FLD double
 692:         0,              //
 693:         2,              // FST double
 694:         2,              // FSTP double
 695:         5,              // FRSTOR
 696:         0,              //
 697:         5,              // FSAVE
 698:         5,              // FSTSW
 699: 
 700:         // 0xDE
 701:         5,              // FIADD  short
 702:         5,              // FIMUL  short
 703:         5,              // FICOM  short
 704:         5,              // FICOMP short
 705:         5,              // FISUB  short
 706:         5,              // FISUBR short
 707:         5,              // FIDIV  short
 708:         5,              // FIDIVR short
 709: 
 710:         // 0xDF
 711:         4,              // FILD short
 712:         0,              //
 713:         4,              // FIST short
 714:         4,              // FISTP short
 715:         5,              // FBLD packed BCD
 716:         4,              // FILD long long
 717:         5,              // FBSTP packed BCD
 718:         4,              // FISTP long long
 719: };
 720: 
 721: /**************************************************
 722:  * Determine number of micro-ops for Pentium Pro and Pentium II processors.
 723:  * 0 means special case,
 724:  * 5 means 'complex'
 725:  */
 726: 
 727: static const unsigned char insuops[256] =
 728: {       0,0,0,0,        1,1,4,5,                /* 00 */
 729:         0,0,0,0,        1,1,4,0,                /* 08 */
 730:         0,0,0,0,        2,2,4,5,                /* 10 */
 731:         0,0,0,0,        2,2,4,5,                /* 18 */
 732:         0,0,0,0,        1,1,0,1,                /* 20 */
 733:         0,0,0,0,        1,1,0,1,                /* 28 */
 734:         0,0,0,0,        1,1,0,1,                /* 30 */
 735:         0,0,0,0,        1,1,0,1,                /* 38 */
 736:         1,1,1,1,        1,1,1,1,                /* 40 */
 737:         1,1,1,1,        1,1,1,1,                /* 48 */
 738:         3,3,3,3,        3,3,3,3,                /* 50 */
 739:         2,2,2,2,        3,2,2,2,                /* 58 */
 740:         5,5,5,5,        0,0,0,0,                /* 60 */
 741:         3,3,0,0,        5,5,5,5,                /* 68 */
 742:         1,1,1,1,        1,1,1,1,                /* 70 */
 743:         1,1,1,1,        1,1,1,1,                /* 78 */
 744:         0,0,0,0,        0,0,0,0,                /* 80 */
 745:         0,0,0,0,        0,1,4,0,                /* 88 */
 746:         1,3,3,3,        3,3,3,3,                /* 90 */
 747:         1,1,5,0,        5,5,1,1,                /* 98 */
 748:         1,1,2,2,        5,5,5,5,                /* A0 */
 749:         1,1,3,3,        2,2,3,3,                /* A8 */
 750:         1,1,1,1,        1,1,1,1,                /* B0 */
 751:         1,1,1,1,        1,1,1,1,                /* B8 */
 752:         0,0,5,4,        0,0,0,0,                /* C0 */
 753:         5,3,5,5,        5,3,5,5,                /* C8 */
 754:         0,0,0,0,        4,3,0,2,                /* D0 */
 755:         0,0,0,0,        0,0,0,0,                /* D8 */
 756:         4,4,4,2,        5,5,5,5,                /* E0 */
 757:         4,1,5,1,        5,5,5,5,                /* E8 */
 758:         0,0,5,5,        5,1,0,0,                /* F0 */
 759:         1,1,5,5,        4,4,0,0,                /* F8 */
 760: };
 761: 
 762: static unsigned char uopsx[8] = { 1,1,2,5,1,1,1,5 };
 763: 
 764: /************************************************
 765:  * Determine number of micro-ops for Pentium Pro and Pentium II processors.
 766:  * 5 means 'complex'.
 767:  * Doesn't currently handle:
 768:  *      floating point
 769:  *      MMX
 770:  *      0F opcodes
 771:  *      prefix bytes
 772:  */
 773: 
 774: STATIC int uops(code *c)
 775: {   int n;
 776:     int op;
 777:     int op2;
 778: 
 779:     op = c->Iop & 0xFF;
 780:     if ((c->Iop & 0xFF00) == 0x0F00)
 781:         op = 0x0F;
 782:     n = insuops[op];
 783:     if (!n)                             // if special case
 784:     {   unsigned char irm,mod,reg,rm;
 785: 
 786:         irm = c->Irm;
 787:         mod = (irm >> 6) & 3;
 788:         reg = (irm >> 3) & 7;
 789:         rm = irm & 7;
 790: 
 791:         switch (op)
 792:         {
 793:             case 0x10:
 794:             case 0x11:                  // ADC rm,r
 795:             case 0x18:
 796:             case 0x19:                  // SBB rm,r
 797:                 n = (mod == 3) ? 2 : 4;
 798:                 break;
 799: 
 800:             case 0x12:
 801:             case 0x13:                  // ADC r,rm
 802:             case 0x1A:
 803:             case 0x1B:                  // SBB r,rm
 804:                 n = (mod == 3) ? 2 : 3;
 805:                 break;
 806: 
 807:             case 0x00:
 808:             case 0x01:                  // ADD rm,r
 809:             case 0x08:
 810:             case 0x09:                  // OR rm,r
 811:             case 0x20:
 812:             case 0x21:                  // AND rm,r
 813:             case 0x28:
 814:             case 0x29:                  // SUB rm,r
 815:             case 0x30:
 816:             case 0x31:                  // XOR rm,r
 817:                 n = (mod == 3) ? 1 : 4;
 818:                 break;
 819: 
 820:             case 0x02:
 821:             case 0x03:                  // ADD r,rm
 822:             case 0x0A:
 823:             case 0x0B:                  // OR r,rm
 824:             case 0x22:
 825:             case 0x23:                  // AND r,rm
 826:             case 0x2A:
 827:             case 0x2B:                  // SUB r,rm
 828:             case 0x32:
 829:             case 0x33:                  // XOR r,rm
 830:             case 0x38:
 831:             case 0x39:                  // CMP rm,r
 832:             case 0x3A:
 833:             case 0x3B:                  // CMP r,rm
 834:             case 0x69:                  // IMUL rm,r,imm
 835:             case 0x6B:                  // IMUL rm,r,imm8
 836:             case 0x84:
 837:             case 0x85:                  // TEST rm,r
 838:                 n = (mod == 3) ? 1 : 2;
 839:                 break;
 840: 
 841:             case 0x80:
 842:             case 0x81:
 843:             case 0x82:
 844:             case 0x83:
 845:                 if (reg == 2 || reg == 3)       // ADC/SBB rm,imm
 846:                     n = (mod == 3) ? 2 : 4;
 847:                 else if (reg == 7)              // CMP rm,imm
 848:                     n = (mod == 3) ? 1 : 2;
 849:                 else
 850:                     n = (mod == 3) ? 1 : 4;
 851:                 break;
 852: 
 853:             case 0x86:
 854:             case 0x87:                          // XCHG rm,r
 855:                 n = (mod == 3) ? 3 : 5;
 856:                 break;
 857: 
 858:             case 0x88:
 859:             case 0x89:                          // MOV rm,r
 860:                 n = (mod == 3) ? 1 : 2;
 861:                 break;
 862: 
 863:             case 0x8A:
 864:             case 0x8B:                          // MOV r,rm
 865:                 n = 1;
 866:                 break;
 867: 
 868:             case 0x8C:                          // MOV Sreg,rm
 869:                 n = (mod == 3) ? 1 : 3;
 870:                 break;
 871: 
 872:             case 0x8F:
 873:                 if (reg == 0)                   // POP m
 874:                     n = 5;
 875:                 break;
 876: 
 877:             case 0xC6:
 878:             case 0xC7:
 879:                 if (reg == 0)                   // MOV rm,imm
 880:                     n = (mod == 3) ? 1 : 2;
 881:                 break;
 882: 
 883:             case 0xD0:
 884:             case 0xD1:
 885:                 if (reg == 2 || reg == 3)       // RCL/RCR rm,1
 886:                     n = (mod == 3) ? 2 : 4;
 887:                 else
 888:                     n = (mod == 3) ? 1 : 4;
 889:                 break;
 890: 
 891:             case 0xC0:
 892:             case 0xC1:                          // RCL/RCR rm,imm8
 893:             case 0xD2:
 894:             case 0xD3:
 895:                 if (reg == 2 || reg == 3)       // RCL/RCR rm,CL
 896:                     n = 5;
 897:                 else
 898:                     n = (mod == 3) ? 1 : 4;
 899:                 break;
 900: 
 901:             case 0xD8:
 902:             case 0xD9:
 903:             case 0xDA:
 904:             case 0xDB:
 905:             case 0xDC:
 906:             case 0xDD:
 907:             case 0xDE:
 908:             case 0xDF:
 909:                 // Floating point opcodes
 910:                 if (irm < 0xC0)
 911:                 {   n = uopsgrpf1[op - 0xD8][reg];
 912:                     break;
 913:                 }
 914:                 n = uopsx[op - 0xD8];
 915:                 switch (op)
 916:                 {
 917:                     case 0xD9:
 918:                         switch (irm)
 919:                         {
 920:                             case 0xE0:          // FCHS
 921:                                 n = 3;
 922:                                 break;
 923:                             case 0xE8:
 924:                             case 0xE9:
 925:                             case 0xEA:
 926:                             case 0xEB:
 927:                             case 0xEC:
 928:                             case 0xED:
 929:                                 n = 2;
 930:                                 break;
 931:                             case 0xF0:
 932:                             case 0xF1:
 933:                             case 0xF2:
 934:                             case 0xF3:
 935:                             case 0xF4:
 936:                             case 0xF5:
 937:                             case 0xF8:
 938:                             case 0xF9:
 939:                             case 0xFB:
 940:                             case 0xFC:
 941:                             case 0xFD:
 942:                             case 0xFE:
 943:                             case 0xFF:
 944:                                 n = 5;
 945:                                 break;
 946:                         }
 947:                         break;
 948:                     case 0xDE:
 949:                         if (irm == 0xD9)        // FCOMPP
 950:                             n = 2;
 951:                         break;
 952:                 }
 953:                 break;
 954: 
 955:             case 0xF6:
 956:                 if (reg == 6 || reg == 7)       // DIV AL,rm8
 957:                     n = (mod == 3) ? 3 : 4;
 958:                 else if (reg == 4 || reg == 5 || reg == 0)      // MUL/IMUL/TEST rm8
 959:                     n = (mod == 3) ? 1 : 2;
 960:                 else if (reg == 2 || reg == 3)  // NOT/NEG rm
 961:                     n = (mod == 3) ? 1 : 4;
 962:                 break;
 963: 
 964:             case 0xF7:
 965:                 if (reg == 6 || reg == 7)       // DIV EAX,rm
 966:                     n = 4;
 967:                 else if (reg == 4 || reg == 5)  // MUL/IMUL rm
 968:                     n = (mod == 3) ? 3 : 4;
 969:                 else if (reg == 2 || reg == 3)  // NOT/NEG rm
 970:                     n = (mod == 3) ? 1 : 4;
 971:                 break;
 972: 
 973:             case 0xFF:
 974:                 if (reg == 2 || reg == 3 ||     // CALL rm, CALL m,rm
 975:                     reg == 5)                   // JMP seg:offset
 976:                     n = 5;
 977:                 else if (reg == 4)
 978:                     n = (mod == 3) ? 1 : 2;
 979:                 else if (reg == 0 || reg == 1)  // INC/DEC rm
 980:                     n = (mod == 3) ? 1 : 4;
 981:                 else if (reg == 6)              // PUSH rm
 982:                     n = (mod == 3) ? 3 : 4;
 983:                 break;
 984: 
 985:             case 0x0F:
 986:                 op2 = c->Iop & 0xFF;
 987:                 if ((op2 & 0xF0) == 0x80)       // Jcc
 988:                 {   n = 1;
 989:                     break;
 990:                 }
 991:                 if ((op2 & 0xF0) == 0x90)       // SETcc
 992:                 {   n = (mod == 3) ? 1 : 3;
 993:                     break;
 994:                 }
 995:                 if (op2 == 0xB6 || op2 == 0xB7 ||       // MOVZX
 996:                     op2 == 0xBE || op2 == 0xBF)         // MOVSX
 997:                 {   n = 1;
 998:                     break;
 999:                 }
1000:                 if (op2 == 0xAF)                        // IMUL r,m
1001:                 {   n = (mod == 3) ? 1 : 2;
1002:                     break;
1003:                 }
1004:                 break;
1005:         }
1006:     }
1007:     if (n == 0)
1008:         n = 5;                                  // copout for now
1009:     return n;
1010: }
1011: 
1012: /******************************************
1013:  * Determine pairing classification.
1014:  * Don't deal with floating point, just assume they are all NP (Not Pairable).
1015:  * Returns:
1016:  *      NP,UV,PU,PV optionally OR'd with PE
1017:  */
1018: 
1019: STATIC int pair_class(code *c)
1020: {   unsigned char op;
1021:     unsigned char irm,mod,reg,rm;
1022:     unsigned a32;
1023:     int pc;
1024: 
1025:     // Of course, with Intel this is *never* simple, and Intel's
1026:     // documentation is vague about the specifics.
1027: 
1028:     op = c->Iop & 0xFF;
1029:     if ((c->Iop & 0xFF00) == 0x0F00)
1030:         op = 0x0F;
1031:     pc = pentcycl[op];
1032:     a32 = I32;
1033:     if (c->Iflags & CFaddrsize)
1034:         a32 ^= 1;
1035:     irm = c->Irm;
1036:     mod = (irm >> 6) & 3;
1037:     reg = (irm >> 3) & 7;
1038:     rm = irm & 7;
1039:     switch (op)
1040:     {
1041:         case 0x0F:                              // 2 byte opcode
1042:             if ((c->Iop & 0xF0) == 0x80)        // if Jcc
1043:                 pc = PV | PF;
1044:             break;
1045: 
1046:         case 0x80:
1047:         case 0x81:
1048:         case 0x83:
1049:             if (reg == 2 ||                     // ADC EA,immed
1050:                 reg == 3)                       // SBB EA,immed
1051:             {   pc = PU;
1052:                 goto L2;
1053:             }
1054:             goto L1;                            // AND/OR/XOR/ADD/SUB/CMP EA,immed
1055: 
1056:         case 0x84:
1057:         case 0x85:                              // TEST EA,reg
1058:             if (mod == 3)                       // TEST reg,reg
1059:                 pc = UV;
1060:             break;
1061: 
1062:         case 0xC0:
1063:         case 0xC1:
1064:             if (reg >= 4)
1065:                 pc = PU;
1066:             break;
1067: 
1068:         case 0xC6:
1069:         case 0xC7:
1070:             if (reg == 0)                       // MOV EA,immed
1071:             {
1072:         L1:
1073:                 pc = UV;
1074:         L2:
1075:                 // if EA contains a displacement then
1076:                 // can't execute in V, or pair in U
1077:                 switch (mod)
1078:                 {   case 0:
1079:                         if (a32)
1080:                         {   if (rm == 5 ||
1081:                                 (rm == 4 && (c->Isib & 7) == 5)
1082:                                )
1083:                                 pc = NP;
1084:                         }
1085:                         else if (rm == 6)
1086:                             pc = NP;
1087:                         break;
1088:                     case 1:
1089:                     case 2:
1090:                         pc = NP;
1091:                         break;
1092:                 }
1093:             }
1094:             break;
1095: 
1096:         case 0xD9:
1097:             if (irm < 0xC0)
1098:             {
1099:                 if (reg == 0)
1100:                     pc = FX;
1101:             }
1102:             else if (irm < 0xC8)
1103:                 pc = FX;
1104:             else if (irm < 0xD0)
1105:                 pc = PV;
1106:             else
1107:             {
1108:                 switch (irm)
1109:                 {
1110:                     case 0xE0:
1111:                     case 0xE1:
1112:                     case 0xE4:
1113:                         pc = FX;
1114:                         break;
1115:                 }
1116:             }
1117:             break;
1118: 
1119:         case 0xDB:
1120:             if (irm < 0xC0 && (reg == 0 || reg == 5))
1121:                 pc = FX;
1122:             break;
1123: 
1124:         case 0xDD:
1125:             if (irm < 0xC0)
1126:             {
1127:                 if (reg == 0)
1128:                     pc = FX;
1129:             }
1130:             else if (irm >= 0xE0 && irm < 0xF0)
1131:                 pc = FX;
1132:             break;
1133: 
1134:         case 0xDF:
1135:             if (irm < 0xC0 && (reg == 0 || reg == 5))
1136:                 pc = FX;
1137:             break;
1138: 
1139:         case 0xFE:
1140:             if (reg == 0 || reg == 1)           // INC/DEC EA
1141:                 pc = UV;
1142:             break;
1143:         case 0xFF:
1144:             if (reg == 0 || reg == 1)           // INC/DEC EA
1145:                 pc = UV;
1146:             else if (reg == 2 || reg == 4)      // CALL/JMP near ptr EA
1147:                 pc = PE|PV;
1148:             else if (reg == 6 && mod == 3)      // PUSH reg
1149:                 pc = PE | UV;
1150:             break;
1151:     }
1152:     if (c->Iflags & CFPREFIX && pc == UV)       // if prefix byte
1153:         pc = PU;
1154:     return pc;
1155: }
1156: 
1157: /******************************************
1158:  * For an instruction, determine what is read
1159:  * and what is written, and what is used for addressing.
1160:  * Determine operand size if EA (larger is ok).
1161:  */
1162: 
1163: STATIC void getinfo(Cinfo *ci,code *c)
1164: {
1165:     memset(ci,0,sizeof(Cinfo));
1166:     if (!c)
1167:         return;
1168:     ci->c = c;
1169: 
1170:     if (PRO)
1171:     {
1172:         ci->uops = uops(c);
1173:         ci->isz = calccodsize(c);
1174:     }
1175:     else
1176:         ci->pair = pair_class(c);
1177: 
1178:     unsigned char op;
1179:     unsigned char op2;
1180:     unsigned char irm,mod,reg,rm;
1181:     unsigned a32;
1182:     int pc;
1183:     unsigned r,w;
1184:     int sz = I32 ? 4 : 2;
1185: 
1186:     ci->r = 0;
1187:     ci->w = 0;
1188:     ci->a = 0;
1189:     op = c->Iop & 0xFF;
1190:     if ((c->Iop & 0xFF00) == 0x0F00)
1191:         op = 0x0F;
1192:     //printf("\tgetinfo %x, op %x \n",c,op);
1193:     pc = pentcycl[op];
1194:     a32 = I32;
1195:     if (c->Iflags & CFaddrsize)
1196:         a32 ^= 1;
1197:     if (c->Iflags & CFopsize)
1198:         sz ^= 2 | 4;
1199:     irm = c->Irm;
1200:     mod = (irm >> 6) & 3;
1201:     reg = (irm >> 3) & 7;
1202:     rm = irm & 7;
1203: 
1204:     r = oprw[op][0];
1205:     w = oprw[op][1];
1206: 
1207:     switch (op)
1208:     {
1209:         case 0x50:
1210:         case 0x51:
1211:         case 0x52:
1212:         case 0x53:
1213:         case 0x55:
1214:         case 0x56:
1215:         case 0x57:                              // PUSH reg
1216:             ci->flags |= CIFLpush;
1217:         case 0x54:                              // PUSH ESP
1218:         case 0x6A:                              // PUSH imm8
1219:         case 0x68:                              // PUSH imm
1220:         case 0x0E:
1221:         case 0x16:
1222:         case 0x1E:
1223:         case 0x06:
1224:         case 0x9C:
1225:         Lpush:
1226:             ci->spadjust = -sz;
1227:             ci->a |= mSP;
1228:             break;
1229: 
1230:         case 0x58:
1231:         case 0x59:
1232:         case 0x5A:
1233:         case 0x5B:
1234:         case 0x5C:
1235:         case 0x5D:
1236:         case 0x5E:
1237:         case 0x5F:                              // POP reg
1238:         case 0x1F:
1239:         case 0x07:
1240:         case 0x17:
1241:         case 0x9D:                              // POPF
1242:         Lpop:
1243:             ci->spadjust = sz;
1244:             ci->a |= mSP;
1245:             break;
1246: 
1247:         case 0x80:
1248:             if (reg == 7)                       // CMP
1249:                 c->Iflags |= CFpsw;
1250:             r = B | grprw[0][reg][0];           // Grp 1 (byte)
1251:             w = B | grprw[0][reg][1];
1252:             break;
1253: 
1254:         case 0x81:
1255:         case 0x83:
1256:             if (reg == 7)                       // CMP
1257:                 c->Iflags |= CFpsw;
1258:             else if (irm == modregrm(3,0,SP))   // ADD ESP,imm
1259:             {
1260:                 assert(c->IFL2 == FLconst);
1261:                 ci->spadjust = (op == 0x81) ? c->IEV2.Vint : (signed char)c->IEV2.Vint;
1262:             }
1263:             else if (irm == modregrm(3,5,SP))   // SUB ESP,imm
1264:             {
1265:                 assert(c->IFL2 == FLconst);
1266:                 ci->spadjust = (op == 0x81) ? -c->IEV2.Vint : -(signed char)c->IEV2.Vint;
1267:             }
1268:             r = grprw[0][reg][0];               // Grp 1
1269:             w = grprw[0][reg][1];
1270:             break;
1271: 
1272:         case 0x8F:
1273:             if (reg == 0)                       // POP rm
1274:                 goto Lpop;
1275:             break;
1276: 
1277:         case 0xA0:
1278:         case 0xA1:
1279:         case 0xA2:
1280:         case 0xA3:
1281:             // Fake having an EA to simplify code in conflict()
1282:             ci->flags |= CIFLea;
1283:             ci->reg = 0;
1284:             ci->sibmodrm = a32 ? modregrm(0,0,5) : modregrm(0,0,6);
1285:             c->IFL1 = c->IFL2;
1286:             c->IEV1 = c->IEV2;
1287:             break;
1288: 
1289:         case 0xC2:
1290:         case 0xC3:
1291:         case 0xCA:
1292:         case 0xCB:                              // RET
1293:             ci->a |= mSP;
1294:             break;
1295: 
1296:         case 0xE8:
1297:             if (c->Iflags & CFclassinit)        // call to __j_classinit
1298:             {   r = 0;
1299:                 w = F;
1300: #if CLASSINIT2
1301:                 ci->pair = UV;                  // it is patched to CMP EAX,0
1302: #else
1303:                 ci->pair = NP;
1304: #endif
1305:             }
1306:             break;
1307: 
1308:         case 0xF6:
1309:             r = grprw[3][reg][0];               // Grp 3, byte version
1310:             w = grprw[3][reg][1];
1311:             break;
1312: 
1313:         case 0xF7:
1314:             r = grprw[1][reg][0];               // Grp 3
1315:             w = grprw[1][reg][1];
1316:             break;
1317: 
1318:         case 0x0F:
1319:             op2 = c->Iop & 0xFF;
1320:             if ((op2 & 0xF0) == 0x80)           // if Jxx instructions
1321:             {
1322:                 ci->r = F | N;
1323:                 ci->w = N;
1324:                 goto Lret;
1325:             }
1326:             ci->r = N;
1327:             ci->w = N;          // copout for now
1328:             goto Lret;
1329: 
1330:         case 0xD7:                              // XLAT
1331:             ci->a = mAX | mBX;
1332:             break;
1333: 
1334:         case 0xFF:
1335:             r = grprw[2][reg][0];               // Grp 5
1336:             w = grprw[2][reg][1];
1337:             if (reg == 6)                       // PUSH rm
1338:                 goto Lpush;
1339:             break;
1340: 
1341:         case 0x38:
1342:         case 0x39:
1343:         case 0x3A:
1344:         case 0x3B:
1345:         case 0x3C:                              // CMP AL,imm8
1346:         case 0x3D:                              // CMP EAX,imm32
1347:             // For CMP opcodes, always test for flags
1348:             c->Iflags |= CFpsw;
1349:             break;
1350: 
1351:         case 0xD0:
1352:         case 0xD1:
1353:         case 0xD2:
1354:         case 0xD3:
1355:         case 0xC0:
1356:         case 0xC1:
1357:             if (reg == 2 || reg == 3)           // if RCL or RCR
1358:                 c->Iflags |= CFpsw;             // always test for flags
1359:             break;
1360: 
1361:         case 0xD8:
1362:         case 0xD9:
1363:         case 0xDA:
1364:         case 0xDB:
1365:         case 0xDC:
1366:         case 0xDD:
1367:         case 0xDE:
1368:         case 0xDF:
1369:             if (irm < 0xC0)
1370:             {   r = grpf1[op - 0xD8][reg][0];
1371:                 w = grpf1[op - 0xD8][reg][1];
1372:                 switch (op)
1373:                 {
1374:                     case 0xD8:
1375:                         if (reg == 3)           // if FCOMP
1376:                             ci->fpuadjust = -1;
1377:                         else
1378:                             ci->fp_op = FPfop;
1379:                         break;
1380: 
1381:                     case 0xD9:
1382:                         if (reg == 0)           // if FLD float
1383:                         {   ci->fpuadjust = 1;
1384:                             ci->fp_op = FPfld;
1385:                         }
1386:                         else if (reg == 3)      // if FSTP float
1387:                         {   ci->fpuadjust = -1;
1388:                             ci->fp_op = FPfstp;
1389:                         }
1390:                         else if (reg == 5 || reg == 7)
1391:                             sz = 2;
1392:                         else if (reg == 4 || reg == 6)
1393:                             sz = 28;
1394:                         break;
1395:                     case 0xDA:
1396:                         if (reg == 3)           // if FICOMP
1397:                             ci->fpuadjust = -1;
1398:                         break;
1399:                     case 0xDB:
1400:                         if (reg == 0 || reg == 5)
1401:                         {   ci->fpuadjust = 1;
1402:                             ci->fp_op = FPfld;  // FILD / FLD long double
1403:                         }
1404:                         if (reg == 3 || reg == 7)
1405:                             ci->fpuadjust = -1;
1406:                         if (reg == 7)
1407:                             ci->fp_op = FPfstp; // FSTP long double
1408:                         if (reg == 5 || reg == 7)
1409:                             sz = 10;
1410:                         break;
1411:                     case 0xDC:
1412:                         sz = 8;
1413:                         if (reg == 3)           // if FCOMP
1414:                             ci->fpuadjust = -1;
1415:                         else
1416:                             ci->fp_op = FPfop;
1417:                         break;
1418:                     case 0xDD:
1419:                         if (reg == 0)           // if FLD double
1420:                         {   ci->fpuadjust = 1;
1421:                             ci->fp_op = FPfld;
1422:                         }
1423:                         if (reg == 3)           // if FSTP double
1424:                         {   ci->fpuadjust = -1;
1425:                             ci->fp_op = FPfstp;
1426:                         }
1427:                         if (reg == 7)
1428:                             sz = 2;
1429:                         else if (reg == 4 || reg == 6)
1430:                             sz = 108;
1431:                         else
1432:                             sz = 8;
1433:                         break;
1434:                     case 0xDE:
1435:                         sz = 2;
1436:                         if (reg == 3)           // if FICOMP
1437:                             ci->fpuadjust = -1;
1438:                         break;
1439:                     case 0xDF:
1440:                         sz = 2;
1441:                         if (reg == 4 || reg == 6)
1442:                             sz = 10;
1443:                         else if (reg == 5 || reg == 7)
1444:                             sz = 8;
1445:                         if (reg == 0 || reg == 4 || reg == 5)
1446:                             ci->fpuadjust = 1;
1447:                         else if (reg == 3 || reg == 6 || reg == 7)
1448:                             ci->fpuadjust = -1;
1449:                         break;
1450:                 }
1451:                 break;
1452:             }
1453:             else if (op == 0xDE)
1454:             {   ci->fpuadjust = -1;             // pop versions of Fop's
1455:                 if (irm == 0xD9)
1456:                     ci->fpuadjust = -2;         // FCOMPP
1457:             }
1458: 
1459:             // Most floating point opcodes aren't staged, but are
1460:             // sent right through, in order to make use of the large
1461:             // latencies with floating point instructions.
1462:             if (ci->fp_op == FPfld ||
1463:                 (op == 0xD9 && (irm & 0xF8) == 0xC0))
1464:                 ;                               // FLD ST(i)
1465:             else
1466:                 ci->flags |= CIFLnostage;
1467: 
1468:             switch (op)
1469:             {
1470:                 case 0xD8:
1471:                     r = S;
1472:                     w = C;
1473:                     if ((irm & ~7) == 0xD0)
1474:                         w |= S;
1475:                     break;
1476:                 case 0xD9:
1477:                     // FCHS or FABS or FSQRT
1478:                     if (irm == 0xE0 || irm == 0xE1 || irm == 0xFA)
1479:                         ci->fp_op = FPfop;
1480:                     r = S;
1481:                     w = S|C;
1482:                     break;
1483:                 case 0xDA:
1484:                     if (irm == 0xE9)    // FUCOMPP
1485:                     {   r = S;
1486:                         w = S|C;
1487:                         break;
1488:                     }
1489:                     break;
1490:                 case 0xDB:
1491:                     if (irm == 0xE2)    // FCLEX
1492:                     {   r = 0;
1493:                         w = C;
1494:                         break;
1495:                     }
1496:                     if (irm == 0xE3)    // FINIT
1497:                     {   r = 0;
1498:                         w = S|C;
1499:                         break;
1500:                     }
1501:                     break;
1502:                 case 0xDC:
1503:                 case 0xDE:
1504:                     if ((irm & 0xF0) != 0xD0)
1505:                     {   r = S;
1506:                         w = S|C;
1507:                         break;
1508:                     }
1509:                     break;
1510:                 case 0xDD:
1511:                     // Not entirely correct, but conservative
1512:                     r = S;
1513:                     w = S|C;
1514:                     break;
1515:                 case 0xDF:
1516:                     if (irm == 0xE0)    // FSTSW AX
1517:                     {   r = C;
1518:                         w = mAX;
1519:                         break;
1520:                     }
1521:                     break;
1522:             }
1523:             break;
1524: #if DEBUG
1525:         default:
1526:             //printf("\t\tNo special case\n");
1527:             break;
1528: #endif
1529:     }
1530: 
1531:     if ((r | w) & B)                            // if byte operation
1532:         sz = 1;                                 // operand size is 1
1533: 
1534:     ci->r = r & ~(R | EA);
1535:     ci->w = w & ~(R | EA);
1536:     if (r & R)
1537:         ci->r |= mask[(r & B) ? (reg & 3) : reg];
1538:     if (w & R)
1539:         ci->w |= mask[(w & B) ? (reg & 3) : reg];
1540: 
1541:     // OR in bits for EA addressing mode
1542:     if ((r | w) & EA)
1543:     {   unsigned char sib;
1544: 
1545:         sib = 0;
1546:         switch (mod)
1547:         {
1548:             case 0:
1549:                 if (a32)
1550:                 {
1551:                     if (rm == 4)
1552:                     {   sib = c->Isib;
1553:                         if ((sib & modregrm(0,7,0)) != modregrm(0,4,0))
1554:                             ci->a |= mask[(sib >> 3) & 7];      // index register
1555:                         if ((sib & 7) != 5)
1556:                             ci->a |= mask[sib & 7];             // base register
1557:                     }
1558:                     else if (rm != 5)
1559:                         ci->a |= mask[rm];
1560:                 }
1561:                 else
1562:                 {   static unsigned char ea16[8] = {mBX|mSI,mBX|mDI,mBP|mSI,mBP|mDI,mSI,mDI,0,mBX};
1563:                     ci->a |= ea16[rm];
1564:                 }
1565:                 goto Lmem;
1566: 
1567:             case 1:
1568:             case 2:
1569:                 if (a32)
1570:                 {
1571:                     if (rm == 4)
1572:                     {   sib = c->Isib;
1573:                         if ((sib & modregrm(0,7,0)) != modregrm(0,4,0))
1574:                             ci->a |= mask[(sib >> 3) & 7];      // index register
1575:                         ci->a |= mask[sib & 7];                 // base register
1576:                     }
1577:                     else
1578:                         ci->a |= mask[rm];
1579:                 }
1580:                 else
1581:                 {   static unsigned char ea16[8] = {mBX|mSI,mBX|mDI,mBP|mSI,mBP|mDI,mSI,mDI,mBP,mBX};
1582:                     ci->a |= ea16[rm];
1583:                 }
1584: 
1585:             Lmem:
1586:                 if (r & EA)
1587:                     ci->r |= mMEM;
1588:                 if (w & EA)
1589:                     ci->w |= mMEM;
1590:                 ci->flags |= CIFLea;
1591:                 break;
1592: 
1593:             case 3:
1594:                 if (r & EA)
1595:                     ci->r |= mask[(r & B) ? (rm & 3) : rm];
1596:                 if (w & EA)
1597:                     ci->w |= mask[(w & B) ? (rm & 3) : rm];
1598:                 break;
1599:         }
1600:         // Adjust sibmodrm so that addressing modes can be compared simply
1601:         irm &= modregrm(3,0,7);
1602:         if (a32)
1603:         {
1604:             if (irm != modregrm(0,0,5))
1605:             {
1606:                 switch (mod)
1607:                 {   case 0:
1608:                         if ((sib & 7) != 5)     // if not disp32[index]
1609:                         {   c->IFL1 = FLconst;
1610:                             c->IEVpointer1 = 0;
1611:                             irm |= 0x80;
1612:                         }
1613:                         break;
1614:                     case 1:
1615:                         c->IEVpointer1 = (signed char) c->IEVpointer1;
1616:                         irm = modregrm(2,0,rm);
1617:                         break;
1618:                 }
1619:             }
1620:         }
1621:         else
1622:         {
1623:             if (irm != modregrm(0,0,6))
1624:             {
1625:                 switch (mod)
1626:                 {   case 0:
1627:                         c->IFL1 = FLconst;
1628:                         c->IEVpointer1 = 0;
1629:                         irm |= 0x80;
1630:                         break;
1631:                     case 1:
1632:                         c->IEVpointer1 = (signed char) c->IEVpointer1;
1633:                         irm = modregrm(2,0,rm);
1634:                         break;
1635:                 }
1636:             }
1637:         }
1638: 
1639:         ci->r |= ci->a;
1640:         ci->reg = reg;
1641:         ci->sibmodrm = (sib << 8) | irm;
1642:     }
1643: Lret:
1644:     if (ci->w & mSP)                    // if stack pointer is modified
1645:         ci->w |= mMEM;                  // then we are implicitly writing to memory
1646:     if (op == 0x8D)                     // if LEA
1647:         ci->r &= ~mMEM;                 // memory is not actually read
1648:     ci->sz = sz;
1649: #if DEBUG
1650:     //printf("\t\t"); ci->print();
1651: #endif
1652: }
1653: 
1654: /******************************************
1655:  * Determine if two instructions can pair.
1656:  * Assume that in general, cu can pair in the U pipe and cv in the V.
1657:  * Look for things like register contentions.
1658:  * Input:
1659:  *      cu      instruction for U pipe
1660:  *      cv      instruction for V pipe
1661:  * Returns:
1662:  *      !=0 if they can pair
1663:  */
1664: 
1665: STATIC int pair_test(Cinfo *cu,Cinfo *cv)
1666: {   unsigned pcu;
1667:     unsigned pcv;
1668:     unsigned r1,w1;
1669:     unsigned r2,w2;
1670:     unsigned x;
1671: 
1672:     pcu = cu->pair;
1673:     if (!(pcu & PU))
1674:     {
1675:         // See if pairs with FXCH and cv is FXCH
1676:         if (pcu & FX && cv->c->Iop == 0xD9 && (cv->c->Irm & ~7) == 0xC8)
1677:             goto Lpair;
1678:         goto Lnopair;
1679:     }
1680:     pcv = cv->pair;
1681:     if (!(pcv & PV))
1682:         goto Lnopair;
1683: 
1684:     r1 = cu->r;
1685:     w1 = cu->w;
1686:     r2 = cv->r;
1687:     w2 = cv->w;
1688: 
1689:     x = w1 & (r2 | w2) & ~(F|mMEM);     // register contention
1690:     if (x &&                            // if register contention
1691:         !(x == mSP && pcu & pcv & PE)   // and not exception
1692:        )
1693:         goto Lnopair;
1694: 
1695:     // Look for flags contention
1696:     if (w1 & r2 & F && !(pcv & PF))
1697:         goto Lnopair;
1698: 
1699: Lpair:
1700:     return 1;
1701: 
1702: Lnopair:
1703:     return 0;
1704: }
1705: 
1706: /******************************************
1707:  * Determine if two instructions have an AGI or register contention.
1708:  * Returns:
1709:  *      !=0 if they have an AGI
1710:  */
1711: 
1712: STATIC int pair_agi(Cinfo *c1,Cinfo *c2)
1713: {   unsigned x;
1714: 
1715:     x = c1->w & c2->a;
1716:     return x && !(x == mSP && c1->pair & c2->pair & PE);
1717: }
1718: 
1719: /********************************************
1720:  * Determine if three instructions can decode simultaneously
1721:  * in Pentium Pro and Pentium II.
1722:  * Input:
1723:  *      c0,c1,c2        candidates for decoders 0,1,2
1724:  *                      c2 can be NULL
1725:  * Returns:
1726:  *      !=0 if they can decode simultaneously
1727:  */
1728: 
1729: STATIC int triple_test(Cinfo *c0,Cinfo *c1,Cinfo *c2)
1730: {   int c2isz;
1731: 
1732:     assert(c0);
1733:     if (!c1)
1734:         goto Lnopair;
1735:     c2isz = c2 ? c2->isz : 0;
1736:     if (c0->isz > 7 || c1->isz > 7 || c2isz > 7 ||
1737:         c0->isz + c1->isz + c2isz > 16)
1738:         goto Lnopair;
1739: 
1740:     // 4-1-1 decode
1741:     if (c1->uops > 1 ||
1742:         (c2 && c2->uops > 1))
1743:         goto Lnopair;
1744: 
1745: Lpair:
warning C4102: 'Lpair' : unreferenced label
1746:     return 1;
1747: 
1748: Lnopair:
1749:     return 0;
1750: }
1751: 
1752: /********************************************
1753:  * Get next instruction worth looking at for scheduling.
1754:  * Returns:
1755:  *      NULL    no more instructions
1756:  */
1757: 
1758: STATIC code * cnext(code *c)
1759: {
1760:     while (1)
1761:     {
1762:         c = code_next(c);
1763:         if (!c)
1764:             break;
1765:         if (c->Iflags & (CFtarg | CFtarg2))
1766:             break;
1767:         if (!(c->Iop == NOP ||
1768:               c->Iop == (ESCAPE | ESClinnum)))
1769:             break;
1770:     }
1771:     return c;
1772: }
1773: 
1774: /******************************************
1775:  * Instruction scheduler.
1776:  * Input:
1777:  *      c               list of instructions to schedule
1778:  *      scratch         scratch registers we can use
1779:  * Returns:
1780:  *      revised list of scheduled instructions
1781:  */
1782: 
1783: ///////////////////////////////////
1784: // Determine if c1 and c2 are swappable.
1785: // c1 comes before c2.
1786: // If they do not conflict
1787: //      return 0
1788: // If they do conflict
1789: //      return 0x100 + delay_clocks
1790: // Input:
1791: //      fpsched         if 1, then adjust fxch_pre and fxch_post to swap,
1792: //                      then return 0
1793: //                      if 2, then adjust ci1 as well as ci2
1794: 
1795: STATIC int conflict(Cinfo *ci1,Cinfo *ci2,int fpsched)
1796: {
1797:     code *c1;
1798:     code *c2;
1799:     unsigned r1,w1,a1;
1800:     unsigned r2,w2,a2;
1801:     int sz1,sz2;
1802:     int i = 0;
1803:     int delay_clocks;
1804: 
1805:     c1 = ci1->c;
1806:     c2 = ci2->c;
1807: 
1808:     //printf("conflict %x %x\n",c1,c2);
1809: 
1810:     r1 = ci1->r;
1811:     w1 = ci1->w;
1812:     a1 = ci1->a;
1813:     sz1 = ci1->sz;
1814: 
1815:     r2 = ci2->r;
1816:     w2 = ci2->w;
1817:     a2 = ci2->a;
1818:     sz2 = ci2->sz;
1819: 
1820:     //printf("r1 %lx w1 %lx a1 %lx sz1 %x\n",r1,w1,a1,sz1);
1821:     //printf("r2 %lx w2 %lx a2 %lx sz2 %x\n",r2,w2,a2,sz2);
1822: 
1823:     if ((c1->Iflags | c2->Iflags) & CFvolatile)
1824:         goto Lconflict;
1825: 
1826:     // Determine if we should handle FPU register conflicts separately
1827:     //if (fpsched) printf("fp_op %d,%d:\n",ci1->fp_op,ci2->fp_op);
1828:     if (fpsched && ci1->fp_op && ci2->fp_op)
1829:     {
1830:         w1 &= ~(S|C);
1831:         r1 &= ~(S|C);
1832:         w2 &= ~(S|C);
1833:         r2 &= ~(S|C);
1834:     }
1835:     else
1836:         fpsched = 0;
1837: 
1838:     if ((r1 | r2) & N)
1839:     {
1840:         goto Lconflict;
1841:     }
1842: 
1843: #if 0
1844:     if (c1->Iop == 0xFF && c2->Iop == 0x8B)
1845:     {   c1->print(); c2->print(); i = 1;
1846:         printf("r1=%lx, w1=%lx, a1=%lx, sz1=%d, r2=%lx, w2=%lx, a2=%lx, sz2=%d\n",r1,w1,a1,sz1,r2,w2,a2,sz2);
1847:     }
1848: #endif
1849: L1:
1850:     if (w1 & r2 || (r1 | w1) & w2)
1851:     {   unsigned char ifl1,ifl2;
1852: 
1853: if (i) printf("test\n");
1854: 
1855: #if 0
1856: if (c1->IFL1 != c2->IFL1) printf("t1\n");
1857: if ((c1->Irm & modregrm(3,0,7)) != (c2->Irm & modregrm(3,0,7))) printf("t2\n");
1858: if ((issib(c1->Irm) && c1->Isib != c2->Isib)) printf("t3\n");
1859: if (c1->IEVpointer1 + sz1 <= c2->IEVpointer1) printf("t4\n");
1860: if (c2->IEVpointer1 + sz2 <= c1->IEVpointer1) printf("t5\n");
1861: #endif
1862: 
1863: #if 1   // make sure CFpsw is reliably set
1864:         if (w1 & w2 & F &&              // if both instructions write to flags
1865:             w1 != F &&
1866:             w2 != F &&
1867:             !((r1 | r2) & F) &&         // but neither instruction reads them
1868:             !((c1->Iflags | c2->Iflags) & CFpsw))       // and we don't care about flags
1869:         {
1870:             w1 &= ~F;
1871:             w2 &= ~F;                   // remove conflict
1872:             goto L1;                    // and try again
1873:         }
1874: #endif
1875:         // If other than the memory reference is a conflict
1876:         if (w1 & r2 & ~mMEM || (r1 | w1) & w2 & ~mMEM)
1877:         {   if (i) printf("\t1\n");
1878:             if (i) printf("r1=%x, w1=%x, a1=%x, sz1=%d, r2=%x, w2=%x, a2=%x, sz2=%d\n",r1,w1,a1,sz1,r2,w2,a2,sz2);
1879:             goto Lconflict;
1880:         }
1881: 
1882:         // If referring to distinct types, then no dependency
1883:         if (c1->Irex && c2->Irex && c1->Irex != c2->Irex)
1884:             goto Lswap;
1885: 
1886:         ifl1 = c1->IFL1;
1887:         ifl2 = c2->IFL1;
1888: 
1889:         // Special case: Allow indexed references using registers other than
1890:         // ESP and EBP to be swapped with PUSH instructions
1891:         if (((c1->Iop & ~7) == 0x50 ||          // PUSH reg
1892:              c1->Iop == 0x6A ||                 // PUSH imm8
1893:              c1->Iop == 0x68 ||                 // PUSH imm16/imm32
1894:              (c1->Iop == 0xFF && ci1->reg == 6) // PUSH EA
1895:             ) &&
1896:             ci2->flags & CIFLea && !(a2 & mSP) &&
1897:             !(a2 & mBP && (long)c2->IEVpointer1 < 0)
1898:            )
1899:         {
1900:             if (c1->Iop == 0xFF)
1901:             {
1902:                 if (!(w2 & mMEM))
1903:                     goto Lswap;
1904:             }
1905:             else
1906:                 goto Lswap;
1907:         }
1908: 
1909:         // Special case: Allow indexed references using registers other than
1910:         // ESP and EBP to be swapped with PUSH instructions
1911:         if (((c2->Iop & ~7) == 0x50 ||          // PUSH reg
1912:              c2->Iop == 0x6A ||                 // PUSH imm8
1913:              c2->Iop == 0x68 ||                 // PUSH imm16/imm32
1914:              (c2->Iop == 0xFF && ci2->reg == 6) // PUSH EA
1915:             ) &&
1916:             ci1->flags & CIFLea && !(a1 & mSP) &&
1917:             !(a2 & mBP && (long)c2->IEVpointer1 < 0)
1918:            )
1919:         {
1920:             if (c2->Iop == 0xFF)
1921:             {
1922:                 if (!(w1 & mMEM))
1923:                     goto Lswap;
1924:             }
1925:             else
1926:                 goto Lswap;
1927:         }
1928: 
1929:         // If not both an EA addressing mode, conflict
1930:         if (!(ci1->flags & ci2->flags & CIFLea))
1931:         {   if (i) printf("\t2\n");
1932:             goto Lconflict;
1933:         }
1934: 
1935:         if (ci1->sibmodrm == ci2->sibmodrm)
1936:         {   if (ifl1 != ifl2)
1937:                 goto Lswap;
1938:             switch (ifl1)
1939:             {
1940:                 case FLconst:
1941:                     if (c1->IEV1.Vint != c2->IEV1.Vint &&
1942:                         (c1->IEV1.Vint + sz1 <= c2->IEV1.Vint ||
1943:                          c2->IEV1.Vint + sz2 <= c1->IEV1.Vint))
1944:                         goto Lswap;
1945:                     break;
1946:                 case FLdatseg:
1947:                     if (c1->IEVseg1 != c2->IEVseg1 ||
1948:                         c1->IEV1.Vint + sz1 <= c2->IEV1.Vint ||
1949:                         c2->IEV1.Vint + sz2 <= c1->IEV1.Vint)
1950:                         goto Lswap;
1951:                     break;
1952:             }
1953:         }
1954: 
1955:         if ((c1->Iflags | c2->Iflags) & CFunambig &&
1956:             (ifl1 != ifl2 ||
1957:              ci1->sibmodrm != ci2->sibmodrm ||
1958:              (c1->IEV1.Vint != c2->IEV1.Vint &&
1959:               (c1->IEV1.Vint + sz1 <= c2->IEV1.Vint ||
1960:                c2->IEV1.Vint + sz2 <= c1->IEV1.Vint)
1961:              )
1962:             )
1963:            )
1964:         {
1965:             // Assume that [EBP] and [ESP] can point to the same location
1966:             if (((a1 | a2) & (mBP | mSP)) == (mBP | mSP))
1967:                 goto Lconflict;
1968:             goto Lswap;
1969:         }
1970: 
1971:         if (i) printf("\t3\n");
1972:         goto Lconflict;
1973:     }
1974: 
1975: Lswap:
1976:     if (fpsched)
1977:     {   unsigned char a1,b1;
warning C6246: Local declaration of 'a1' hides declaration of the same name in outer scope. For additional information, see previous declaration at line '1799' of 'c:\projects\extern\d\dmd\src\backend\cgsched.c': Lines: 1799
1978:         unsigned char a2,b2;
warning C6246: Local declaration of 'a2' hides declaration of the same name in outer scope. For additional information, see previous declaration at line '1800' of 'c:\projects\extern\d\dmd\src\backend\cgsched.c': Lines: 1800
1979: 
1980:         //printf("\tfpsched %d,%d:\n",ci1->fp_op,ci2->fp_op);
1981:         a1 = ci1->fxch_pre;
1982:         b1 = ci1->fxch_post;
1983:         a2 = ci2->fxch_pre;
1984:         b2 = ci2->fxch_post;
1985: 
1986:         #define X(a,b) ((a << 8) | b)
1987:         switch (X(ci1->fp_op,ci2->fp_op))
1988:         {
1989:             case X(FPfstp,FPfld):
1990:                 if (a1 || b1)
1991:                     goto Lconflict;
1992:                 if (a2)
1993:                     goto Lconflict;
1994:                 if (b2 == 0)
1995:                     ci2->fxch_post++;
1996:                 else if (b2 == 1)
1997:                 {
1998:                     ci2->fxch_pre++;
1999:                     ci2->fxch_post++;
2000:                 }
2001:                 else
2002:                 {
2003:                     goto Lconflict;
2004:                 }
2005:                 break;
2006: 
2007:             case X(FPfstp,FPfop):
2008:                 if (a1 || b1)
2009:                     goto Lconflict;
2010:                 ci2->fxch_pre++;
2011:                 ci2->fxch_post++;
2012:                 break;
2013: 
2014:             case X(FPfop,FPfop):
2015:                 if (a1 == 0 && b1 == 1 && a2 == 0 && b2 == 0)
2016:                 {   ci2->fxch_pre = 1;
2017:                     ci2->fxch_post = 1;
2018:                     break;
2019:                 }
2020:                 if (a1 == 0 && b1 == 0 && a2 == 1 && b2 == 1)
2021:                     break;
2022:                 goto Lconflict;
2023: 
2024:             case X(FPfop,FPfld):
2025:                 if (a1 || b1)
2026:                     goto Lconflict;
2027:                 if (a2)
2028:                     goto Lconflict;
2029:                 if (b2)
2030:                     break;
2031:                 else if (fpsched == 2)
2032:                     ci1->fxch_post = 1;
2033:                 ci2->fxch_post = 1;
2034:                 break;
2035: 
2036:             default:
2037:                 goto Lconflict;
2038:         }
2039:         #undef X
2040:         //printf("\tpre = %d, post = %d\n",ci2->fxch_pre,ci2->fxch_post);
2041:     }
2042: 
2043:     //printf("w1 = x%x, w2 = x%x\n",w1,w2);
2044:     if (i) printf("no conflict\n\n");
2045:     return 0;
2046: 
2047: Lconflict:
2048:     //printf("r1=%x, w1=%x, r2=%x, w2=%x\n",r1,w1,r2,w2);
2049:     delay_clocks = 0;
2050: 
2051:     // Determine if AGI
2052:     if (!PRO && pair_agi(ci1,ci2))
2053:         delay_clocks = 1;
2054: 
2055:     // Special delays for floating point
2056:     if (fpsched)
2057:     {   if (ci1->fp_op == FPfld && ci2->fp_op == FPfstp)
2058:             delay_clocks = 1;
2059:         else if (ci1->fp_op == FPfop && ci2->fp_op == FPfstp)
2060:             delay_clocks = 3;
2061:         else if (ci1->fp_op == FPfop && ci2->fp_op == FPfop)
2062:             delay_clocks = 2;
2063:     }
2064:     else if (PRO)
2065:     {
2066:         // Look for partial register write stalls
2067:         if (w1 & r2 & ALLREGS && sz1 < sz2)
2068:             delay_clocks = 7;
2069:     }
2070:     else if ((w1 | r1) & (w2 | r2) & (C | S))
2071:     {   int reg;
2072:         int op;
2073: 
2074:         op = c1->Iop;
2075:         reg = c1->Irm & modregrm(0,7,0);
2076:         if (ci1->fp_op == FPfld ||
2077:             (op == 0xD9 && (c1->Irm & 0xF8) == 0xC0)
2078:            )
2079:             ;                           // FLD
2080:         else if (op == 0xD9 && (c1->Irm & 0xF8) == 0xC8)
2081:             ;                           // FXCH
2082:         else if (c2->Iop == 0xD9 && (c2->Irm & 0xF8) == 0xC8)
2083:             ;                           // FXCH
2084:         else
2085:             delay_clocks = 3;
2086:     }
2087: 
2088:     if (i) printf("conflict %d\n\n",delay_clocks);
2089:     return 0x100 + delay_clocks;
2090: }
2091: 
2092: struct Schedule
2093: {
2094:     #define TBLMAX      (2*3*20)        // must be divisible by both 2 and 3
2095:                                         // (U,V pipe in Pentium, 3 decode units
2096:                                         //  in Pentium Pro)
2097: 
2098:     Cinfo *tbl[TBLMAX];         // even numbers are U pipe, odd numbers are V
2099:     int tblmax;                 // max number of slots used
2100: 
2101:     Cinfo cinfo[TBLMAX];
2102:     int cinfomax;
2103: 
2104:     list_t stagelist;           // list of instructions in staging area
2105: 
2106:     int fpustackused;           // number of slots in FPU stack that are used
2107: 
2108:     void initialize(int fpustackinit);          // initialize scheduler
2109:     int stage(code *c);         // stage instruction
2110:     int insert(Cinfo *ci);      // insert c into schedule
2111:     code **assemble(code **pc); // reassemble scheduled instructions
2112: };
2113: 
2114: /******************************
2115:  */
2116: 
2117: void Schedule::initialize(int fpustackinit)
2118: {
2119:     //printf("Schedule::initialize(fpustackinit = %d)\n", fpustackinit);
2120:     memset(this,0,sizeof(Schedule));
2121:     fpustackused = fpustackinit;
2122: }
2123: 
2124: /******************************
2125:  */
2126: 
2127: code **Schedule::assemble(code **pc)
2128: {   int i;
2129:     list_t l;
2130:     code *c;
2131: 
2132: #ifdef DEBUG
2133:     if (debugs) printf("assemble:\n");
2134: #endif
2135:     assert(!*pc);
2136: 
2137:     // Try to insert the rest of the staged instructions
2138:     for (l = stagelist; l; l = list_next(l))
2139:     {   Cinfo *ci;
2140: 
2141:         ci = (Cinfo *)list_ptr(l);
2142:         if (!insert(ci))
2143:             break;
2144:     }
2145: 
2146:     // Get the instructions out of the schedule table
2147:     assert((unsigned)tblmax <= TBLMAX);
2148:     for (i = 0; i < tblmax; i++)
2149:     {   Cinfo *ci;
2150: 
2151:         ci = tbl[i];
2152: #ifdef DEBUG
2153:         if (debugs)
2154:         {
2155:             if (PRO)
2156:             {   static char tbl[3][4] = { "0  "," 1 ","  2" };
2157: 
2158:                 if (ci)
2159:                     printf("%s %d ",tbl[i - ((i / 3) * 3)],ci->uops);
2160:                 else
2161:                     printf("%s   ",tbl[i - ((i / 3) * 3)]);
2162:             }
2163:             else
2164:             {
2165:                 printf((i & 1) ? " V " : "U  ");
2166:             }
2167:             if (ci)
2168:                 ci->c->print();
2169:             else
2170:                 printf("\n");
2171:         }
2172: #endif
2173:         if (!ci)
2174:             continue;
2175:         fpustackused += ci->fpuadjust;
2176:         //printf("stage()1: fpustackused = %d\n", fpustackused);
2177:         c = ci->c;
2178:         if (i == 0)
2179:             c->Iflags |= CFtarg;        // by definition, first is always a jump target
2180:         else
2181:             c->Iflags &= ~CFtarg;       // the rest are not
2182: 
2183:         // Put in any FXCH prefix
2184:         if (ci->fxch_pre)
2185:         {   code *cf;
2186:             assert(i);
2187:             cf = gen2(NULL,0xD9,0xC8 + ci->fxch_pre);
2188:             *pc = cf;
2189:             pc = &code_next(cf);
2190:         }
2191: 
2192:         *pc = c;
2193:         do
2194:         {
2195:             assert(*pc != code_next(*pc));
2196:             pc = &code_next(*pc);
2197:         } while (*pc);
2198: 
2199:         // Put in any FXCH postfix
2200:         if (ci->fxch_post)
2201:         {   int j;
2202: 
2203:             for (j = i + 1; j < tblmax; j++)
2204:             {   if (tbl[j])
2205:                 {   if (tbl[j]->fxch_pre == ci->fxch_post)
2206:                     {
2207:                         tbl[j]->fxch_pre = 0;           // they cancel each other out
2208:                         goto L1;
2209:                     }
2210:                     break;
2211:                 }
2212:             }
2213:             {   code *cf;
2214:                 cf = gen2(NULL,0xD9,0xC8 + ci->fxch_post);
2215:                 *pc = cf;
2216:                 pc = &code_next(cf);
2217:             }
2218:         }
2219:     L1: ;
2220:     }
2221: 
2222:     // Just append any instructions left in the staging area
2223:     for (; l; l = list_next(l))
2224:     {   Cinfo *ci = (Cinfo *)list_ptr(l);
2225:         code *c = ci->c;
warning C6246: Local declaration of 'c' hides declaration of the same name in outer scope. For additional information, see previous declaration at line '2130' of 'c:\projects\extern\d\dmd\src\backend\cgsched.c': Lines: 2130
2226: 
2227: #ifdef DEBUG
2228:         if (debugs) { printf("appending: "); c->print(); }
2229: #endif
2230:         *pc = c;
2231:         do
2232:         {
2233:             pc = &code_next(*pc);
2234: 
2235:         } while (*pc);
2236:         fpustackused += ci->fpuadjust;
2237:         //printf("stage()2: fpustackused = %d\n", fpustackused);
2238:     }
2239:     list_free(&stagelist);
2240: 
2241:     return pc;
2242: }
2243: 
2244: /******************************
2245:  * Insert c into scheduling table.
2246:  * Returns:
2247:  *      0       could not be scheduled; have to start a new one
2248:  */
2249: 
2250: int Schedule::insert(Cinfo *ci)
2251: {   code *c;
2252:     int clocks;
2253:     int i;
2254:     int ic = 0;
2255:     int imin;
2256:     targ_size_t offset;
2257:     targ_size_t vpointer;
2258:     int movesp = 0;
2259:     int reg2 = -1;              // avoid "may be uninitialized" warning
2260: 
2261:     //printf("insert "); ci->c->print();
2262:     //printf("insert() %d\n", fpustackused);
2263:     c = ci->c;
2264:     //printf("\tc->Iop %x\n",c->Iop);
2265:     vpointer = c->IEVpointer1;
2266:     assert((unsigned)tblmax <= TBLMAX);
2267:     if (tblmax == TBLMAX)               // if out of space
2268:         goto Lnoinsert;
2269:     if (tblmax == 0)                    // if table is empty
2270:     {   // Just stuff it in the first slot
2271:         i = tblmax;
2272:         goto Linsert;
2273:     }
2274:     else if (c->Iflags & (CFtarg | CFtarg2))
2275:         // Jump targets can only be first in the scheduler
2276:         goto Lnoinsert;
2277: 
2278:     // Special case of:
2279:     //  PUSH reg1
2280:     //  MOV  reg2,x[ESP]
2281:     if (c->Iop == 0x8B &&
2282:         (c->Irm & modregrm(3,0,7)) == modregrm(1,0,4) &&
2283:         c->Isib == modregrm(0,4,SP) &&
2284:         c->IFL1 == FLconst &&
2285:         ((signed char)c->IEVpointer1) >= REGSIZE
2286:        )
2287:     {
2288:         movesp = 1;                     // this is a MOV reg2,offset[ESP]
2289:         offset = (signed char)c->IEVpointer1;
2290:         reg2 = (c->Irm >> 3) & 7;
2291:     }
2292: 
2293: 
2294:     // Start at tblmax, and back up until we get a conflict
2295:     ic = -1;
2296:     imin = 0;
2297:     for (i = tblmax; i >= 0; i--)
2298:     {   Cinfo *cit;
2299: 
2300:         cit = tbl[i];
2301:         if (!cit)
2302:             continue;
2303: 
2304:         // Look for special case swap
2305:         if (movesp &&
2306:             (cit->c->Iop & ~7) == 0x50 &&               // if PUSH reg1
2307:             (cit->c->Iop & 7) != reg2 &&                // if reg1 != reg2
2308:             ((signed char)c->IEVpointer1) >= -cit->spadjust
2309:            )
warning C4146: unary minus operator applied to unsigned type, result still unsigned
warning C4018: '>=' : signed/unsigned mismatch
2310:         {
2311:             c->IEVpointer1 += cit->spadjust;
2312:             //printf("\t1, spadjust = %d, ptr = x%x\n",cit->spadjust,c->IEVpointer1);
2313:             continue;
2314:         }
2315: 
2316:         if (movesp &&
2317:             cit->c->Iop == 0x83 &&
2318:             cit->c->Irm == modregrm(3,5,SP) &&          // if SUB ESP,offset
2319:             cit->c->IFL2 == FLconst &&
2320:             ((signed char)c->IEVpointer1) >= -cit->spadjust
2321:            )
warning C4146: unary minus operator applied to unsigned type, result still unsigned
warning C4018: '>=' : signed/unsigned mismatch
2322:         {
2323:             //printf("\t2, spadjust = %d\n",cit->spadjust);
2324:             c->IEVpointer1 += cit->spadjust;
2325:             continue;
2326:         }
2327: 
2328:         clocks = conflict(cit,ci,1);
2329:         if (clocks)
2330:         {   int j;
2331: 
2332:             ic = i;                     // where the conflict occurred
2333:             clocks &= 0xFF;             // convert to delay count
2334: 
2335:             // Move forward the delay clocks
2336:             if (clocks == 0)
2337:                 j = i + 1;
2338:             else if (PRO)
2339:                 j = (((i + 3) / 3) * 3) + clocks * 3;
2340:             else
2341:             {   j = ((i + 2) & ~1) + clocks * 2;
2342: 
2343:                 // It's possible we skipped over some AGI generating
2344:                 // instructions due to movesp.
2345:                 int k;
2346:                 for (k = i + 1; k < j; k++)
2347:                 {
2348:                     if (k >= TBLMAX)
2349:                         goto Lnoinsert;
2350:                     if (tbl[k] && pair_agi(tbl[k],ci))
2351:                     {
2352:                         k = ((k + 2) & ~1) + 1;
2353:                     }
2354:                 }
2355:                 j = k;
2356:             }
2357: 
2358:             if (j >= TBLMAX)                    // exceed table size?
2359:                 goto Lnoinsert;
2360:             imin = j;                           // first possible slot c can go in
2361:             break;
2362:         }
2363:     }
2364: 
2365: 
2366:     // Scan forward looking for a hole to put it in
2367:     for (i = imin; i < TBLMAX; i++)
2368:     {
2369:         if (tbl[i])
2370:         {
2371:             // In case, due to movesp, we skipped over some AGI instructions
2372:             if (!PRO && pair_agi(tbl[i],ci))
2373:             {
2374:                 i = ((i + 2) & ~1) + 1;
2375:                 if (i >= TBLMAX)
2376:                     goto Lnoinsert;
2377:             }
2378:         }
2379:         else
2380:         {
2381:             if (PRO)
2382:             {   int i0 = (i / 3) * 3;           // index of decode unit 0
2383:                 Cinfo *ci0;
2384: 
2385:                 assert(((TBLMAX / 3) * 3) == TBLMAX);
2386:                 switch (i - i0)
2387:                 {
2388:                     case 0:                     // i0 can handle any instruction
2389:                         goto Linsert;
2390:                     case 1:
2391:                         ci0 = tbl[i0];
2392:                         if (ci->uops > 1)
2393:                         {
2394:                             if (i0 >= imin && ci0->uops == 1)
2395:                                 goto L1;
2396:                             i++;
2397:                             break;
2398:                         }
2399:                         if (triple_test(ci0,ci,tbl[i0 + 2]))
2400:                             goto Linsert;
2401:                         break;
2402:                     case 2:
2403:                         ci0 = tbl[i0];
2404:                         if (ci->uops > 1)
2405:                         {
2406:                             if (i0 >= imin && ci0->uops == 1)
2407:                             {
2408:                                 if (i >= tblmax)
2409:                                 {   if (i + 1 >= TBLMAX)
2410:                                         goto Lnoinsert;
2411:                                     tblmax = i + 1;
2412:                                 }
2413:                                 tbl[i0 + 2] = tbl[i0 + 1];
2414:                                 tbl[i0 + 1] = ci0;
2415:                                 i = i0;
2416:                                 goto Linsert;
2417:                             }
2418:                             break;
2419:                         }
2420:                         if (triple_test(ci0,tbl[i0 + 1],ci))
2421:                             goto Linsert;
2422:                         break;
2423:                     default:
2424:                         assert(0);
2425:                 }
2426:             }
2427:             else
2428:             {
2429:                 assert((TBLMAX & 1) == 0);
2430:                 if (i & 1)                      // if V pipe
2431:                 {
2432:                     if (pair_test(tbl[i - 1],ci))
2433:                     {
2434:                         goto Linsert;
2435:                     }
2436:                     else if (i > imin && pair_test(ci,tbl[i - 1]))
2437:                     {
2438:                 L1:
2439:                         tbl[i] = tbl[i - 1];
2440:                         if (i >= tblmax)
2441:                             tblmax = i + 1;
2442:                         i--;
2443:                         //printf("\tswapping with x%02x\n",tbl[i + 1]->c->Iop);
2444:                         goto Linsert;
2445:                     }
2446:                 }
2447:                 else                    // will always fit in U pipe
2448:                 {
2449:                     assert(!tbl[i + 1]);        // because V pipe should be empty
warning C6201: Index '120' is out of valid index range '0' to '119' for possibly stack allocated buffer 'tbl'
2450:                     goto Linsert;
2451:                 }
2452:             }
2453:         }
2454:     }
2455: 
2456: Lnoinsert:
2457:     //printf("\tnoinsert\n");
2458:     c->IEVpointer1 = vpointer;  // reset to original value
2459:     return 0;
2460: 
2461: Linsert:
2462:     // Insert at location i
2463:     assert(i < TBLMAX);
2464:     assert(tblmax <= TBLMAX);
2465:     tbl[i] = ci;
2466:     //printf("\tinsert at location %d\n",i);
2467: 
2468:     // If it's a scheduled floating point code, we have to adjust
2469:     // the FXCH values
2470:     if (ci->fp_op)
2471:     {   int j;
2472: 
2473:         ci->fxch_pre = 0;
2474:         ci->fxch_post = 0;                      // start over again
2475: 
2476:         int fpu = fpustackused;
2477:         for (j = 0; j < tblmax; j++)
2478:         {
2479:             if (tbl[j])
2480:             {
2481:                 fpu += tbl[j]->fpuadjust;
2482:                 if (fpu >= 8)                   // if FPU stack overflow
2483:                 {   tbl[i] = NULL;
2484:                     //printf("fpu stack overflow\n");
2485:                     goto Lnoinsert;
2486:                 }
2487:             }
2488:         }
2489: 
2490:         for (j = tblmax; j > i; j--)
2491:         {
2492:             if (j < TBLMAX && tbl[j])
2493:                 conflict(tbl[j],ci,2);
2494:         }
2495:     }
2496: 
2497:     if (movesp)
2498:     {   // Adjust [ESP] offsets
2499:         int j;
2500: 
2501:         //printf("\tic = %d, inserting at %d\n",ic,i);
2502:         assert((unsigned)tblmax <= TBLMAX);
2503:         for (j = ic + 1; j < i; j++)
2504:         {   Cinfo *cit;
2505: 
2506:             cit = tbl[j];
2507:             if (cit)
2508:             {
2509:                 c->IEVpointer1 -= cit->spadjust;
2510:                 //printf("\t3, spadjust = %d, ptr = x%x\n",cit->spadjust,c->IEVpointer1);
2511:             }
2512:         }
2513:     }
2514:     if (i >= tblmax)
2515:         tblmax = i + 1;
2516: 
2517:     // Now do a hack. Look back at immediately preceding instructions,
2518:     // and see if we can swap with a push.
2519:     if (0 && movesp)
2520:     {   int j;
2521: 
2522:         while (1)
2523:         {
2524:             for (j = 1; i > j; j++)
2525:                 if (tbl[i - j])
2526:                     break;
2527: 
2528:             if (i >= j && tbl[i - j] &&
2529:                    (tbl[i - j]->c->Iop & ~7) == 0x50 &&       // if PUSH reg1
2530:                    (tbl[i - j]->c->Iop & 7) != reg2 &&  // if reg1 != reg2
2531:                    (signed char)c->IEVpointer1 >= REGSIZE)
2532:             {
2533:                 //printf("\t-4 prec, i-j=%d, i=%d\n",i-j,i);
2534:                 assert((unsigned)i < TBLMAX);
2535:                 assert((unsigned)(i - j) < TBLMAX);
2536:                 tbl[i] = tbl[i - j];
2537:                 tbl[i - j] = ci;
2538:                 i -= j;
2539:                 c->IEVpointer1 -= REGSIZE;
2540:             }
2541:             else
2542:                 break;
2543:         }
2544:     }
2545: 
2546:     //printf("\tinsert\n");
2547:     return 1;
2548: }
2549: 
2550: 
2551: /******************************
2552:  * Insert c into staging area.
2553:  * Returns:
2554:  *      0       could not be scheduled; have to start a new one
2555:  */
2556: 
2557: int Schedule::stage(code *c)
2558: {   Cinfo *ci;
2559:     list_t l;
2560:     list_t ln;
2561:     int agi;
2562: 
2563:     //printf("stage: "); c->print();
2564:     if (cinfomax == TBLMAX)             // if out of space
2565:         goto Lnostage;
2566:     ci = &cinfo[cinfomax++];
2567:     getinfo(ci,c);
2568: 
2569:     if (c->Iflags & (CFtarg | CFtarg2 | CFvolatile))
2570:     {
2571:         // Insert anything in stagelist
2572:         for (l = stagelist; l; l = ln)
2573:         {   Cinfo *cs;
2574: 
2575:             ln = list_next(l);
2576:             cs = (Cinfo *)list_ptr(l);
2577:             if (!insert(cs))
2578:                 return 0;
2579:             list_subtract(&stagelist,cs);
2580:         }
2581:         return insert(ci);
2582:     }
2583: 
2584:     // Look through stagelist, and insert any AGI conflicting instructions
2585:     agi = 0;
2586:     for (l = stagelist; l; l = ln)
2587:     {   Cinfo *cs;
2588: 
2589:         ln = list_next(l);
2590:         cs = (Cinfo *)list_ptr(l);
2591:         if (pair_agi(cs,ci))
2592:         {
2593:             if (!insert(cs))
2594:                 goto Lnostage;
2595:             list_subtract(&stagelist,cs);
2596:             agi = 1;                    // we put out an AGI
2597:         }
2598:     }
2599: 
2600:     // Look through stagelist, and insert any other conflicting instructions
2601:     for (l = stagelist; l; l = ln)
2602:     {   Cinfo *cs;
2603: 
2604:         ln = list_next(l);
2605:         cs = (Cinfo *)list_ptr(l);
2606:         if (conflict(cs,ci,0) &&                // if conflict
2607:             !(cs->flags & ci->flags & CIFLpush))
2608:         {
2609:             if (cs->spadjust)
2610:             {
2611:                 // We need to insert all previous adjustments to ESP
2612:                 list_t la,lan;
2613: 
2614:                 for (la = stagelist; la != l; la = lan)
2615:                 {   Cinfo *ca;
2616: 
2617:                     lan = list_next(la);
2618:                     ca = (Cinfo *)list_ptr(la);
2619:                     if (ca->spadjust)
2620:                     {   if (!insert(ca))
2621:                             goto Lnostage;
2622:                         list_subtract(&stagelist,ca);
2623:                     }
2624:                 }
2625:             }
2626: 
2627:             if (!insert(cs))
2628:                 goto Lnostage;
2629:             list_subtract(&stagelist,cs);
2630:         }
2631:     }
2632: 
2633:     // If floating point opcode, don't stage it, send it right out
2634:     if (!agi && ci->flags & CIFLnostage)
2635:     {
2636:         if (!insert(ci))
2637:             goto Lnostage;
2638:         return 1;
2639:     }
2640: 
2641:     list_append(&stagelist,ci);         // append to staging list
2642:     return 1;
2643: 
2644: Lnostage:
2645:     return 0;
2646: }
2647: 
2648: /********************************************
2649:  * Snip off tail of instruction sequence.
2650:  * Returns:
2651:  *      next instruction (the tail) or
2652:  *      NULL for no more instructions
2653:  */
2654: 
2655: STATIC code * csnip(code *c)
2656: {   code **pc;
2657:     unsigned iflags;
2658: 
2659:     if (c)
2660:     {   iflags = c->Iflags & CFclassinit;
2661:         while (1)
2662:         {
2663:             pc = &code_next(c);
2664:             c = *pc;
2665:             if (!c)
2666:                 break;
2667:             if (c->Iflags & (CFtarg | CFtarg2))
2668:                 break;
2669:             if (!(c->Iop == NOP ||
2670:                   c->Iop == (ESCAPE | ESClinnum) ||
2671:                   c->Iflags & iflags))
2672:                 break;
2673:         }
2674:         *pc = NULL;
2675:     }
2676:     return c;
2677: }
2678: 
2679: 
2680: /******************************
2681:  * Schedule Pentium instructions,
2682:  * based on Steve Russell's algorithm.
2683:  */
2684: 
2685: code *schedule(code *c,regm_t scratch)
2686: {
2687:     code *cresult = NULL;
2688:     code **pctail = &cresult;
2689:     Schedule sch;
2690: 
2691:     sch.initialize(0);                  // initialize scheduling table
2692:     while (c)
2693:     {
2694:         if ((c->Iop == NOP || (c->Iop & 0xFF) == ESCAPE || c->Iflags & CFclassinit) &&
2695:             !(c->Iflags & (CFtarg | CFtarg2)))
2696:         {   code *cn;
2697: 
2698:             // Just append this instruction to pctail and go to the next one
2699:             *pctail = c;
2700:             cn = code_next(c);
2701:             code_next(c) = NULL;
2702:             pctail = &code_next(c);
2703:             c = cn;
2704:             continue;
2705:         }
2706: 
2707:         //printf("init\n");
2708:         sch.initialize(sch.fpustackused);       // initialize scheduling table
2709: 
2710:         while (c)
2711:         {
2712:             //printf("insert %p\n",c);
2713:             if (!sch.stage(c))          // store c in scheduling table
2714:                 break;
2715:             c = csnip(c);
2716:         }
2717: 
2718:         //printf("assem %d\n",sch.tblmax);
2719:         pctail = sch.assemble(pctail);  // reassemble instruction stream
2720:     }
2721: 
2722:     return cresult;
2723: }
2724: 
2725: /**************************************************************************/
2726: 
2727: /********************************************
2728:  * Replace any occurrence of r1 in EA with r2.
2729:  */
2730: 
2731: STATIC void repEA(code *c,unsigned r1,unsigned r2)
2732: {
2733:     unsigned mod,reg,rm;
2734:     unsigned rmn;
2735: 
2736:     rmn = c->Irm;
2737:     mod = rmn & 0xC0;
2738:     reg = rmn & modregrm(0,7,0);
2739:     rm =  rmn & 7;
2740: 
2741:     if (mod == 0xC0 && rm == r1)
2742:         ; //c->Irm = mod | reg | r2;
2743:     else if (is32bitaddr(I32,c->Iflags) &&
2744:         // If not disp32
2745:         (rmn & modregrm(3,0,7)) != modregrm(0,0,5))
2746:     {
2747:         if (rm == 4)
2748:         {   // SIB byte addressing
2749:             unsigned sib;
2750:             unsigned base;
2751:             unsigned index;
2752: 
2753:             sib = c->Isib;
2754:             base = sib & 7;
2755:             index = (sib >> 3) & 7;
2756:             if (base == r1 &&
2757:                 !(r1 == 5 && mod == 0) &&
2758:                 !(r2 == 5 && mod == 0)
2759:                )
2760:                 base = r2;
2761:             if (index == r1)
2762:                 index = r2;
2763:             c->Isib = (sib & 0xC0) | (index << 3) | base;
2764:         }
2765:         else if (rm == r1)
2766:         {
2767:             if (r1 == BP && r2 == SP)
2768:             {   // Replace [EBP] with [ESP]
2769:                 c->Irm = mod | reg | 4;
2770:                 c->Isib = modregrm(0,4,SP);
2771:             }
2772:             else if (r2 == BP && mod == 0)
2773:             {
2774:                 c->Irm = modregrm(1,0,0) | reg | r2;
2775:                 c->IFL1 = FLconst;
2776:                 c->IEV1.Vint = 0;
2777:             }
2778:             else
2779:                 c->Irm = mod | reg | r2;
2780:         }
2781:     }
2782: }
2783: 
2784: /******************************************
2785:  * Instruction scheduler.
2786:  * Input:
2787:  *      c               list of instructions to schedule
2788:  *      scratch         scratch registers we can use
2789:  * Returns:
2790:  *      revised list of scheduled instructions
2791:  */
2792: 
2793: /******************************************
2794:  * Swap c1 and c2.
2795:  * c1 comes before c2.
2796:  * Swap in place to not disturb addresses of jmp targets
2797:  */
2798: 
2799: STATIC void code_swap(code *c1,code *c2)
2800: {   code cs;
2801: 
2802:     // Special case of:
2803:     //  PUSH reg1
2804:     //  MOV  reg2,x[ESP]
2805:     //printf("code_swap(%x, %x)\n",c1,c2);
2806:     if ((c1->Iop & ~7) == 0x50 &&
2807:         c2->Iop == 0x8B &&
2808:         (c2->Irm & modregrm(3,0,7)) == modregrm(1,0,4) &&
2809:         c2->Isib == modregrm(0,4,SP) &&
2810:         c2->IFL1 == FLconst &&
2811:         ((signed char)c2->IEVpointer1) >= REGSIZE &&
2812:         (c1->Iop & 7) != ((c2->Irm >> 3) & 7)
2813:        )
2814:         c2->IEVpointer1 -= REGSIZE;
2815: 
2816: 
2817:     cs = *c2;
2818:     *c2 = *c1;
2819:     *c1 = cs;
2820:     // Retain original CFtarg
2821:     c1->Iflags = (c1->Iflags & ~(CFtarg | CFtarg2)) | (c2->Iflags & (CFtarg | CFtarg2));
2822:     c2->Iflags = (c2->Iflags & ~(CFtarg | CFtarg2)) | (cs.Iflags & (CFtarg | CFtarg2));
2823: 
2824:     c1->next = c2->next;
2825:     c2->next = cs.next;
2826: }
2827: 
2828: code *peephole(code *cstart,regm_t scratch)
2829: {
2830:     // Look for cases of:
2831:     //  MOV r1,r2
2832:     //  OP ?,r1
2833:     // we can replace with:
2834:     //  MOV r1,r2
2835:     //  OP ?,r2
2836:     // to improve pairing
2837:     code *c;
2838:     code *c1;
2839:     unsigned r1,r2;
2840:     unsigned mod,reg,rm;
2841: 
2842:     //printf("peephole\n");
2843:     for (c = cstart; c; c = c1)
2844:     {   unsigned char rmi;
2845:         unsigned char rmn;
2846: 
2847:         //c->print();
2848:         c1 = cnext(c);
2849:     Ln:
2850:         if (!c1)
2851:             break;
2852:         if (c1->Iflags & (CFtarg | CFtarg2))
2853:             continue;
2854: 
2855:         // Do:
2856:         //      PUSH    reg
2857:         if (I32 && (c->Iop & ~7) == 0x50)
2858:         {   unsigned reg = c->Iop & 7;
warning C6246: Local declaration of 'reg' hides declaration of the same name in outer scope. For additional information, see previous declaration at line '2840' of 'c:\projects\extern\d\dmd\src\backend\cgsched.c': Lines: 2840
2859: 
2860:             //  MOV     [ESP],reg       =>      NOP
2861:             if (c1->Iop == 0x8B &&
2862:                 c1->Irm == modregrm(0,reg,4) &&
2863:                 c1->Isib == modregrm(0,4,SP))
2864:             {   c1->Iop = NOP;
2865:                 continue;
2866:             }
2867: 
2868:             //  PUSH    [ESP]           =>      PUSH    reg
2869:             if (c1->Iop == 0xFF &&
2870:                 c1->Irm == modregrm(0,6,4) &&
2871:                 c1->Isib == modregrm(0,4,SP))
2872:             {   c1->Iop = 0x50 + reg;
2873:                 continue;
2874:             }
2875: 
2876:             //  CMP     [ESP],imm       =>      CMP     reg,i,,
2877:             if (c1->Iop == 0x83 &&
2878:                 c1->Irm == modregrm(0,7,4) &&
2879:                 c1->Isib == modregrm(0,4,SP))
2880:             {   c1->Irm = modregrm(3,7,reg);
2881:                 if (c1->IFL2 == FLconst && (signed char)c1->IEV2.Vuns == 0)
2882:                 {   // to TEST reg,reg
2883:                     c1->Iop = (c1->Iop & 1) | 0x84;
2884:                     c1->Irm = modregrm(3,reg,reg);
2885:                 }
2886:                 continue;
2887:             }
2888: 
2889:         }
2890: 
2891:         rmi = c->Irm;
2892: 
2893:         // Do:
2894:         //      MOV     reg,[ESP]       =>      PUSH    reg
2895:         //      ADD     ESP,4           =>      NOP
2896:         if (I32 && c->Iop == 0x8B && (rmi & 0xC7) == modregrm(0,0,4) &&
2897:             c->Isib == modregrm(0,4,SP) &&
2898:             c1->Iop == 0x83 && (c1->Irm & 0xC7) == modregrm(3,0,SP) &&
2899:             !(c1->Iflags & CFpsw) && c1->IFL2 == FLconst && c1->IEV2.Vint == 4)
2900:         {   unsigned reg = (rmi >> 3) & 7;
warning C6246: Local declaration of 'reg' hides declaration of the same name in outer scope. For additional information, see previous declaration at line '2840' of 'c:\projects\extern\d\dmd\src\backend\cgsched.c': Lines: 2840
2901:             c->Iop = 0x58 + reg;
2902:             c1->Iop = NOP;
2903:             continue;
2904:         }
2905: 
2906:         if ((rmi & 0xC0) != 0xC0)
2907:         {
2908:             continue;
2909:         }
2910: 
2911:         // Combine two SUBs of the same register
2912:         if (c->Iop == c1->Iop &&
2913:             c->Iop == 0x83 &&
2914:             (rmi & modregrm(3,0,7)) == (c1->Irm & modregrm(3,0,7)) &&
2915:             !(c1->Iflags & CFpsw) &&
2916:             c->IFL2 == FLconst && c1->IFL2 == FLconst
2917:            )
2918:         {   int i = (signed char)c->IEV2.Vint;
2919:             int i1 = (signed char)c1->IEV2.Vint;
2920:             switch ((rmi & modregrm(0,7,0)) | ((c1->Irm & modregrm(0,7,0)) >> 3))
2921:             {
2922:                 case (0 << 3) | 0:              // ADD, ADD
2923:                 case (5 << 3) | 5:              // SUB, SUB
2924:                     i += i1;
2925:                     goto Laa;
2926:                 case (0 << 3) | 5:              // ADD, SUB
2927:                 case (5 << 3) | 0:              // SUB, ADD
2928:                     i -= i1;
2929:                     goto Laa;
2930:                 Laa:
2931:                     if ((signed char)i != i)
2932:                         c->Iop &= ~2;
2933:                     c->IEV2.Vint = i;
2934:                     c1->Iop = NOP;
2935:                     if (i == 0)
2936:                         c->Iop = NOP;
2937:                     continue;
2938:             }
2939:         }
2940: 
2941:         if (c->Iop == 0x8B)                     // MOV r1,EA
2942:         {   r1 = (rmi >> 3) & 7;
2943:             r2 = rmi & 7;
2944:         }
2945:         else if (c->Iop == 0x89)                // MOV EA,r2
2946:         {   r1 = rmi & 7;
2947:             r2 = (rmi >> 3) & 7;
2948:         }
2949:         else
2950:         {
2951:             continue;
2952:         }
2953: 
2954:         rmn = c1->Irm;
2955:         mod = rmn & 0xC0;
2956:         reg = rmn & modregrm(0,7,0);
2957:         rm =  rmn & 7;
2958:         if (cod3_EA(c1))
2959:             repEA(c1,r1,r2);
2960:         switch (c1->Iop)
2961:         {
2962:             case 0x50:
2963:             case 0x51:
2964:             case 0x52:
2965:             case 0x53:
2966:             case 0x54:
2967:             case 0x55:
2968:             case 0x56:
2969:             case 0x57:                          // PUSH reg
2970:                 if ((c1->Iop & 7) == r1)
2971:                 {   c1->Iop = 0x50 | r2;
2972:                     //printf("schedule PUSH reg\n");
2973:                 }
2974:                 break;
2975: 
2976:             case 0x81:
2977:             case 0x83:
2978:                 // Look for CMP EA,imm
2979:                 if (reg == modregrm(0,7,0))
2980:                 {
2981:                     if (mod == 0xC0 && rm == r1)
2982:                         c1->Irm = mod | reg | r2;
2983:                 }
2984:                 break;
2985: 
2986:             case 0x84:                  // TEST reg,byte ptr EA
2987:                 if (r1 >= 4 || r2 >= 4) // if not a byte register
2988:                     break;
2989:                 if ((rmn & 0xC0) == 0xC0)
2990:                 {
2991:                     if ((rmn & 3) == r1)
2992:                     {   c1->Irm = rmn = (rmn & modregrm(3,7,4)) | r2;
2993:                         //printf("schedule 1\n");
2994:                     }
2995:                 }
2996:                 if ((rmn & modregrm(0,3,0)) == modregrm(0,r1,0))
2997:                 {   c1->Irm = (rmn & modregrm(3,4,7)) | modregrm(0,r2,0);
2998:                     //printf("schedule 2\n");
2999:                 }
3000:                 break;
3001:             case 0x85:                  // TEST reg,word ptr EA
3002:                 if ((rmn & 0xC0) == 0xC0)
3003:                 {
3004:                     if ((rmn & 7) == r1)
3005:                     {   c1->Irm = rmn = (rmn & modregrm(3,7,0)) | r2;
3006:                         //printf("schedule 3\n");
3007:                     }
3008:                 }
3009:                 if ((rmn & modregrm(0,7,0)) == modregrm(0,r1,0))
3010:                 {   c1->Irm = (rmn & modregrm(3,0,7)) | modregrm(0,r2,0);
3011:                     //printf("schedule 4\n");
3012:                 }
3013:                 break;
3014: 
3015:             case 0x89:                  // MOV EA,reg
3016:                 if ((rmn & modregrm(0,7,0)) == modregrm(0,r1,0))
3017:                 {   c1->Irm = (rmn & modregrm(3,0,7)) | modregrm(0,r2,0);
3018:                     //printf("schedule 5\n");
3019:                     if (c1->Irm == modregrm(3,r2,r2))
3020:                         goto Lnop;
3021:                 }
3022:                 break;
3023: 
3024:             case 0x8B:                  // MOV reg,EA
3025:                 if ((rmn & 0xC0) == 0xC0 &&
3026:                     (rmn & 7) == r1)            // if EA == r1
3027:                 {   c1->Irm = (rmn & modregrm(3,7,0)) | r2;
3028:                     //printf("schedule 6\n");
3029:                     if (c1->Irm == modregrm(3,r2,r2))
3030:                         goto Lnop;
3031:                 }
3032:                 break;
3033: 
3034:             case 0x3C:                  // CMP AL,imm8
3035:                 if (r1 == AX && r2 < 4)
3036:                 {   c1->Iop = 0x80;
3037:                     c1->Irm = modregrm(3,7,r2);
3038:                     //printf("schedule 7, r2 = %d\n", r2);
3039:                 }
3040:                 break;
3041: 
3042:             case 0x3D:                  // CMP AX,imm16
3043:                 if (r1 == AX)
3044:                 {   c1->Iop = 0x81;
3045:                     c1->Irm = modregrm(3,7,r2);
3046:                     if (c1->IFL2 == FLconst &&
3047:                         c1->IEV2.Vuns == (signed char)c1->IEV2.Vuns)
3048:                         c1->Iop = 0x83;
3049:                     //printf("schedule 8\n");
3050:                 }
3051:                 break;
3052:         }
3053:         continue;
3054: Lnop:
3055:         c1->Iop = NOP;
3056:         c1 = cnext(c1);
3057:         goto Ln;
3058:     }
3059: L1: ;
warning C4102: 'L1' : unreferenced label
3060:     return cstart;
3061: }
3062: 
3063: /*****************************************************************/
3064: 
3065: /**********************************************
3066:  * Replace complex instructions with simple ones more conducive
3067:  * to scheduling.
3068:  */
3069: 
3070: code *simpleops(code *c,regm_t scratch)
3071: {   code *cstart;
3072:     code **pc;
3073:     unsigned reg;
3074:     code *c2;
3075: 
3076:     // Worry about using registers not saved yet by prolog
3077:     scratch &= ~fregsaved;
3078: 
3079:     if (!(scratch & (scratch - 1)))     // if 0 or 1 registers
3080:         return c;
3081: 
3082:     reg = findreg(scratch);
3083: 
3084:     cstart = c;
3085:     for (pc = &cstart; *pc; pc = &code_next(*pc))
3086:     {
3087:         c = *pc;
3088:         if (c->Iflags & (CFtarg | CFtarg2 | CFopsize))
3089:             continue;
3090:         if (c->Iop == 0x83 &&
3091:             (c->Irm & modregrm(0,7,0)) == modregrm(0,7,0) &&
3092:             (c->Irm & modregrm(3,0,0)) != modregrm(3,0,0)
3093:            )
3094:         {   // Replace CMP mem,imm with:
3095:             //  MOV reg,mem
3096:             //  CMP reg,imm
3097:             targ_long imm;
3098: 
3099:             //printf("replacing CMP\n");
3100:             c->Iop = 0x8B;
3101:             c->Irm = (c->Irm & modregrm(3,0,7)) | modregrm(0,reg,0);
3102: 
3103:             c2 = code_calloc();
3104:             if (reg == AX)
3105:                 c2->Iop = 0x3D;
3106:             else
3107:             {   c2->Iop = 0x83;
3108:                 c2->Irm = modregrm(3,7,reg);
3109:             }
3110:             c2->IFL2 = c->IFL2;
3111:             c2->IEV2 = c->IEV2;
3112: 
3113:             // See if c2 should be replaced by a TEST
3114:             imm = c2->IEV2.Vuns;
3115:             if (!(c2->Iop & 1))
3116:                 imm &= 0xFF;
3117:             else if (I32 ? c->Iflags & CFopsize : !(c->Iflags & CFopsize))
3118:                 imm = (short) imm;
3119:             if (imm == 0)
3120:             {
3121:                 c2->Iop = 0x85;                 // TEST reg,reg
3122:                 c2->Irm = modregrm(3,reg,reg);
3123:             }
3124:             goto L1;
3125:         }
3126:         else if (c->Iop == 0xFF &&
3127:             (c->Irm & modregrm(0,7,0)) == modregrm(0,6,0) &&
3128:             (c->Irm & modregrm(3,0,0)) != modregrm(3,0,0)
3129:            )
3130:         {   // Replace PUSH mem with:
3131:             //  MOV reg,mem
3132:             //  PUSH reg
3133: 
3134:            // printf("replacing PUSH\n");
3135:             c->Iop = 0x8B;
3136:             c->Irm = (c->Irm & modregrm(3,0,7)) | modregrm(0,reg,0);
3137: 
3138:             c2 = gen1(NULL,0x50 + reg);
3139:         L1:
3140: //c->print();
3141: //c2->print();
3142:             c2->next = c->next;
3143:             c->next = c2;
3144: 
3145:             // Switch to another reg
3146:             if (scratch & ~mask[reg])
3147:                 reg = findreg(scratch & ~mask[reg]);
3148:         }
3149:     }
3150:     return cstart;
3151: }
3152: 
3153: #if DEBUG
3154: static const char *fpops[] = {"fstp","fld","fop"};
3155: void Cinfo::print()
3156: {
3157:     Cinfo *ci = this;
3158: 
3159:     if (ci == NULL)
3160:     {
3161:         printf("Cinfo 0\n");
3162:         return;
3163:     }
3164: 
3165:     printf("Cinfo %p:  c %p, pair %x, sz %d, isz %d, flags - ",
3166:            ci,c,pair,sz,isz);
3167:     if (ci->flags & CIFLarraybounds)
3168:         printf("arraybounds,");
3169:     if (ci->flags & CIFLea)
3170:         printf("ea,");
3171:     if (ci->flags & CIFLnostage)
3172:         printf("nostage,");
3173:     if (ci->flags & CIFLpush)
3174:         printf("push,");
3175:     if (ci->flags & ~(CIFLarraybounds|CIFLnostage|CIFLpush|CIFLea))
3176:         printf("bad flag,");
3177:     printf("\n\tr %lx w %lx a %lx reg %x uops %x sibmodrm %x spadjust %ld\n",
3178:             (long)r,(long)w,(long)a,reg,uops,sibmodrm,(long)spadjust);
3179:     if (ci->fp_op)
3180:         printf("\tfp_op %s, fxch_pre %x, fxch_post %x\n",
3181:                 fpops[fp_op-1],fxch_pre,fxch_post);
3182: }
3183: #endif
3184: #endif
3185: