1: // Copyright (C) 1995-1998 by Symantec
2: // Copyright (C) 2000-2009 by Digital Mars
3: // All Rights Reserved
4: // http://www.digitalmars.com
5: // Written by Walter Bright
6: /*
7: * This source file is made available for personal use
8: * only. The license is in /dmd/src/dmd/backendlicense.txt
9: * or /dm/src/dmd/backendlicense.txt
10: * For any other uses, please contact Digital Mars.
11: */
12:
13: #if !SPP
14:
15: #include <stdio.h>
16: #include <string.h>
17: #include <time.h>
18:
19: #include "cc.h"
20: #include "el.h"
21: #include "code.h"
22: #include "oper.h"
23: #include "global.h"
24: #include "type.h"
25: #include "exh.h"
26: #include "list.h"
27:
28: static char __file__[] = __FILE__; /* for tassert.h */
29: #include "tassert.h"
30:
// PRO selects Pentium Pro style handling (micro-op counting in getinfo())
// instead of Pentium U/V pairing classification.
// The test is made against config.target_cpu; the variant that tests
// config.target_scheduler instead is retained but compiled out.
#if 1
#define PRO     (config.target_cpu >= TARGET_PentiumPro)
#else
#define PRO     (config.target_scheduler >= TARGET_PentiumPro)
#endif
37:
38: // Struct where we gather information about an instruction
39: struct Cinfo
40: {
41: code *c; // the instruction
42: unsigned char pair; // pairing information
43: unsigned char sz; // operand size
44: unsigned char isz; // instruction size
45:
46: // For floating point scheduling
47: unsigned char fxch_pre;
48: unsigned char fxch_post;
49: unsigned char fp_op;
50: #define FPfstp 1 // FSTP mem
51: #define FPfld 2 // FLD mem
52: #define FPfop 3 // Fop ST0,mem or Fop ST0
53:
54: unsigned char flags;
55: #define CIFLarraybounds 1 // this instruction is a jmp to array bounds
56: #define CIFLea 2 // this instruction has a memory-referencing
57: // modregrm EA byte
58: #define CIFLnostage 4 // don't stage these instructions
59: #define CIFLpush 8 // it's a push we can swap around
60:
61: unsigned r; // read mask
62: unsigned w; // write mask
63: unsigned a; // registers used in addressing mode
64: unsigned char reg; // reg field of modregrm byte
65: unsigned char uops; // Pentium Pro micro-ops
66: unsigned sibmodrm; // (sib << 8) + mod__rm byte
67: unsigned spadjust; // if !=0, then amount ESP changes as a result of this
68: // instruction being executed
69: int fpuadjust; // if !=0, then amount FPU stack changes as a result
70: // of this instruction being executed
71: #if DEBUG
72: void print(); // pretty-printer
73: #endif
74: };
75:
76: code *simpleops(code *c,regm_t scratch);
77: code *schedule(code *c,regm_t scratch);
78: code *peephole(code *c,regm_t scratch);
79:
80: /*****************************************
81: * Do Pentium optimizations.
82: * Input:
83: * scratch scratch registers we can use
84: */
85:
86: void cgsched_pentium(code **pc,regm_t scratch)
87: {
88: //printf("scratch = x%02x\n",scratch);
89: if (config.target_scheduler >= TARGET_80486)
90: {
91: if (!I64)
92: *pc = peephole(*pc,0);
93: if (I32) // forget about 16 bit code
94: {
95: if (config.target_cpu == TARGET_Pentium ||
96: config.target_cpu == TARGET_PentiumMMX)
97: *pc = simpleops(*pc,scratch);
98: *pc = schedule(*pc,0);
99: }
100: }
101: }
102:
// Pentium pairing classes. The low two bits select the pipe(s); the
// remaining bits are modifiers OR'd onto the class.
#define NP      0               // not pairable
#define PU      1               // pairable in U only, never executed in V
#define PV      2               // pairable in V only
#define UV      (PU|PV)         // pairable in both U and V
#define PE      4               // register contention exception
#define PF      8               // flags contention exception
#define FX      0x10            // pairable with FXCH instruction

// Default pairing class for each one-byte opcode. pair_class() starts
// from this value and refines it for opcodes whose class depends on the
// modregrm byte.
// NOTE(review): in the 0xD8 row, 0xDC (the double-precision FADD/FMUL/...
// group) is NP while 0xDA and 0xDB are FX - confirm against the Pentium
// FXCH pairing rules.
static unsigned char pentcycl[256] =
{
    /* 00 */    UV,UV,UV,UV,            UV,UV,NP,NP,
    /* 08 */    UV,UV,UV,UV,            UV,UV,NP,NP,
    /* 10 */    PU,PU,PU,PU,            PU,PU,NP,NP,
    /* 18 */    PU,PU,PU,PU,            PU,PU,NP,NP,
    /* 20 */    UV,UV,UV,UV,            UV,UV,NP,NP,
    /* 28 */    UV,UV,UV,UV,            UV,UV,NP,NP,
    /* 30 */    UV,UV,UV,UV,            UV,UV,NP,NP,
    /* 38 */    UV,UV,UV,UV,            UV,UV,NP,NP,

    /* 40 */    UV,UV,UV,UV,            UV,UV,UV,UV,
    /* 48 */    UV,UV,UV,UV,            UV,UV,UV,UV,
    /* 50 PUSH reg */
                PE|UV,PE|UV,PE|UV,PE|UV, PE|UV,PE|UV,PE|UV,PE|UV,
    /* 58 POP reg */
                PE|UV,PE|UV,PE|UV,PE|UV, PE|UV,PE|UV,PE|UV,PE|UV,
    /* 60 */    NP,NP,NP,NP,            NP,NP,NP,NP,
    /* 68 */    PE|UV,NP,PE|UV,NP,      NP,NP,NP,NP,
    /* 70 Jcc rel8 */
                PV|PF,PV|PF,PV|PF,PV|PF, PV|PF,PV|PF,PV|PF,PV|PF,
    /* 78 Jcc rel8 */
                PV|PF,PV|PF,PV|PF,PV|PF, PV|PF,PV|PF,PV|PF,PV|PF,

    /* 80 */    NP,NP,NP,NP,            NP,NP,NP,NP,
    /* 88 */    UV,UV,UV,UV,            NP,UV,NP,NP,
    /* 90 */    NP,NP,NP,NP,            NP,NP,NP,NP,
    /* 98 */    NP,NP,NP,NP,            NP,NP,NP,NP,
    /* A0 */    UV,UV,UV,UV,            NP,NP,NP,NP,
    /* A8 */    UV,UV,NP,NP,            NP,NP,NP,NP,
    /* B0 */    UV,UV,UV,UV,            UV,UV,UV,UV,
    /* B8 */    UV,UV,UV,UV,            UV,UV,UV,UV,

    /* C0 */    NP,NP,NP,NP,            NP,NP,NP,NP,
    /* C8 */    NP,NP,NP,NP,            NP,NP,NP,NP,
    /* D0 */    PU,PU,NP,NP,            NP,NP,NP,NP,
    /* D8 all floating point */
                FX,NP,FX,FX,            NP,NP,FX,NP,
    /* E0 */    NP,NP,NP,NP,            NP,NP,NP,NP,
    /* E8 */    PE|PV,PV,NP,PV,         NP,NP,NP,NP,
    /* F0 */    NP,NP,NP,NP,            NP,NP,NP,NP,
    /* F8 */    NP,NP,NP,NP,            NP,NP,NP,NP,
};
149:
150: /********************************************
151: * For each opcode, determine read [0] and written [1] masks.
152: */
153:
154: #define EA 0x100000
155: #define R 0x200000 // register (reg of modregrm field)
156: #define N 0x400000 // other things modified, not swappable
157: #define B 0x800000 // it's a byte operation
158: #define C 0x1000000 // floating point flags
159: #define mMEM 0x2000000 // memory
160: #define S 0x4000000 // floating point stack
161: #define F 0x8000000 // flags
162:
163: static unsigned oprw[256][2] =
164: {
165: // 00
166: EA|R|B, F|EA|B, // ADD
167: EA|R, F|EA,
168: EA|R|B, F|R|B,
169: EA|R, F|R,
170: mAX, F|mAX,
171: mAX, F|mAX,
172: N, N, // PUSH ES
173: N, N, // POP ES
174:
175: // 08
176: EA|R|B, F|EA|B, // OR
177: EA|R, F|EA,
178: EA|R|B, F|R|B,
179: EA|R, F|R,
180: mAX, F|mAX,
181: mAX, F|mAX,
182: N, N, // PUSH CS
183: N, N, // 2 byte escape
184:
185: // 10
186: F|EA|R|B,F|EA|B, // ADC
187: F|EA|R, F|EA,
188: F|EA|R|B,F|R|B,
189: F|EA|R, F|R,
190: F|mAX, F|mAX,
191: F|mAX, F|mAX,
192: N, N, // PUSH SS
193: N, N, // POP SS
194:
195: // 18
196: F|EA|R|B,F|EA|B, // SBB
197: F|EA|R, F|EA,
198: F|EA|R|B,F|R|B,
199: F|EA|R, F|R,
200: F|mAX, F|mAX,
201: F|mAX, F|mAX,
202: N, N, // PUSH DS
203: N, N, // POP DS
204:
205: // 20
206: EA|R|B, F|EA|B, // AND
207: EA|R, F|EA,
208: EA|R|B, F|R|B,
209: EA|R, F|R,
210: mAX, F|mAX,
211: mAX, F|mAX,
212: N, N, // SEG ES
213: F|mAX, F|mAX, // DAA
214:
215: // 28
216: EA|R|B, F|EA|B, // SUB
217: EA|R, F|EA,
218: EA|R|B, F|R|B,
219: EA|R, F|R,
220: mAX, F|mAX,
221: mAX, F|mAX,
222: N, N, // SEG CS
223: F|mAX, F|mAX, // DAS
224:
225: // 30
226: EA|R|B, F|EA|B, // XOR
227: EA|R, F|EA,
228: EA|R|B, F|R|B,
229: EA|R, F|R,
230: mAX, F|mAX,
231: mAX, F|mAX,
232: N, N, // SEG SS
233: F|mAX, F|mAX, // AAA
234:
235: // 38
236: EA|R|B, F, // CMP
237: EA|R, F,
238: EA|R|B, F,
239: EA|R, F,
240: mAX, F, // CMP AL,imm8
241: mAX, F, // CMP EAX,imm16/32
242: N, N, // SEG DS
243: N, N, // AAS
244:
245: // 40
246: mAX, F|mAX, // INC EAX
247: mCX, F|mCX,
248: mDX, F|mDX,
249: mBX, F|mBX,
250: mSP, F|mSP,
251: mBP, F|mBP,
252: mSI, F|mSI,
253: mDI, F|mDI,
254:
255: // 48
256: mAX, F|mAX, // DEC EAX
257: mCX, F|mCX,
258: mDX, F|mDX,
259: mBX, F|mBX,
260: mSP, F|mSP,
261: mBP, F|mBP,
262: mSI, F|mSI,
263: mDI, F|mDI,
264:
265: // 50
266: mAX|mSP, mSP|mMEM, // PUSH EAX
267: mCX|mSP, mSP|mMEM,
268: mDX|mSP, mSP|mMEM,
269: mBX|mSP, mSP|mMEM,
270: mSP|mSP, mSP|mMEM,
271: mBP|mSP, mSP|mMEM,
272: mSI|mSP, mSP|mMEM,
273: mDI|mSP, mSP|mMEM,
274:
275: // 58
276: mSP|mMEM, mAX|mSP, // POP EAX
277: mSP|mMEM, mCX|mSP,
278: mSP|mMEM, mDX|mSP,
279: mSP|mMEM, mBX|mSP,
280: mSP|mMEM, mSP|mSP,
281: mSP|mMEM, mBP|mSP,
282: mSP|mMEM, mSI|mSP,
283: mSP|mMEM, mDI|mSP,
284:
285: // 60
286: N, N, // PUSHA
287: N, N, // POPA
288: N, N, // BOUND Gv,Ma
289: N, N, // ARPL Ew,Rw
290: N, N, // SEG FS
291: N, N, // SEG GS
292: N, N, // operand size prefix
293: N, N, // address size prefix
294:
295: // 68
296: mSP, mSP|mMEM, // PUSH immed16/32
297: EA, F|R, // IMUL Gv,Ev,lv
298: mSP, mSP|mMEM, // PUSH immed8
299: EA, F|R, // IMUL Gv,Ev,lb
300: N, N, // INSB Yb,DX
301: N, N, // INSW/D Yv,DX
302: N, N, // OUTSB DX,Xb
303: N, N, // OUTSW/D DX,Xv
304:
305: // 70
306: F|N, N,
307: F|N, N,
308: F|N, N,
309: F|N, N,
310: F|N, N,
311: F|N, N,
312: F|N, N,
313: F|N, N,
314:
315: // 78
316: F|N, N,
317: F|N, N,
318: F|N, N,
319: F|N, N,
320: F|N, N,
321: F|N, N,
322: F|N, N,
323: F|N, N,
324:
325: // 80
326: N, N,
327: N, N,
328: N, N,
329: N, N,
330: EA|R, F, // TEST EA,r8
331: EA|R, F, // TEST EA,r16/32
332: EA|R, EA|R, // XCHG EA,r8
333: EA|R, EA|R, // XCHG EA,r16/32
334:
335: // 88
336: R|B, EA|B, // MOV EA8,r8
337: R, EA, // MOV EA,r16/32
338: EA|B, R|B, // MOV r8,EA8
339: EA, R, // MOV r16/32,EA
340: N, N, // MOV EA,segreg
341: EA, R, // LEA r16/32,EA
342: N, N, // MOV segreg,EA
343: mSP|mMEM, EA|mSP, // POP mem16/32
344:
345: // 90
346: 0, 0, // NOP
347: mAX|mCX, mAX|mCX,
348: mAX|mDX, mAX|mDX,
349: mAX|mBX, mAX|mBX,
350: mAX|mSP, mAX|mSP,
351: mAX|mBP, mAX|mBP,
352: mAX|mSI, mAX|mSI,
353: mAX|mDI, mAX|mDI,
354:
355: // 98
356: mAX, mAX, // CBW
357: mAX, mDX, // CWD
358: N, N|F, // CALL far ptr
359: N, N, // WAIT
360: F|mSP, mSP|mMEM, // PUSHF
361: mSP|mMEM, F|mSP, // POPF
362: mAX, F, // SAHF
363: F, mAX, // LAHF
364:
365: // A0
366: mMEM, mAX, // MOV AL,moffs8
367: mMEM, mAX, // MOV EAX,moffs32
368: mAX, mMEM, // MOV moffs8,AL
369: mAX, mMEM, // MOV moffs32,EAX
370: N, N, // MOVSB
371: N, N, // MOVSW/D
372: N, N, // CMPSB
373: N, N, // CMPSW/D
374:
375: // A8
376: mAX, F, // TEST AL,imm8
377: mAX, F, // TEST AX,imm16
378: N, N, // STOSB
379: N, N, // STOSW/D
380: N, N, // LODSB
381: N, N, // LODSW/D
382: N, N, // SCASB
383: N, N, // SCASW/D
384:
385: // B0
386: 0, mAX, // MOV AL,imm8
387: 0, mCX,
388: 0, mDX,
389: 0, mBX,
390: 0, mAX,
391: 0, mCX,
392: 0, mDX,
393: 0, mBX,
394:
395: // B8
396: 0, mAX, // MOV AX,imm16
397: 0, mCX,
398: 0, mDX,
399: 0, mBX,
400: 0, mSP,
401: 0, mBP,
402: 0, mSI,
403: 0, mDI,
404:
405: // C0
406: EA, F|EA, // Shift Eb,Ib
407: EA, F|EA,
408: N, N,
409: N, N,
410: N, N,
411: N, N,
412: 0, EA|B, // MOV EA8,imm8
413: 0, EA, // MOV EA,imm16
414:
415: // C8
416: N, N, // ENTER
417: N, N, // LEAVE
418: N, N, // RETF lw
419: N, N, // RETF
420: N, N, // INT 3
421: N, N, // INT lb
422: N, N, // INTO
423: N, N, // IRET
424:
425: // D0
426: EA, F|EA, // Shift EA,1
427: EA, F|EA,
428: EA|mCX, F|EA, // Shift EA,CL
429: EA|mCX, F|EA,
430: mAX, F|mAX, // AAM
431: mAX, F|mAX, // AAD
432: N, N, // reserved
433: mAX|mBX|mMEM, mAX, // XLAT
434:
435: // D8
436: N, N,
437: N, N,
438: N, N,
439: N, N,
440: N, N,
441: N, N,
442: N, N,
443: N, N,
444:
445: // E0
446: F|mCX|N,mCX|N, // LOOPNE jb
447: F|mCX|N,mCX|N, // LOOPE jb
448: mCX|N, mCX|N, // LOOP jb
449: mCX|N, N, // JCXZ jb
450: N, N, // IN AL,lb
451: N, N, // IN EAX,lb
452: N, N, // OUT lb,AL
453: N, N, // OUT lb,EAX
454:
455: // E8
456: N, N|F, // CALL jv
457: N, N, // JMP Jv
458: N, N, // JMP Ab
459: N, N, // JMP jb
460: N|mDX, N|mAX, // IN AL,DX
461: N|mDX, N|mAX, // IN AX,DX
462: N|mAX|mDX,N, // OUT DX,AL
463: N|mAX|mDX,N, // OUT DX,AX
464:
465: // F0
466: N, N, // LOCK
467: N, N, // reserved
468: N, N, // REPNE
469: N, N, // REP,REPE
470: N, N, // HLT
471: F, F, // CMC
472: N, N,
473: N, N,
474:
475: // F8
476: 0, F, // CLC
477: 0, F, // STC
478: N, N, // CLI
479: N, N, // STI
480: N, N, // CLD
481: N, N, // STD
482: EA, F|EA, // INC/DEC
483: N, N,
484: };
485:
486: /****************************************
487: * Same thing, but for groups.
488: */
489:
490: static unsigned grprw[8][8][2] =
491: {
492: // Grp 1
493: EA, F|EA, // ADD
494: EA, F|EA, // OR
495: F|EA, F|EA, // ADC
496: F|EA, F|EA, // SBB
497: EA, F|EA, // AND
498: EA, F|EA, // SUB
499: EA, F|EA, // XOR
500: EA, F, // CMP
501:
502: // Grp 3
503: EA, F, // TEST EA,imm
504: N, N, // reserved
505: EA, EA, // NOT
506: EA, F|EA, // NEG
507: mAX|EA, F|mAX|mDX, // MUL
508: mAX|EA, F|mAX|mDX, // IMUL
509: mAX|mDX|EA, F|mAX|mDX, // DIV
510: #if 0
511: // Could generate an exception we want to catch
512: mAX|mDX|EA|N, F|mAX|mDX|N, // IDIV
513: #else
514: mAX|mDX|EA, F|mAX|mDX, // IDIV
515: #endif
516:
517: // Grp 5
518: EA, F|EA, // INC Ev
519: EA, F|EA, // DEC Ev
520: N|EA, N, // CALL Ev
521: N|EA, N, // CALL eP
522: N|EA, N, // JMP Ev
523: N|EA, N, // JMP Ep
524: mSP|EA, mSP|mMEM, // PUSH Ev
525: N, N, // reserved
526:
527: // Grp 3, byte version
528: EA|B, F, // TEST EA,imm
529: N, N, // reserved
530: EA|B, EA|B, // NOT
531: EA|B, F|EA|B, // NEG
532: mAX|EA, F|mAX, // MUL
533: mAX|EA, F|mAX, // IMUL
534: mAX|EA, F|mAX, // DIV
535: #if 0
536: // Could generate an exception we want to catch
537: mAX|EA|N, F|mAX|N, // IDIV
538: #else
539: mAX|EA, F|mAX, // IDIV
540: #endif
541:
542: };
543:
544: /********************************************
545: * For floating point opcodes 0xD8..0xDF, with Irm < 0xC0.
546: * [][][0] = read
547: * [1] = write
548: */
549:
550: static unsigned grpf1[8][8][2] =
551: {
552: // 0xD8
553: EA|S, S|C, // FADD float
554: EA|S, S|C, // FMUL float
555: EA|S, C, // FCOM float
556: EA|S, S|C, // FCOMP float
557: EA|S, S|C, // FSUB float
558: EA|S, S|C, // FSUBR float
559: EA|S, S|C, // FDIV float
560: EA|S, S|C, // FDIVR float
561:
562: // 0xD9
563: EA, S|C, // FLD float
564: N, N, //
565: S, EA|C, // FST float
566: S, EA|S|C, // FSTP float
567: N, N, // FLDENV
568: N, N, // FLDCW
569: N, N, // FSTENV
570: N, N, // FSTCW
571:
572: // 0xDA
573: EA|S, S|C, // FIADD long
574: EA|S, S|C, // FIMUL long
575: EA|S, C, // FICOM long
576: EA|S, S|C, // FICOMP long
577: EA|S, S|C, // FISUB long
578: EA|S, S|C, // FISUBR long
579: EA|S, S|C, // FIDIV long
580: EA|S, S|C, // FIDIVR long
581:
582: // 0xDB
583: EA, S|C, // FILD long
584: S, EA|S|C, // FISTTP int
585: S, EA|C, // FIST long
586: S, EA|S|C, // FISTP long
587: N, N, //
588: EA, S|C, // FLD real80
589: N, N, //
590: S, EA|S|C, // FSTP real80
591:
592: // 0xDC
593: EA|S, S|C, // FADD double
594: EA|S, S|C, // FMUL double
595: EA|S, C, // FCOM double
596: EA|S, S|C, // FCOMP double
597: EA|S, S|C, // FSUB double
598: EA|S, S|C, // FSUBR double
599: EA|S, S|C, // FDIV double
600: EA|S, S|C, // FDIVR double
601:
602: // 0xDD
603: EA, S|C, // FLD double
604: S, EA|S|C, // FISTTP long
605: S, EA|C, // FST double
606: S, EA|S|C, // FSTP double
607: N, N, // FRSTOR
608: N, N, //
609: N, N, // FSAVE
610: C, EA, // FSTSW
611:
612: // 0xDE
613: EA|S, S|C, // FIADD short
614: EA|S, S|C, // FIMUL short
615: EA|S, C, // FICOM short
616: EA|S, S|C, // FICOMP short
617: EA|S, S|C, // FISUB short
618: EA|S, S|C, // FISUBR short
619: EA|S, S|C, // FIDIV short
620: EA|S, S|C, // FIDIVR short
621:
622: // 0xDF
623: EA, S|C, // FILD short
624: S, EA|S|C, // FISTTP short
625: S, EA|C, // FIST short
626: S, EA|S|C, // FISTP short
627: EA, S|C, // FBLD packed BCD
628: EA, S|C, // FILD long long
629: S, EA|S|C, // FBSTP packed BCD
630: S, EA|S|C, // FISTP long long
631: };
632:
633:
/********************************************
 * Micro-op counts for the floating point opcodes 0xD8..0xDF when the
 * modregrm byte is < 0xC0.
 * Indexed as uopsgrpf1[opcode - 0xD8][reg field]. A 0 entry has no count
 * of its own; uops() turns a 0 result into 5.
 */

static unsigned char uopsgrpf1[8][8] =
{
    // 0xD8: FADD, FMUL, FCOM, FCOMP, FSUB, FSUBR, FDIV, FDIVR (float)
    { 2, 2, 2, 2, 2, 2, 2, 2 },

    // 0xD9: FLD float, -, FST float, FSTP float,
    //       FLDENV, FLDCW, FSTENV, FSTCW
    { 1, 0, 2, 2, 5, 3, 5, 5 },

    // 0xDA: FIADD, FIMUL, FICOM, FICOMP, FISUB, FISUBR, FIDIV, FIDIVR (long)
    { 5, 5, 5, 5, 5, 5, 5, 5 },

    // 0xDB: FILD long, -, FIST long, FISTP long,
    //       -, FLD real80, -, FSTP real80
    { 4, 0, 4, 4, 0, 4, 0, 5 },

    // 0xDC: FADD, FMUL, FCOM, FCOMP, FSUB, FSUBR, FDIV, FDIVR (double)
    { 2, 2, 2, 2, 2, 2, 2, 2 },

    // 0xDD: FLD double, -, FST double, FSTP double,
    //       FRSTOR, -, FSAVE, FSTSW
    { 1, 0, 2, 2, 5, 0, 5, 5 },

    // 0xDE: FIADD, FIMUL, FICOM, FICOMP, FISUB, FISUBR, FIDIV, FIDIVR (short)
    { 5, 5, 5, 5, 5, 5, 5, 5 },

    // 0xDF: FILD short, -, FIST short, FISTP short,
    //       FBLD packed BCD, FILD long long, FBSTP packed BCD, FISTP long long
    { 4, 0, 4, 4, 5, 4, 5, 4 },
};
720:
/**************************************************
 * Number of micro-ops for each one-byte opcode on Pentium Pro and
 * Pentium II processors.
 *      0 means special case: uops() works the count out from the
 *        modregrm byte (or falls back to 5 if it has no handler)
 *      5 means 'complex'
 *
 * In row 68 the two PUSH immediate forms (0x68, 0x6A) have a fixed count
 * and the two IMUL immediate forms (0x69, 0x6B) are the special cases,
 * so that both IMUL forms reach their handler in uops().
 */

static const unsigned char insuops[256] =
{       0,0,0,0,        1,1,4,5,                /* 00 */
        0,0,0,0,        1,1,4,0,                /* 08 */
        0,0,0,0,        2,2,4,5,                /* 10 */
        0,0,0,0,        2,2,4,5,                /* 18 */
        0,0,0,0,        1,1,0,1,                /* 20 */
        0,0,0,0,        1,1,0,1,                /* 28 */
        0,0,0,0,        1,1,0,1,                /* 30 */
        0,0,0,0,        1,1,0,1,                /* 38 */
        1,1,1,1,        1,1,1,1,                /* 40 */
        1,1,1,1,        1,1,1,1,                /* 48 */
        3,3,3,3,        3,3,3,3,                /* 50 */
        2,2,2,2,        3,2,2,2,                /* 58 */
        5,5,5,5,        0,0,0,0,                /* 60 */
        3,0,3,0,        5,5,5,5,                /* 68 */
        1,1,1,1,        1,1,1,1,                /* 70 */
        1,1,1,1,        1,1,1,1,                /* 78 */
        0,0,0,0,        0,0,0,0,                /* 80 */
        0,0,0,0,        0,1,4,0,                /* 88 */
        1,3,3,3,        3,3,3,3,                /* 90 */
        1,1,5,0,        5,5,1,1,                /* 98 */
        1,1,2,2,        5,5,5,5,                /* A0 */
        1,1,3,3,        2,2,3,3,                /* A8 */
        1,1,1,1,        1,1,1,1,                /* B0 */
        1,1,1,1,        1,1,1,1,                /* B8 */
        0,0,5,4,        0,0,0,0,                /* C0 */
        5,3,5,5,        5,3,5,5,                /* C8 */
        0,0,0,0,        4,3,0,2,                /* D0 */
        0,0,0,0,        0,0,0,0,                /* D8 */
        4,4,4,2,        5,5,5,5,                /* E0 */
        4,1,5,1,        5,5,5,5,                /* E8 */
        0,0,5,5,        5,1,0,0,                /* F0 */
        1,1,5,5,        4,4,0,0,                /* F8 */
};
761:
// Default micro-op counts for floating point opcodes 0xD8..0xDF whose
// modregrm byte is >= 0xC0; indexed by (opcode - 0xD8). uops() overrides
// these for a few specific encodings.
static unsigned char uopsx[8] =
{
    1,          // 0xD8
    1,          // 0xD9
    2,          // 0xDA
    5,          // 0xDB
    1,          // 0xDC
    1,          // 0xDD
    1,          // 0xDE
    5,          // 0xDF
};
763:
764: /************************************************
765: * Determine number of micro-ops for Pentium Pro and Pentium II processors.
766: * 5 means 'complex'.
767: * Doesn't currently handle:
768: * floating point
769: * MMX
770: * 0F opcodes
771: * prefix bytes
772: */
773:
774: STATIC int uops(code *c)
775: { int n;
776: int op;
777: int op2;
778:
779: op = c->Iop & 0xFF;
780: if ((c->Iop & 0xFF00) == 0x0F00)
781: op = 0x0F;
782: n = insuops[op];
783: if (!n) // if special case
784: { unsigned char irm,mod,reg,rm;
785:
786: irm = c->Irm;
787: mod = (irm >> 6) & 3;
788: reg = (irm >> 3) & 7;
789: rm = irm & 7;
790:
791: switch (op)
792: {
793: case 0x10:
794: case 0x11: // ADC rm,r
795: case 0x18:
796: case 0x19: // SBB rm,r
797: n = (mod == 3) ? 2 : 4;
798: break;
799:
800: case 0x12:
801: case 0x13: // ADC r,rm
802: case 0x1A:
803: case 0x1B: // SBB r,rm
804: n = (mod == 3) ? 2 : 3;
805: break;
806:
807: case 0x00:
808: case 0x01: // ADD rm,r
809: case 0x08:
810: case 0x09: // OR rm,r
811: case 0x20:
812: case 0x21: // AND rm,r
813: case 0x28:
814: case 0x29: // SUB rm,r
815: case 0x30:
816: case 0x31: // XOR rm,r
817: n = (mod == 3) ? 1 : 4;
818: break;
819:
820: case 0x02:
821: case 0x03: // ADD r,rm
822: case 0x0A:
823: case 0x0B: // OR r,rm
824: case 0x22:
825: case 0x23: // AND r,rm
826: case 0x2A:
827: case 0x2B: // SUB r,rm
828: case 0x32:
829: case 0x33: // XOR r,rm
830: case 0x38:
831: case 0x39: // CMP rm,r
832: case 0x3A:
833: case 0x3B: // CMP r,rm
834: case 0x69: // IMUL rm,r,imm
835: case 0x6B: // IMUL rm,r,imm8
836: case 0x84:
837: case 0x85: // TEST rm,r
838: n = (mod == 3) ? 1 : 2;
839: break;
840:
841: case 0x80:
842: case 0x81:
843: case 0x82:
844: case 0x83:
845: if (reg == 2 || reg == 3) // ADC/SBB rm,imm
846: n = (mod == 3) ? 2 : 4;
847: else if (reg == 7) // CMP rm,imm
848: n = (mod == 3) ? 1 : 2;
849: else
850: n = (mod == 3) ? 1 : 4;
851: break;
852:
853: case 0x86:
854: case 0x87: // XCHG rm,r
855: n = (mod == 3) ? 3 : 5;
856: break;
857:
858: case 0x88:
859: case 0x89: // MOV rm,r
860: n = (mod == 3) ? 1 : 2;
861: break;
862:
863: case 0x8A:
864: case 0x8B: // MOV r,rm
865: n = 1;
866: break;
867:
868: case 0x8C: // MOV Sreg,rm
869: n = (mod == 3) ? 1 : 3;
870: break;
871:
872: case 0x8F:
873: if (reg == 0) // POP m
874: n = 5;
875: break;
876:
877: case 0xC6:
878: case 0xC7:
879: if (reg == 0) // MOV rm,imm
880: n = (mod == 3) ? 1 : 2;
881: break;
882:
883: case 0xD0:
884: case 0xD1:
885: if (reg == 2 || reg == 3) // RCL/RCR rm,1
886: n = (mod == 3) ? 2 : 4;
887: else
888: n = (mod == 3) ? 1 : 4;
889: break;
890:
891: case 0xC0:
892: case 0xC1: // RCL/RCR rm,imm8
893: case 0xD2:
894: case 0xD3:
895: if (reg == 2 || reg == 3) // RCL/RCR rm,CL
896: n = 5;
897: else
898: n = (mod == 3) ? 1 : 4;
899: break;
900:
901: case 0xD8:
902: case 0xD9:
903: case 0xDA:
904: case 0xDB:
905: case 0xDC:
906: case 0xDD:
907: case 0xDE:
908: case 0xDF:
909: // Floating point opcodes
910: if (irm < 0xC0)
911: { n = uopsgrpf1[op - 0xD8][reg];
912: break;
913: }
914: n = uopsx[op - 0xD8];
915: switch (op)
916: {
917: case 0xD9:
918: switch (irm)
919: {
920: case 0xE0: // FCHS
921: n = 3;
922: break;
923: case 0xE8:
924: case 0xE9:
925: case 0xEA:
926: case 0xEB:
927: case 0xEC:
928: case 0xED:
929: n = 2;
930: break;
931: case 0xF0:
932: case 0xF1:
933: case 0xF2:
934: case 0xF3:
935: case 0xF4:
936: case 0xF5:
937: case 0xF8:
938: case 0xF9:
939: case 0xFB:
940: case 0xFC:
941: case 0xFD:
942: case 0xFE:
943: case 0xFF:
944: n = 5;
945: break;
946: }
947: break;
948: case 0xDE:
949: if (irm == 0xD9) // FCOMPP
950: n = 2;
951: break;
952: }
953: break;
954:
955: case 0xF6:
956: if (reg == 6 || reg == 7) // DIV AL,rm8
957: n = (mod == 3) ? 3 : 4;
958: else if (reg == 4 || reg == 5 || reg == 0) // MUL/IMUL/TEST rm8
959: n = (mod == 3) ? 1 : 2;
960: else if (reg == 2 || reg == 3) // NOT/NEG rm
961: n = (mod == 3) ? 1 : 4;
962: break;
963:
964: case 0xF7:
965: if (reg == 6 || reg == 7) // DIV EAX,rm
966: n = 4;
967: else if (reg == 4 || reg == 5) // MUL/IMUL rm
968: n = (mod == 3) ? 3 : 4;
969: else if (reg == 2 || reg == 3) // NOT/NEG rm
970: n = (mod == 3) ? 1 : 4;
971: break;
972:
973: case 0xFF:
974: if (reg == 2 || reg == 3 || // CALL rm, CALL m,rm
975: reg == 5) // JMP seg:offset
976: n = 5;
977: else if (reg == 4)
978: n = (mod == 3) ? 1 : 2;
979: else if (reg == 0 || reg == 1) // INC/DEC rm
980: n = (mod == 3) ? 1 : 4;
981: else if (reg == 6) // PUSH rm
982: n = (mod == 3) ? 3 : 4;
983: break;
984:
985: case 0x0F:
986: op2 = c->Iop & 0xFF;
987: if ((op2 & 0xF0) == 0x80) // Jcc
988: { n = 1;
989: break;
990: }
991: if ((op2 & 0xF0) == 0x90) // SETcc
992: { n = (mod == 3) ? 1 : 3;
993: break;
994: }
995: if (op2 == 0xB6 || op2 == 0xB7 || // MOVZX
996: op2 == 0xBE || op2 == 0xBF) // MOVSX
997: { n = 1;
998: break;
999: }
1000: if (op2 == 0xAF) // IMUL r,m
1001: { n = (mod == 3) ? 1 : 2;
1002: break;
1003: }
1004: break;
1005: }
1006: }
1007: if (n == 0)
1008: n = 5; // copout for now
1009: return n;
1010: }
1011:
1012: /******************************************
1013: * Determine pairing classification.
1014: * Don't deal with floating point, just assume they are all NP (Not Pairable).
1015: * Returns:
1016: * NP,UV,PU,PV optionally OR'd with PE
1017: */
1018:
1019: STATIC int pair_class(code *c)
1020: { unsigned char op;
1021: unsigned char irm,mod,reg,rm;
1022: unsigned a32;
1023: int pc;
1024:
1025: // Of course, with Intel this is *never* simple, and Intel's
1026: // documentation is vague about the specifics.
1027:
1028: op = c->Iop & 0xFF;
1029: if ((c->Iop & 0xFF00) == 0x0F00)
1030: op = 0x0F;
1031: pc = pentcycl[op];
1032: a32 = I32;
1033: if (c->Iflags & CFaddrsize)
1034: a32 ^= 1;
1035: irm = c->Irm;
1036: mod = (irm >> 6) & 3;
1037: reg = (irm >> 3) & 7;
1038: rm = irm & 7;
1039: switch (op)
1040: {
1041: case 0x0F: // 2 byte opcode
1042: if ((c->Iop & 0xF0) == 0x80) // if Jcc
1043: pc = PV | PF;
1044: break;
1045:
1046: case 0x80:
1047: case 0x81:
1048: case 0x83:
1049: if (reg == 2 || // ADC EA,immed
1050: reg == 3) // SBB EA,immed
1051: { pc = PU;
1052: goto L2;
1053: }
1054: goto L1; // AND/OR/XOR/ADD/SUB/CMP EA,immed
1055:
1056: case 0x84:
1057: case 0x85: // TEST EA,reg
1058: if (mod == 3) // TEST reg,reg
1059: pc = UV;
1060: break;
1061:
1062: case 0xC0:
1063: case 0xC1:
1064: if (reg >= 4)
1065: pc = PU;
1066: break;
1067:
1068: case 0xC6:
1069: case 0xC7:
1070: if (reg == 0) // MOV EA,immed
1071: {
1072: L1:
1073: pc = UV;
1074: L2:
1075: // if EA contains a displacement then
1076: // can't execute in V, or pair in U
1077: switch (mod)
1078: { case 0:
1079: if (a32)
1080: { if (rm == 5 ||
1081: (rm == 4 && (c->Isib & 7) == 5)
1082: )
1083: pc = NP;
1084: }
1085: else if (rm == 6)
1086: pc = NP;
1087: break;
1088: case 1:
1089: case 2:
1090: pc = NP;
1091: break;
1092: }
1093: }
1094: break;
1095:
1096: case 0xD9:
1097: if (irm < 0xC0)
1098: {
1099: if (reg == 0)
1100: pc = FX;
1101: }
1102: else if (irm < 0xC8)
1103: pc = FX;
1104: else if (irm < 0xD0)
1105: pc = PV;
1106: else
1107: {
1108: switch (irm)
1109: {
1110: case 0xE0:
1111: case 0xE1:
1112: case 0xE4:
1113: pc = FX;
1114: break;
1115: }
1116: }
1117: break;
1118:
1119: case 0xDB:
1120: if (irm < 0xC0 && (reg == 0 || reg == 5))
1121: pc = FX;
1122: break;
1123:
1124: case 0xDD:
1125: if (irm < 0xC0)
1126: {
1127: if (reg == 0)
1128: pc = FX;
1129: }
1130: else if (irm >= 0xE0 && irm < 0xF0)
1131: pc = FX;
1132: break;
1133:
1134: case 0xDF:
1135: if (irm < 0xC0 && (reg == 0 || reg == 5))
1136: pc = FX;
1137: break;
1138:
1139: case 0xFE:
1140: if (reg == 0 || reg == 1) // INC/DEC EA
1141: pc = UV;
1142: break;
1143: case 0xFF:
1144: if (reg == 0 || reg == 1) // INC/DEC EA
1145: pc = UV;
1146: else if (reg == 2 || reg == 4) // CALL/JMP near ptr EA
1147: pc = PE|PV;
1148: else if (reg == 6 && mod == 3) // PUSH reg
1149: pc = PE | UV;
1150: break;
1151: }
1152: if (c->Iflags & CFPREFIX && pc == UV) // if prefix byte
1153: pc = PU;
1154: return pc;
1155: }
1156:
1157: /******************************************
1158: * For an instruction, determine what is read
1159: * and what is written, and what is used for addressing.
1160: * Determine operand size if EA (larger is ok).
1161: */
1162:
1163: STATIC void getinfo(Cinfo *ci,code *c)
1164: {
1165: memset(ci,0,sizeof(Cinfo));
1166: if (!c)
1167: return;
1168: ci->c = c;
1169:
1170: if (PRO)
1171: {
1172: ci->uops = uops(c);
1173: ci->isz = calccodsize(c);
1174: }
1175: else
1176: ci->pair = pair_class(c);
1177:
1178: unsigned char op;
1179: unsigned char op2;
1180: unsigned char irm,mod,reg,rm;
1181: unsigned a32;
1182: int pc;
1183: unsigned r,w;
1184: int sz = I32 ? 4 : 2;
1185:
1186: ci->r = 0;
1187: ci->w = 0;
1188: ci->a = 0;
1189: op = c->Iop & 0xFF;
1190: if ((c->Iop & 0xFF00) == 0x0F00)
1191: op = 0x0F;
1192: //printf("\tgetinfo %x, op %x \n",c,op);
1193: pc = pentcycl[op];
1194: a32 = I32;
1195: if (c->Iflags & CFaddrsize)
1196: a32 ^= 1;
1197: if (c->Iflags & CFopsize)
1198: sz ^= 2 | 4;
1199: irm = c->Irm;
1200: mod = (irm >> 6) & 3;
1201: reg = (irm >> 3) & 7;
1202: rm = irm & 7;
1203:
1204: r = oprw[op][0];
1205: w = oprw[op][1];
1206:
1207: switch (op)
1208: {
1209: case 0x50:
1210: case 0x51:
1211: case 0x52:
1212: case 0x53:
1213: case 0x55:
1214: case 0x56:
1215: case 0x57: // PUSH reg
1216: ci->flags |= CIFLpush;
1217: case 0x54: // PUSH ESP
1218: case 0x6A: // PUSH imm8
1219: case 0x68: // PUSH imm
1220: case 0x0E:
1221: case 0x16:
1222: case 0x1E:
1223: case 0x06:
1224: case 0x9C:
1225: Lpush:
1226: ci->spadjust = -sz;
1227: ci->a |= mSP;
1228: break;
1229:
1230: case 0x58:
1231: case 0x59:
1232: case 0x5A:
1233: case 0x5B:
1234: case 0x5C:
1235: case 0x5D:
1236: case 0x5E:
1237: case 0x5F: // POP reg
1238: case 0x1F:
1239: case 0x07:
1240: case 0x17:
1241: case 0x9D: // POPF
1242: Lpop:
1243: ci->spadjust = sz;
1244: ci->a |= mSP;
1245: break;
1246:
1247: case 0x80:
1248: if (reg == 7) // CMP
1249: c->Iflags |= CFpsw;
1250: r = B | grprw[0][reg][0]; // Grp 1 (byte)
1251: w = B | grprw[0][reg][1];
1252: break;
1253:
1254: case 0x81:
1255: case 0x83:
1256: if (reg == 7) // CMP
1257: c->Iflags |= CFpsw;
1258: else if (irm == modregrm(3,0,SP)) // ADD ESP,imm
1259: {
1260: assert(c->IFL2 == FLconst);
1261: ci->spadjust = (op == 0x81) ? c->IEV2.Vint : (signed char)c->IEV2.Vint;
1262: }
1263: else if (irm == modregrm(3,5,SP)) // SUB ESP,imm
1264: {
1265: assert(c->IFL2 == FLconst);
1266: ci->spadjust = (op == 0x81) ? -c->IEV2.Vint : -(signed char)c->IEV2.Vint;
1267: }
1268: r = grprw[0][reg][0]; // Grp 1
1269: w = grprw[0][reg][1];
1270: break;
1271:
1272: case 0x8F:
1273: if (reg == 0) // POP rm
1274: goto Lpop;
1275: break;
1276:
1277: case 0xA0:
1278: case 0xA1:
1279: case 0xA2:
1280: case 0xA3:
1281: // Fake having an EA to simplify code in conflict()
1282: ci->flags |= CIFLea;
1283: ci->reg = 0;
1284: ci->sibmodrm = a32 ? modregrm(0,0,5) : modregrm(0,0,6);
1285: c->IFL1 = c->IFL2;
1286: c->IEV1 = c->IEV2;
1287: break;
1288:
1289: case 0xC2:
1290: case 0xC3:
1291: case 0xCA:
1292: case 0xCB: // RET
1293: ci->a |= mSP;
1294: break;
1295:
1296: case 0xE8:
1297: if (c->Iflags & CFclassinit) // call to __j_classinit
1298: { r = 0;
1299: w = F;
1300: #if CLASSINIT2
1301: ci->pair = UV; // it is patched to CMP EAX,0
1302: #else
1303: ci->pair = NP;
1304: #endif
1305: }
1306: break;
1307:
1308: case 0xF6:
1309: r = grprw[3][reg][0]; // Grp 3, byte version
1310: w = grprw[3][reg][1];
1311: break;
1312:
1313: case 0xF7:
1314: r = grprw[1][reg][0]; // Grp 3
1315: w = grprw[1][reg][1];
1316: break;
1317:
1318: case 0x0F:
1319: op2 = c->Iop & 0xFF;
1320: if ((op2 & 0xF0) == 0x80) // if Jxx instructions
1321: {
1322: ci->r = F | N;
1323: ci->w = N;
1324: goto Lret;
1325: }
1326: ci->r = N;
1327: ci->w = N; // copout for now
1328: goto Lret;
1329:
1330: case 0xD7: // XLAT
1331: ci->a = mAX | mBX;
1332: break;
1333:
1334: case 0xFF:
1335: r = grprw[2][reg][0]; // Grp 5
1336: w = grprw[2][reg][1];
1337: if (reg == 6) // PUSH rm
1338: goto Lpush;
1339: break;
1340:
1341: case 0x38:
1342: case 0x39:
1343: case 0x3A:
1344: case 0x3B:
1345: case 0x3C: // CMP AL,imm8
1346: case 0x3D: // CMP EAX,imm32
1347: // For CMP opcodes, always test for flags
1348: c->Iflags |= CFpsw;
1349: break;
1350:
1351: case 0xD0:
1352: case 0xD1:
1353: case 0xD2:
1354: case 0xD3:
1355: case 0xC0:
1356: case 0xC1:
1357: if (reg == 2 || reg == 3) // if RCL or RCR
1358: c->Iflags |= CFpsw; // always test for flags
1359: break;
1360:
1361: case 0xD8:
1362: case 0xD9:
1363: case 0xDA:
1364: case 0xDB:
1365: case 0xDC:
1366: case 0xDD:
1367: case 0xDE:
1368: case 0xDF:
1369: if (irm < 0xC0)
1370: { r = grpf1[op - 0xD8][reg][0];
1371: w = grpf1[op - 0xD8][reg][1];
1372: switch (op)
1373: {
1374: case 0xD8:
1375: if (reg == 3) // if FCOMP
1376: ci->fpuadjust = -1;
1377: else
1378: ci->fp_op = FPfop;
1379: break;
1380:
1381: case 0xD9:
1382: if (reg == 0) // if FLD float
1383: { ci->fpuadjust = 1;
1384: ci->fp_op = FPfld;
1385: }
1386: else if (reg == 3) // if FSTP float
1387: { ci->fpuadjust = -1;
1388: ci->fp_op = FPfstp;
1389: }
1390: else if (reg == 5 || reg == 7)
1391: sz = 2;
1392: else if (reg == 4 || reg == 6)
1393: sz = 28;
1394: break;
1395: case 0xDA:
1396: if (reg == 3) // if FICOMP
1397: ci->fpuadjust = -1;
1398: break;
1399: case 0xDB:
1400: if (reg == 0 || reg == 5)
1401: { ci->fpuadjust = 1;
1402: ci->fp_op = FPfld; // FILD / FLD long double
1403: }
1404: if (reg == 3 || reg == 7)
1405: ci->fpuadjust = -1;
1406: if (reg == 7)
1407: ci->fp_op = FPfstp; // FSTP long double
1408: if (reg == 5 || reg == 7)
1409: sz = 10;
1410: break;
1411: case 0xDC:
1412: sz = 8;
1413: if (reg == 3) // if FCOMP
1414: ci->fpuadjust = -1;
1415: else
1416: ci->fp_op = FPfop;
1417: break;
1418: case 0xDD:
1419: if (reg == 0) // if FLD double
1420: { ci->fpuadjust = 1;
1421: ci->fp_op = FPfld;
1422: }
1423: if (reg == 3) // if FSTP double
1424: { ci->fpuadjust = -1;
1425: ci->fp_op = FPfstp;
1426: }
1427: if (reg == 7)
1428: sz = 2;
1429: else if (reg == 4 || reg == 6)
1430: sz = 108;
1431: else
1432: sz = 8;
1433: break;
1434: case 0xDE:
1435: sz = 2;
1436: if (reg == 3) // if FICOMP
1437: ci->fpuadjust = -1;
1438: break;
1439: case 0xDF:
1440: sz = 2;
1441: if (reg == 4 || reg == 6)
1442: sz = 10;
1443: else if (reg == 5 || reg == 7)
1444: sz = 8;
1445: if (reg == 0 || reg == 4 || reg == 5)
1446: ci->fpuadjust = 1;
1447: else if (reg == 3 || reg == 6 || reg == 7)
1448: ci->fpuadjust = -1;
1449: break;
1450: }
1451: break;
1452: }
1453: else if (op == 0xDE)
1454: { ci->fpuadjust = -1; // pop versions of Fop's
1455: if (irm == 0xD9)
1456: ci->fpuadjust = -2; // FCOMPP
1457: }
1458:
1459: // Most floating point opcodes aren't staged, but are
1460: // sent right through, in order to make use of the large
1461: // latencies with floating point instructions.
1462: if (ci->fp_op == FPfld ||
1463: (op == 0xD9 && (irm & 0xF8) == 0xC0))
1464: ; // FLD ST(i)
1465: else
1466: ci->flags |= CIFLnostage;
1467:
1468: switch (op)
1469: {
1470: case 0xD8:
1471: r = S;
1472: w = C;
1473: if ((irm & ~7) == 0xD0)
1474: w |= S;
1475: break;
1476: case 0xD9:
1477: // FCHS or FABS or FSQRT
1478: if (irm == 0xE0 || irm == 0xE1 || irm == 0xFA)
1479: ci->fp_op = FPfop;
1480: r = S;
1481: w = S|C;
1482: break;
1483: case 0xDA:
1484: if (irm == 0xE9) // FUCOMPP
1485: { r = S;
1486: w = S|C;
1487: break;
1488: }
1489: break;
1490: case 0xDB:
1491: if (irm == 0xE2) // FCLEX
1492: { r = 0;
1493: w = C;
1494: break;
1495: }
1496: if (irm == 0xE3) // FINIT
1497: { r = 0;
1498: w = S|C;
1499: break;
1500: }
1501: break;
1502: case 0xDC:
1503: case 0xDE:
1504: if ((irm & 0xF0) != 0xD0)
1505: { r = S;
1506: w = S|C;
1507: break;
1508: }
1509: break;
1510: case 0xDD:
1511: // Not entirely correct, but conservative
1512: r = S;
1513: w = S|C;
1514: break;
1515: case 0xDF:
1516: if (irm == 0xE0) // FSTSW AX
1517: { r = C;
1518: w = mAX;
1519: break;
1520: }
1521: break;
1522: }
1523: break;
1524: #if DEBUG
1525: default:
1526: //printf("\t\tNo special case\n");
1527: break;
1528: #endif
1529: }
1530:
1531: if ((r | w) & B) // if byte operation
1532: sz = 1; // operand size is 1
1533:
1534: ci->r = r & ~(R | EA);
1535: ci->w = w & ~(R | EA);
1536: if (r & R)
1537: ci->r |= mask[(r & B) ? (reg & 3) : reg];
1538: if (w & R)
1539: ci->w |= mask[(w & B) ? (reg & 3) : reg];
1540:
1541: // OR in bits for EA addressing mode
1542: if ((r | w) & EA)
1543: { unsigned char sib;
1544:
1545: sib = 0;
1546: switch (mod)
1547: {
1548: case 0:
1549: if (a32)
1550: {
1551: if (rm == 4)
1552: { sib = c->Isib;
1553: if ((sib & modregrm(0,7,0)) != modregrm(0,4,0))
1554: ci->a |= mask[(sib >> 3) & 7]; // index register
1555: if ((sib & 7) != 5)
1556: ci->a |= mask[sib & 7]; // base register
1557: }
1558: else if (rm != 5)
1559: ci->a |= mask[rm];
1560: }
1561: else
1562: { static unsigned char ea16[8] = {mBX|mSI,mBX|mDI,mBP|mSI,mBP|mDI,mSI,mDI,0,mBX};
1563: ci->a |= ea16[rm];
1564: }
1565: goto Lmem;
1566:
1567: case 1:
1568: case 2:
1569: if (a32)
1570: {
1571: if (rm == 4)
1572: { sib = c->Isib;
1573: if ((sib & modregrm(0,7,0)) != modregrm(0,4,0))
1574: ci->a |= mask[(sib >> 3) & 7]; // index register
1575: ci->a |= mask[sib & 7]; // base register
1576: }
1577: else
1578: ci->a |= mask[rm];
1579: }
1580: else
1581: { static unsigned char ea16[8] = {mBX|mSI,mBX|mDI,mBP|mSI,mBP|mDI,mSI,mDI,mBP,mBX};
1582: ci->a |= ea16[rm];
1583: }
1584:
1585: Lmem:
1586: if (r & EA)
1587: ci->r |= mMEM;
1588: if (w & EA)
1589: ci->w |= mMEM;
1590: ci->flags |= CIFLea;
1591: break;
1592:
1593: case 3:
1594: if (r & EA)
1595: ci->r |= mask[(r & B) ? (rm & 3) : rm];
1596: if (w & EA)
1597: ci->w |= mask[(w & B) ? (rm & 3) : rm];
1598: break;
1599: }
1600: // Adjust sibmodrm so that addressing modes can be compared simply
1601: irm &= modregrm(3,0,7);
1602: if (a32)
1603: {
1604: if (irm != modregrm(0,0,5))
1605: {
1606: switch (mod)
1607: { case 0:
1608: if ((sib & 7) != 5) // if not disp32[index]
1609: { c->IFL1 = FLconst;
1610: c->IEVpointer1 = 0;
1611: irm |= 0x80;
1612: }
1613: break;
1614: case 1:
1615: c->IEVpointer1 = (signed char) c->IEVpointer1;
1616: irm = modregrm(2,0,rm);
1617: break;
1618: }
1619: }
1620: }
1621: else
1622: {
1623: if (irm != modregrm(0,0,6))
1624: {
1625: switch (mod)
1626: { case 0:
1627: c->IFL1 = FLconst;
1628: c->IEVpointer1 = 0;
1629: irm |= 0x80;
1630: break;
1631: case 1:
1632: c->IEVpointer1 = (signed char) c->IEVpointer1;
1633: irm = modregrm(2,0,rm);
1634: break;
1635: }
1636: }
1637: }
1638:
1639: ci->r |= ci->a;
1640: ci->reg = reg;
1641: ci->sibmodrm = (sib << 8) | irm;
1642: }
1643: Lret:
1644: if (ci->w & mSP) // if stack pointer is modified
1645: ci->w |= mMEM; // then we are implicitly writing to memory
1646: if (op == 0x8D) // if LEA
1647: ci->r &= ~mMEM; // memory is not actually read
1648: ci->sz = sz;
1649: #if DEBUG
1650: //printf("\t\t"); ci->print();
1651: #endif
1652: }
1653:
1654: /******************************************
1655: * Determine if two instructions can pair.
1656: * Assume that in general, cu can pair in the U pipe and cv in the V.
1657: * Look for things like register contentions.
1658: * Input:
1659: * cu instruction for U pipe
1660: * cv instruction for V pipe
1661: * Returns:
1662: * !=0 if they can pair
1663: */
1664:
1665: STATIC int pair_test(Cinfo *cu,Cinfo *cv)
1666: { unsigned pcu;
1667: unsigned pcv;
1668: unsigned r1,w1;
1669: unsigned r2,w2;
1670: unsigned x;
1671:
1672: pcu = cu->pair;
1673: if (!(pcu & PU))
1674: {
1675: // See if pairs with FXCH and cv is FXCH
1676: if (pcu & FX && cv->c->Iop == 0xD9 && (cv->c->Irm & ~7) == 0xC8)
1677: goto Lpair;
1678: goto Lnopair;
1679: }
1680: pcv = cv->pair;
1681: if (!(pcv & PV))
1682: goto Lnopair;
1683:
1684: r1 = cu->r;
1685: w1 = cu->w;
1686: r2 = cv->r;
1687: w2 = cv->w;
1688:
1689: x = w1 & (r2 | w2) & ~(F|mMEM); // register contention
1690: if (x && // if register contention
1691: !(x == mSP && pcu & pcv & PE) // and not exception
1692: )
1693: goto Lnopair;
1694:
1695: // Look for flags contention
1696: if (w1 & r2 & F && !(pcv & PF))
1697: goto Lnopair;
1698:
1699: Lpair:
1700: return 1;
1701:
1702: Lnopair:
1703: return 0;
1704: }
1705:
1706: /******************************************
1707: * Determine if two instructions have an AGI or register contention.
1708: * Returns:
1709: * !=0 if they have an AGI
1710: */
1711:
1712: STATIC int pair_agi(Cinfo *c1,Cinfo *c2)
1713: { unsigned x;
1714:
1715: x = c1->w & c2->a;
1716: return x && !(x == mSP && c1->pair & c2->pair & PE);
1717: }
1718:
1719: /********************************************
1720: * Determine if three instructions can decode simultaneously
1721: * in Pentium Pro and Pentium II.
1722: * Input:
1723: * c0,c1,c2 candidates for decoders 0,1,2
1724: * c2 can be NULL
1725: * Returns:
1726: * !=0 if they can decode simultaneously
1727: */
1728:
1729: STATIC int triple_test(Cinfo *c0,Cinfo *c1,Cinfo *c2)
1730: { int c2isz;
1731:
1732: assert(c0);
1733: if (!c1)
1734: goto Lnopair;
1735: c2isz = c2 ? c2->isz : 0;
1736: if (c0->isz > 7 || c1->isz > 7 || c2isz > 7 ||
1737: c0->isz + c1->isz + c2isz > 16)
1738: goto Lnopair;
1739:
1740: // 4-1-1 decode
1741: if (c1->uops > 1 ||
1742: (c2 && c2->uops > 1))
1743: goto Lnopair;
1744:
1745: Lpair:
warning C4102: 'Lpair' : unreferenced label
1746: return 1;
1747:
1748: Lnopair:
1749: return 0;
1750: }
1751:
1752: /********************************************
1753: * Get next instruction worth looking at for scheduling.
1754: * Returns:
1755: * NULL no more instructions
1756: */
1757:
1758: STATIC code * cnext(code *c)
1759: {
1760: while (1)
1761: {
1762: c = code_next(c);
1763: if (!c)
1764: break;
1765: if (c->Iflags & (CFtarg | CFtarg2))
1766: break;
1767: if (!(c->Iop == NOP ||
1768: c->Iop == (ESCAPE | ESClinnum)))
1769: break;
1770: }
1771: return c;
1772: }
1773:
1774: /******************************************
1775: * Instruction scheduler.
1776: * Input:
1777: * c list of instructions to schedule
1778: * scratch scratch registers we can use
1779: * Returns:
1780: * revised list of scheduled instructions
1781: */
1782:
1783: ///////////////////////////////////
1784: // Determine if c1 and c2 are swappable.
1785: // c1 comes before c2.
1786: // If they do not conflict
1787: // return 0
1788: // If they do conflict
1789: // return 0x100 + delay_clocks
1790: // Input:
1791: // fpsched if 1, then adjust fxch_pre and fxch_post to swap,
1792: // then return 0
1793: // if 2, then adjust ci1 as well as ci2
1794:
1795: STATIC int conflict(Cinfo *ci1,Cinfo *ci2,int fpsched)
1796: {
1797: code *c1;
1798: code *c2;
1799: unsigned r1,w1,a1;
1800: unsigned r2,w2,a2;
1801: int sz1,sz2;
1802: int i = 0;
1803: int delay_clocks;
1804:
1805: c1 = ci1->c;
1806: c2 = ci2->c;
1807:
1808: //printf("conflict %x %x\n",c1,c2);
1809:
1810: r1 = ci1->r;
1811: w1 = ci1->w;
1812: a1 = ci1->a;
1813: sz1 = ci1->sz;
1814:
1815: r2 = ci2->r;
1816: w2 = ci2->w;
1817: a2 = ci2->a;
1818: sz2 = ci2->sz;
1819:
1820: //printf("r1 %lx w1 %lx a1 %lx sz1 %x\n",r1,w1,a1,sz1);
1821: //printf("r2 %lx w2 %lx a2 %lx sz2 %x\n",r2,w2,a2,sz2);
1822:
1823: if ((c1->Iflags | c2->Iflags) & CFvolatile)
1824: goto Lconflict;
1825:
1826: // Determine if we should handle FPU register conflicts separately
1827: //if (fpsched) printf("fp_op %d,%d:\n",ci1->fp_op,ci2->fp_op);
1828: if (fpsched && ci1->fp_op && ci2->fp_op)
1829: {
1830: w1 &= ~(S|C);
1831: r1 &= ~(S|C);
1832: w2 &= ~(S|C);
1833: r2 &= ~(S|C);
1834: }
1835: else
1836: fpsched = 0;
1837:
1838: if ((r1 | r2) & N)
1839: {
1840: goto Lconflict;
1841: }
1842:
1843: #if 0
1844: if (c1->Iop == 0xFF && c2->Iop == 0x8B)
1845: { c1->print(); c2->print(); i = 1;
1846: printf("r1=%lx, w1=%lx, a1=%lx, sz1=%d, r2=%lx, w2=%lx, a2=%lx, sz2=%d\n",r1,w1,a1,sz1,r2,w2,a2,sz2);
1847: }
1848: #endif
1849: L1:
1850: if (w1 & r2 || (r1 | w1) & w2)
1851: { unsigned char ifl1,ifl2;
1852:
1853: if (i) printf("test\n");
1854:
1855: #if 0
1856: if (c1->IFL1 != c2->IFL1) printf("t1\n");
1857: if ((c1->Irm & modregrm(3,0,7)) != (c2->Irm & modregrm(3,0,7))) printf("t2\n");
1858: if ((issib(c1->Irm) && c1->Isib != c2->Isib)) printf("t3\n");
1859: if (c1->IEVpointer1 + sz1 <= c2->IEVpointer1) printf("t4\n");
1860: if (c2->IEVpointer1 + sz2 <= c1->IEVpointer1) printf("t5\n");
1861: #endif
1862:
1863: #if 1 // make sure CFpsw is reliably set
1864: if (w1 & w2 & F && // if both instructions write to flags
1865: w1 != F &&
1866: w2 != F &&
1867: !((r1 | r2) & F) && // but neither instruction reads them
1868: !((c1->Iflags | c2->Iflags) & CFpsw)) // and we don't care about flags
1869: {
1870: w1 &= ~F;
1871: w2 &= ~F; // remove conflict
1872: goto L1; // and try again
1873: }
1874: #endif
1875: // If other than the memory reference is a conflict
1876: if (w1 & r2 & ~mMEM || (r1 | w1) & w2 & ~mMEM)
1877: { if (i) printf("\t1\n");
1878: if (i) printf("r1=%x, w1=%x, a1=%x, sz1=%d, r2=%x, w2=%x, a2=%x, sz2=%d\n",r1,w1,a1,sz1,r2,w2,a2,sz2);
1879: goto Lconflict;
1880: }
1881:
1882: // If referring to distinct types, then no dependency
1883: if (c1->Irex && c2->Irex && c1->Irex != c2->Irex)
1884: goto Lswap;
1885:
1886: ifl1 = c1->IFL1;
1887: ifl2 = c2->IFL1;
1888:
1889: // Special case: Allow indexed references using registers other than
1890: // ESP and EBP to be swapped with PUSH instructions
1891: if (((c1->Iop & ~7) == 0x50 || // PUSH reg
1892: c1->Iop == 0x6A || // PUSH imm8
1893: c1->Iop == 0x68 || // PUSH imm16/imm32
1894: (c1->Iop == 0xFF && ci1->reg == 6) // PUSH EA
1895: ) &&
1896: ci2->flags & CIFLea && !(a2 & mSP) &&
1897: !(a2 & mBP && (long)c2->IEVpointer1 < 0)
1898: )
1899: {
1900: if (c1->Iop == 0xFF)
1901: {
1902: if (!(w2 & mMEM))
1903: goto Lswap;
1904: }
1905: else
1906: goto Lswap;
1907: }
1908:
1909: // Special case: Allow indexed references using registers other than
1910: // ESP and EBP to be swapped with PUSH instructions
1911: if (((c2->Iop & ~7) == 0x50 || // PUSH reg
1912: c2->Iop == 0x6A || // PUSH imm8
1913: c2->Iop == 0x68 || // PUSH imm16/imm32
1914: (c2->Iop == 0xFF && ci2->reg == 6) // PUSH EA
1915: ) &&
1916: ci1->flags & CIFLea && !(a1 & mSP) &&
1917: !(a2 & mBP && (long)c2->IEVpointer1 < 0)
1918: )
1919: {
1920: if (c2->Iop == 0xFF)
1921: {
1922: if (!(w1 & mMEM))
1923: goto Lswap;
1924: }
1925: else
1926: goto Lswap;
1927: }
1928:
1929: // If not both an EA addressing mode, conflict
1930: if (!(ci1->flags & ci2->flags & CIFLea))
1931: { if (i) printf("\t2\n");
1932: goto Lconflict;
1933: }
1934:
1935: if (ci1->sibmodrm == ci2->sibmodrm)
1936: { if (ifl1 != ifl2)
1937: goto Lswap;
1938: switch (ifl1)
1939: {
1940: case FLconst:
1941: if (c1->IEV1.Vint != c2->IEV1.Vint &&
1942: (c1->IEV1.Vint + sz1 <= c2->IEV1.Vint ||
1943: c2->IEV1.Vint + sz2 <= c1->IEV1.Vint))
1944: goto Lswap;
1945: break;
1946: case FLdatseg:
1947: if (c1->IEVseg1 != c2->IEVseg1 ||
1948: c1->IEV1.Vint + sz1 <= c2->IEV1.Vint ||
1949: c2->IEV1.Vint + sz2 <= c1->IEV1.Vint)
1950: goto Lswap;
1951: break;
1952: }
1953: }
1954:
1955: if ((c1->Iflags | c2->Iflags) & CFunambig &&
1956: (ifl1 != ifl2 ||
1957: ci1->sibmodrm != ci2->sibmodrm ||
1958: (c1->IEV1.Vint != c2->IEV1.Vint &&
1959: (c1->IEV1.Vint + sz1 <= c2->IEV1.Vint ||
1960: c2->IEV1.Vint + sz2 <= c1->IEV1.Vint)
1961: )
1962: )
1963: )
1964: {
1965: // Assume that [EBP] and [ESP] can point to the same location
1966: if (((a1 | a2) & (mBP | mSP)) == (mBP | mSP))
1967: goto Lconflict;
1968: goto Lswap;
1969: }
1970:
1971: if (i) printf("\t3\n");
1972: goto Lconflict;
1973: }
1974:
1975: Lswap:
1976: if (fpsched)
1977: { unsigned char a1,b1;
warning C6246: Local declaration of 'a1' hides declaration of the same name in outer scope. For additional information, see previous declaration at line '1799' of 'c:\projects\extern\d\dmd\src\backend\cgsched.c': Lines: 1799
1978: unsigned char a2,b2;
warning C6246: Local declaration of 'a2' hides declaration of the same name in outer scope. For additional information, see previous declaration at line '1800' of 'c:\projects\extern\d\dmd\src\backend\cgsched.c': Lines: 1800
1979:
1980: //printf("\tfpsched %d,%d:\n",ci1->fp_op,ci2->fp_op);
1981: a1 = ci1->fxch_pre;
1982: b1 = ci1->fxch_post;
1983: a2 = ci2->fxch_pre;
1984: b2 = ci2->fxch_post;
1985:
1986: #define X(a,b) ((a << 8) | b)
1987: switch (X(ci1->fp_op,ci2->fp_op))
1988: {
1989: case X(FPfstp,FPfld):
1990: if (a1 || b1)
1991: goto Lconflict;
1992: if (a2)
1993: goto Lconflict;
1994: if (b2 == 0)
1995: ci2->fxch_post++;
1996: else if (b2 == 1)
1997: {
1998: ci2->fxch_pre++;
1999: ci2->fxch_post++;
2000: }
2001: else
2002: {
2003: goto Lconflict;
2004: }
2005: break;
2006:
2007: case X(FPfstp,FPfop):
2008: if (a1 || b1)
2009: goto Lconflict;
2010: ci2->fxch_pre++;
2011: ci2->fxch_post++;
2012: break;
2013:
2014: case X(FPfop,FPfop):
2015: if (a1 == 0 && b1 == 1 && a2 == 0 && b2 == 0)
2016: { ci2->fxch_pre = 1;
2017: ci2->fxch_post = 1;
2018: break;
2019: }
2020: if (a1 == 0 && b1 == 0 && a2 == 1 && b2 == 1)
2021: break;
2022: goto Lconflict;
2023:
2024: case X(FPfop,FPfld):
2025: if (a1 || b1)
2026: goto Lconflict;
2027: if (a2)
2028: goto Lconflict;
2029: if (b2)
2030: break;
2031: else if (fpsched == 2)
2032: ci1->fxch_post = 1;
2033: ci2->fxch_post = 1;
2034: break;
2035:
2036: default:
2037: goto Lconflict;
2038: }
2039: #undef X
2040: //printf("\tpre = %d, post = %d\n",ci2->fxch_pre,ci2->fxch_post);
2041: }
2042:
2043: //printf("w1 = x%x, w2 = x%x\n",w1,w2);
2044: if (i) printf("no conflict\n\n");
2045: return 0;
2046:
2047: Lconflict:
2048: //printf("r1=%x, w1=%x, r2=%x, w2=%x\n",r1,w1,r2,w2);
2049: delay_clocks = 0;
2050:
2051: // Determine if AGI
2052: if (!PRO && pair_agi(ci1,ci2))
2053: delay_clocks = 1;
2054:
2055: // Special delays for floating point
2056: if (fpsched)
2057: { if (ci1->fp_op == FPfld && ci2->fp_op == FPfstp)
2058: delay_clocks = 1;
2059: else if (ci1->fp_op == FPfop && ci2->fp_op == FPfstp)
2060: delay_clocks = 3;
2061: else if (ci1->fp_op == FPfop && ci2->fp_op == FPfop)
2062: delay_clocks = 2;
2063: }
2064: else if (PRO)
2065: {
2066: // Look for partial register write stalls
2067: if (w1 & r2 & ALLREGS && sz1 < sz2)
2068: delay_clocks = 7;
2069: }
2070: else if ((w1 | r1) & (w2 | r2) & (C | S))
2071: { int reg;
2072: int op;
2073:
2074: op = c1->Iop;
2075: reg = c1->Irm & modregrm(0,7,0);
2076: if (ci1->fp_op == FPfld ||
2077: (op == 0xD9 && (c1->Irm & 0xF8) == 0xC0)
2078: )
2079: ; // FLD
2080: else if (op == 0xD9 && (c1->Irm & 0xF8) == 0xC8)
2081: ; // FXCH
2082: else if (c2->Iop == 0xD9 && (c2->Irm & 0xF8) == 0xC8)
2083: ; // FXCH
2084: else
2085: delay_clocks = 3;
2086: }
2087:
2088: if (i) printf("conflict %d\n\n",delay_clocks);
2089: return 0x100 + delay_clocks;
2090: }
2091:
2092: struct Schedule
2093: {
2094: #define TBLMAX (2*3*20) // must be divisible by both 2 and 3
2095: // (U,V pipe in Pentium, 3 decode units
2096: // in Pentium Pro)
2097:
2098: Cinfo *tbl[TBLMAX]; // even numbers are U pipe, odd numbers are V
2099: int tblmax; // max number of slots used
2100:
2101: Cinfo cinfo[TBLMAX];
2102: int cinfomax;
2103:
2104: list_t stagelist; // list of instructions in staging area
2105:
2106: int fpustackused; // number of slots in FPU stack that are used
2107:
2108: void initialize(int fpustackinit); // initialize scheduler
2109: int stage(code *c); // stage instruction
2110: int insert(Cinfo *ci); // insert c into schedule
2111: code **assemble(code **pc); // reassemble scheduled instructions
2112: };
2113:
2114: /******************************
2115: */
2116:
2117: void Schedule::initialize(int fpustackinit)
2118: {
2119: //printf("Schedule::initialize(fpustackinit = %d)\n", fpustackinit);
2120: memset(this,0,sizeof(Schedule));
2121: fpustackused = fpustackinit;
2122: }
2123:
2124: /******************************
2125: */
2126:
2127: code **Schedule::assemble(code **pc)
2128: { int i;
2129: list_t l;
2130: code *c;
2131:
2132: #ifdef DEBUG
2133: if (debugs) printf("assemble:\n");
2134: #endif
2135: assert(!*pc);
2136:
2137: // Try to insert the rest of the staged instructions
2138: for (l = stagelist; l; l = list_next(l))
2139: { Cinfo *ci;
2140:
2141: ci = (Cinfo *)list_ptr(l);
2142: if (!insert(ci))
2143: break;
2144: }
2145:
2146: // Get the instructions out of the schedule table
2147: assert((unsigned)tblmax <= TBLMAX);
2148: for (i = 0; i < tblmax; i++)
2149: { Cinfo *ci;
2150:
2151: ci = tbl[i];
2152: #ifdef DEBUG
2153: if (debugs)
2154: {
2155: if (PRO)
2156: { static char tbl[3][4] = { "0 "," 1 "," 2" };
2157:
2158: if (ci)
2159: printf("%s %d ",tbl[i - ((i / 3) * 3)],ci->uops);
2160: else
2161: printf("%s ",tbl[i - ((i / 3) * 3)]);
2162: }
2163: else
2164: {
2165: printf((i & 1) ? " V " : "U ");
2166: }
2167: if (ci)
2168: ci->c->print();
2169: else
2170: printf("\n");
2171: }
2172: #endif
2173: if (!ci)
2174: continue;
2175: fpustackused += ci->fpuadjust;
2176: //printf("stage()1: fpustackused = %d\n", fpustackused);
2177: c = ci->c;
2178: if (i == 0)
2179: c->Iflags |= CFtarg; // by definition, first is always a jump target
2180: else
2181: c->Iflags &= ~CFtarg; // the rest are not
2182:
2183: // Put in any FXCH prefix
2184: if (ci->fxch_pre)
2185: { code *cf;
2186: assert(i);
2187: cf = gen2(NULL,0xD9,0xC8 + ci->fxch_pre);
2188: *pc = cf;
2189: pc = &code_next(cf);
2190: }
2191:
2192: *pc = c;
2193: do
2194: {
2195: assert(*pc != code_next(*pc));
2196: pc = &code_next(*pc);
2197: } while (*pc);
2198:
2199: // Put in any FXCH postfix
2200: if (ci->fxch_post)
2201: { int j;
2202:
2203: for (j = i + 1; j < tblmax; j++)
2204: { if (tbl[j])
2205: { if (tbl[j]->fxch_pre == ci->fxch_post)
2206: {
2207: tbl[j]->fxch_pre = 0; // they cancel each other out
2208: goto L1;
2209: }
2210: break;
2211: }
2212: }
2213: { code *cf;
2214: cf = gen2(NULL,0xD9,0xC8 + ci->fxch_post);
2215: *pc = cf;
2216: pc = &code_next(cf);
2217: }
2218: }
2219: L1: ;
2220: }
2221:
2222: // Just append any instructions left in the staging area
2223: for (; l; l = list_next(l))
2224: { Cinfo *ci = (Cinfo *)list_ptr(l);
2225: code *c = ci->c;
warning C6246: Local declaration of 'c' hides declaration of the same name in outer scope. For additional information, see previous declaration at line '2130' of 'c:\projects\extern\d\dmd\src\backend\cgsched.c': Lines: 2130
2226:
2227: #ifdef DEBUG
2228: if (debugs) { printf("appending: "); c->print(); }
2229: #endif
2230: *pc = c;
2231: do
2232: {
2233: pc = &code_next(*pc);
2234:
2235: } while (*pc);
2236: fpustackused += ci->fpuadjust;
2237: //printf("stage()2: fpustackused = %d\n", fpustackused);
2238: }
2239: list_free(&stagelist);
2240:
2241: return pc;
2242: }
2243:
2244: /******************************
2245: * Insert c into scheduling table.
2246: * Returns:
2247: * 0 could not be scheduled; have to start a new one
2248: */
2249:
2250: int Schedule::insert(Cinfo *ci)
2251: { code *c;
2252: int clocks;
2253: int i;
2254: int ic = 0;
2255: int imin;
2256: targ_size_t offset;
2257: targ_size_t vpointer;
2258: int movesp = 0;
2259: int reg2 = -1; // avoid "may be uninitialized" warning
2260:
2261: //printf("insert "); ci->c->print();
2262: //printf("insert() %d\n", fpustackused);
2263: c = ci->c;
2264: //printf("\tc->Iop %x\n",c->Iop);
2265: vpointer = c->IEVpointer1;
2266: assert((unsigned)tblmax <= TBLMAX);
2267: if (tblmax == TBLMAX) // if out of space
2268: goto Lnoinsert;
2269: if (tblmax == 0) // if table is empty
2270: { // Just stuff it in the first slot
2271: i = tblmax;
2272: goto Linsert;
2273: }
2274: else if (c->Iflags & (CFtarg | CFtarg2))
2275: // Jump targets can only be first in the scheduler
2276: goto Lnoinsert;
2277:
2278: // Special case of:
2279: // PUSH reg1
2280: // MOV reg2,x[ESP]
2281: if (c->Iop == 0x8B &&
2282: (c->Irm & modregrm(3,0,7)) == modregrm(1,0,4) &&
2283: c->Isib == modregrm(0,4,SP) &&
2284: c->IFL1 == FLconst &&
2285: ((signed char)c->IEVpointer1) >= REGSIZE
2286: )
2287: {
2288: movesp = 1; // this is a MOV reg2,offset[ESP]
2289: offset = (signed char)c->IEVpointer1;
2290: reg2 = (c->Irm >> 3) & 7;
2291: }
2292:
2293:
2294: // Start at tblmax, and back up until we get a conflict
2295: ic = -1;
2296: imin = 0;
2297: for (i = tblmax; i >= 0; i--)
2298: { Cinfo *cit;
2299:
2300: cit = tbl[i];
2301: if (!cit)
2302: continue;
2303:
2304: // Look for special case swap
2305: if (movesp &&
2306: (cit->c->Iop & ~7) == 0x50 && // if PUSH reg1
2307: (cit->c->Iop & 7) != reg2 && // if reg1 != reg2
2308: ((signed char)c->IEVpointer1) >= -cit->spadjust
2309: )
warning C4146: unary minus operator applied to unsigned type, result still unsigned
warning C4018: '>=' : signed/unsigned mismatch
2310: {
2311: c->IEVpointer1 += cit->spadjust;
2312: //printf("\t1, spadjust = %d, ptr = x%x\n",cit->spadjust,c->IEVpointer1);
2313: continue;
2314: }
2315:
2316: if (movesp &&
2317: cit->c->Iop == 0x83 &&
2318: cit->c->Irm == modregrm(3,5,SP) && // if SUB ESP,offset
2319: cit->c->IFL2 == FLconst &&
2320: ((signed char)c->IEVpointer1) >= -cit->spadjust
2321: )
warning C4146: unary minus operator applied to unsigned type, result still unsigned
warning C4018: '>=' : signed/unsigned mismatch
2322: {
2323: //printf("\t2, spadjust = %d\n",cit->spadjust);
2324: c->IEVpointer1 += cit->spadjust;
2325: continue;
2326: }
2327:
2328: clocks = conflict(cit,ci,1);
2329: if (clocks)
2330: { int j;
2331:
2332: ic = i; // where the conflict occurred
2333: clocks &= 0xFF; // convert to delay count
2334:
2335: // Move forward the delay clocks
2336: if (clocks == 0)
2337: j = i + 1;
2338: else if (PRO)
2339: j = (((i + 3) / 3) * 3) + clocks * 3;
2340: else
2341: { j = ((i + 2) & ~1) + clocks * 2;
2342:
2343: // It's possible we skipped over some AGI generating
2344: // instructions due to movesp.
2345: int k;
2346: for (k = i + 1; k < j; k++)
2347: {
2348: if (k >= TBLMAX)
2349: goto Lnoinsert;
2350: if (tbl[k] && pair_agi(tbl[k],ci))
2351: {
2352: k = ((k + 2) & ~1) + 1;
2353: }
2354: }
2355: j = k;
2356: }
2357:
2358: if (j >= TBLMAX) // exceed table size?
2359: goto Lnoinsert;
2360: imin = j; // first possible slot c can go in
2361: break;
2362: }
2363: }
2364:
2365:
2366: // Scan forward looking for a hole to put it in
2367: for (i = imin; i < TBLMAX; i++)
2368: {
2369: if (tbl[i])
2370: {
2371: // In case, due to movesp, we skipped over some AGI instructions
2372: if (!PRO && pair_agi(tbl[i],ci))
2373: {
2374: i = ((i + 2) & ~1) + 1;
2375: if (i >= TBLMAX)
2376: goto Lnoinsert;
2377: }
2378: }
2379: else
2380: {
2381: if (PRO)
2382: { int i0 = (i / 3) * 3; // index of decode unit 0
2383: Cinfo *ci0;
2384:
2385: assert(((TBLMAX / 3) * 3) == TBLMAX);
2386: switch (i - i0)
2387: {
2388: case 0: // i0 can handle any instruction
2389: goto Linsert;
2390: case 1:
2391: ci0 = tbl[i0];
2392: if (ci->uops > 1)
2393: {
2394: if (i0 >= imin && ci0->uops == 1)
2395: goto L1;
2396: i++;
2397: break;
2398: }
2399: if (triple_test(ci0,ci,tbl[i0 + 2]))
2400: goto Linsert;
2401: break;
2402: case 2:
2403: ci0 = tbl[i0];
2404: if (ci->uops > 1)
2405: {
2406: if (i0 >= imin && ci0->uops == 1)
2407: {
2408: if (i >= tblmax)
2409: { if (i + 1 >= TBLMAX)
2410: goto Lnoinsert;
2411: tblmax = i + 1;
2412: }
2413: tbl[i0 + 2] = tbl[i0 + 1];
2414: tbl[i0 + 1] = ci0;
2415: i = i0;
2416: goto Linsert;
2417: }
2418: break;
2419: }
2420: if (triple_test(ci0,tbl[i0 + 1],ci))
2421: goto Linsert;
2422: break;
2423: default:
2424: assert(0);
2425: }
2426: }
2427: else
2428: {
2429: assert((TBLMAX & 1) == 0);
2430: if (i & 1) // if V pipe
2431: {
2432: if (pair_test(tbl[i - 1],ci))
2433: {
2434: goto Linsert;
2435: }
2436: else if (i > imin && pair_test(ci,tbl[i - 1]))
2437: {
2438: L1:
2439: tbl[i] = tbl[i - 1];
2440: if (i >= tblmax)
2441: tblmax = i + 1;
2442: i--;
2443: //printf("\tswapping with x%02x\n",tbl[i + 1]->c->Iop);
2444: goto Linsert;
2445: }
2446: }
2447: else // will always fit in U pipe
2448: {
2449: assert(!tbl[i + 1]); // because V pipe should be empty
warning C6201: Index '120' is out of valid index range '0' to '119' for possibly stack allocated buffer 'tbl'
2450: goto Linsert;
2451: }
2452: }
2453: }
2454: }
2455:
2456: Lnoinsert:
2457: //printf("\tnoinsert\n");
2458: c->IEVpointer1 = vpointer; // reset to original value
2459: return 0;
2460:
2461: Linsert:
2462: // Insert at location i
2463: assert(i < TBLMAX);
2464: assert(tblmax <= TBLMAX);
2465: tbl[i] = ci;
2466: //printf("\tinsert at location %d\n",i);
2467:
2468: // If it's a scheduled floating point code, we have to adjust
2469: // the FXCH values
2470: if (ci->fp_op)
2471: { int j;
2472:
2473: ci->fxch_pre = 0;
2474: ci->fxch_post = 0; // start over again
2475:
2476: int fpu = fpustackused;
2477: for (j = 0; j < tblmax; j++)
2478: {
2479: if (tbl[j])
2480: {
2481: fpu += tbl[j]->fpuadjust;
2482: if (fpu >= 8) // if FPU stack overflow
2483: { tbl[i] = NULL;
2484: //printf("fpu stack overflow\n");
2485: goto Lnoinsert;
2486: }
2487: }
2488: }
2489:
2490: for (j = tblmax; j > i; j--)
2491: {
2492: if (j < TBLMAX && tbl[j])
2493: conflict(tbl[j],ci,2);
2494: }
2495: }
2496:
2497: if (movesp)
2498: { // Adjust [ESP] offsets
2499: int j;
2500:
2501: //printf("\tic = %d, inserting at %d\n",ic,i);
2502: assert((unsigned)tblmax <= TBLMAX);
2503: for (j = ic + 1; j < i; j++)
2504: { Cinfo *cit;
2505:
2506: cit = tbl[j];
2507: if (cit)
2508: {
2509: c->IEVpointer1 -= cit->spadjust;
2510: //printf("\t3, spadjust = %d, ptr = x%x\n",cit->spadjust,c->IEVpointer1);
2511: }
2512: }
2513: }
2514: if (i >= tblmax)
2515: tblmax = i + 1;
2516:
2517: // Now do a hack. Look back at immediately preceding instructions,
2518: // and see if we can swap with a push.
2519: if (0 && movesp)
2520: { int j;
2521:
2522: while (1)
2523: {
2524: for (j = 1; i > j; j++)
2525: if (tbl[i - j])
2526: break;
2527:
2528: if (i >= j && tbl[i - j] &&
2529: (tbl[i - j]->c->Iop & ~7) == 0x50 && // if PUSH reg1
2530: (tbl[i - j]->c->Iop & 7) != reg2 && // if reg1 != reg2
2531: (signed char)c->IEVpointer1 >= REGSIZE)
2532: {
2533: //printf("\t-4 prec, i-j=%d, i=%d\n",i-j,i);
2534: assert((unsigned)i < TBLMAX);
2535: assert((unsigned)(i - j) < TBLMAX);
2536: tbl[i] = tbl[i - j];
2537: tbl[i - j] = ci;
2538: i -= j;
2539: c->IEVpointer1 -= REGSIZE;
2540: }
2541: else
2542: break;
2543: }
2544: }
2545:
2546: //printf("\tinsert\n");
2547: return 1;
2548: }
2549:
2550:
2551: /******************************
2552: * Insert c into staging area.
2553: * Returns:
2554: * 0 could not be scheduled; have to start a new one
2555: */
2556:
2557: int Schedule::stage(code *c)
2558: { Cinfo *ci;
2559: list_t l;
2560: list_t ln;
2561: int agi;
2562:
2563: //printf("stage: "); c->print();
2564: if (cinfomax == TBLMAX) // if out of space
2565: goto Lnostage;
2566: ci = &cinfo[cinfomax++];
2567: getinfo(ci,c);
2568:
2569: if (c->Iflags & (CFtarg | CFtarg2 | CFvolatile))
2570: {
2571: // Insert anything in stagelist
2572: for (l = stagelist; l; l = ln)
2573: { Cinfo *cs;
2574:
2575: ln = list_next(l);
2576: cs = (Cinfo *)list_ptr(l);
2577: if (!insert(cs))
2578: return 0;
2579: list_subtract(&stagelist,cs);
2580: }
2581: return insert(ci);
2582: }
2583:
2584: // Look through stagelist, and insert any AGI conflicting instructions
2585: agi = 0;
2586: for (l = stagelist; l; l = ln)
2587: { Cinfo *cs;
2588:
2589: ln = list_next(l);
2590: cs = (Cinfo *)list_ptr(l);
2591: if (pair_agi(cs,ci))
2592: {
2593: if (!insert(cs))
2594: goto Lnostage;
2595: list_subtract(&stagelist,cs);
2596: agi = 1; // we put out an AGI
2597: }
2598: }
2599:
2600: // Look through stagelist, and insert any other conflicting instructions
2601: for (l = stagelist; l; l = ln)
2602: { Cinfo *cs;
2603:
2604: ln = list_next(l);
2605: cs = (Cinfo *)list_ptr(l);
2606: if (conflict(cs,ci,0) && // if conflict
2607: !(cs->flags & ci->flags & CIFLpush))
2608: {
2609: if (cs->spadjust)
2610: {
2611: // We need to insert all previous adjustments to ESP
2612: list_t la,lan;
2613:
2614: for (la = stagelist; la != l; la = lan)
2615: { Cinfo *ca;
2616:
2617: lan = list_next(la);
2618: ca = (Cinfo *)list_ptr(la);
2619: if (ca->spadjust)
2620: { if (!insert(ca))
2621: goto Lnostage;
2622: list_subtract(&stagelist,ca);
2623: }
2624: }
2625: }
2626:
2627: if (!insert(cs))
2628: goto Lnostage;
2629: list_subtract(&stagelist,cs);
2630: }
2631: }
2632:
2633: // If floating point opcode, don't stage it, send it right out
2634: if (!agi && ci->flags & CIFLnostage)
2635: {
2636: if (!insert(ci))
2637: goto Lnostage;
2638: return 1;
2639: }
2640:
2641: list_append(&stagelist,ci); // append to staging list
2642: return 1;
2643:
2644: Lnostage:
2645: return 0;
2646: }
2647:
2648: /********************************************
2649: * Snip off tail of instruction sequence.
2650: * Returns:
2651: * next instruction (the tail) or
2652: * NULL for no more instructions
2653: */
2654:
2655: STATIC code * csnip(code *c)
2656: { code **pc;
2657: unsigned iflags;
2658:
2659: if (c)
2660: { iflags = c->Iflags & CFclassinit;
2661: while (1)
2662: {
2663: pc = &code_next(c);
2664: c = *pc;
2665: if (!c)
2666: break;
2667: if (c->Iflags & (CFtarg | CFtarg2))
2668: break;
2669: if (!(c->Iop == NOP ||
2670: c->Iop == (ESCAPE | ESClinnum) ||
2671: c->Iflags & iflags))
2672: break;
2673: }
2674: *pc = NULL;
2675: }
2676: return c;
2677: }
2678:
2679:
2680: /******************************
2681: * Schedule Pentium instructions,
2682: * based on Steve Russell's algorithm.
2683: */
2684:
2685: code *schedule(code *c,regm_t scratch)
2686: {
2687: code *cresult = NULL;
2688: code **pctail = &cresult;
2689: Schedule sch;
2690:
2691: sch.initialize(0); // initialize scheduling table
2692: while (c)
2693: {
2694: if ((c->Iop == NOP || (c->Iop & 0xFF) == ESCAPE || c->Iflags & CFclassinit) &&
2695: !(c->Iflags & (CFtarg | CFtarg2)))
2696: { code *cn;
2697:
2698: // Just append this instruction to pctail and go to the next one
2699: *pctail = c;
2700: cn = code_next(c);
2701: code_next(c) = NULL;
2702: pctail = &code_next(c);
2703: c = cn;
2704: continue;
2705: }
2706:
2707: //printf("init\n");
2708: sch.initialize(sch.fpustackused); // initialize scheduling table
2709:
2710: while (c)
2711: {
2712: //printf("insert %p\n",c);
2713: if (!sch.stage(c)) // store c in scheduling table
2714: break;
2715: c = csnip(c);
2716: }
2717:
2718: //printf("assem %d\n",sch.tblmax);
2719: pctail = sch.assemble(pctail); // reassemble instruction stream
2720: }
2721:
2722: return cresult;
2723: }
2724:
2725: /**************************************************************************/
2726:
2727: /********************************************
2728: * Replace any occurrence of r1 in EA with r2.
2729: */
2730:
2731: STATIC void repEA(code *c,unsigned r1,unsigned r2)
2732: {
2733: unsigned mod,reg,rm;
2734: unsigned rmn;
2735:
2736: rmn = c->Irm;
2737: mod = rmn & 0xC0;
2738: reg = rmn & modregrm(0,7,0);
2739: rm = rmn & 7;
2740:
2741: if (mod == 0xC0 && rm == r1)
2742: ; //c->Irm = mod | reg | r2;
2743: else if (is32bitaddr(I32,c->Iflags) &&
2744: // If not disp32
2745: (rmn & modregrm(3,0,7)) != modregrm(0,0,5))
2746: {
2747: if (rm == 4)
2748: { // SIB byte addressing
2749: unsigned sib;
2750: unsigned base;
2751: unsigned index;
2752:
2753: sib = c->Isib;
2754: base = sib & 7;
2755: index = (sib >> 3) & 7;
2756: if (base == r1 &&
2757: !(r1 == 5 && mod == 0) &&
2758: !(r2 == 5 && mod == 0)
2759: )
2760: base = r2;
2761: if (index == r1)
2762: index = r2;
2763: c->Isib = (sib & 0xC0) | (index << 3) | base;
2764: }
2765: else if (rm == r1)
2766: {
2767: if (r1 == BP && r2 == SP)
2768: { // Replace [EBP] with [ESP]
2769: c->Irm = mod | reg | 4;
2770: c->Isib = modregrm(0,4,SP);
2771: }
2772: else if (r2 == BP && mod == 0)
2773: {
2774: c->Irm = modregrm(1,0,0) | reg | r2;
2775: c->IFL1 = FLconst;
2776: c->IEV1.Vint = 0;
2777: }
2778: else
2779: c->Irm = mod | reg | r2;
2780: }
2781: }
2782: }
2783:
2784: /******************************************
2785: * Instruction scheduler.
2786: * Input:
2787: * c list of instructions to schedule
2788: * scratch scratch registers we can use
2789: * Returns:
2790: * revised list of scheduled instructions
2791: */
2792:
2793: /******************************************
2794: * Swap c1 and c2.
2795: * c1 comes before c2.
2796: * Swap in place to not disturb addresses of jmp targets
2797: */
2798:
2799: STATIC void code_swap(code *c1,code *c2)
2800: { code cs;
2801:
2802: // Special case of:
2803: // PUSH reg1
2804: // MOV reg2,x[ESP]
2805: //printf("code_swap(%x, %x)\n",c1,c2);
2806: if ((c1->Iop & ~7) == 0x50 &&
2807: c2->Iop == 0x8B &&
2808: (c2->Irm & modregrm(3,0,7)) == modregrm(1,0,4) &&
2809: c2->Isib == modregrm(0,4,SP) &&
2810: c2->IFL1 == FLconst &&
2811: ((signed char)c2->IEVpointer1) >= REGSIZE &&
2812: (c1->Iop & 7) != ((c2->Irm >> 3) & 7)
2813: )
2814: c2->IEVpointer1 -= REGSIZE;
2815:
2816:
2817: cs = *c2;
2818: *c2 = *c1;
2819: *c1 = cs;
2820: // Retain original CFtarg
2821: c1->Iflags = (c1->Iflags & ~(CFtarg | CFtarg2)) | (c2->Iflags & (CFtarg | CFtarg2));
2822: c2->Iflags = (c2->Iflags & ~(CFtarg | CFtarg2)) | (cs.Iflags & (CFtarg | CFtarg2));
2823:
2824: c1->next = c2->next;
2825: c2->next = cs.next;
2826: }
2827:
2828: code *peephole(code *cstart,regm_t scratch)
2829: {
2830: // Look for cases of:
2831: // MOV r1,r2
2832: // OP ?,r1
2833: // we can replace with:
2834: // MOV r1,r2
2835: // OP ?,r2
2836: // to improve pairing
2837: code *c;
2838: code *c1;
2839: unsigned r1,r2;
2840: unsigned mod,reg,rm;
2841:
2842: //printf("peephole\n");
2843: for (c = cstart; c; c = c1)
2844: { unsigned char rmi;
2845: unsigned char rmn;
2846:
2847: //c->print();
2848: c1 = cnext(c);
2849: Ln:
2850: if (!c1)
2851: break;
2852: if (c1->Iflags & (CFtarg | CFtarg2))
2853: continue;
2854:
2855: // Do:
2856: // PUSH reg
2857: if (I32 && (c->Iop & ~7) == 0x50)
2858: { unsigned reg = c->Iop & 7;
warning C6246: Local declaration of 'reg' hides declaration of the same name in outer scope. For additional information, see previous declaration at line '2840' of 'c:\projects\extern\d\dmd\src\backend\cgsched.c': Lines: 2840
2859:
2860: // MOV [ESP],reg => NOP
2861: if (c1->Iop == 0x8B &&
2862: c1->Irm == modregrm(0,reg,4) &&
2863: c1->Isib == modregrm(0,4,SP))
2864: { c1->Iop = NOP;
2865: continue;
2866: }
2867:
2868: // PUSH [ESP] => PUSH reg
2869: if (c1->Iop == 0xFF &&
2870: c1->Irm == modregrm(0,6,4) &&
2871: c1->Isib == modregrm(0,4,SP))
2872: { c1->Iop = 0x50 + reg;
2873: continue;
2874: }
2875:
2876: // CMP [ESP],imm => CMP reg,i,,
2877: if (c1->Iop == 0x83 &&
2878: c1->Irm == modregrm(0,7,4) &&
2879: c1->Isib == modregrm(0,4,SP))
2880: { c1->Irm = modregrm(3,7,reg);
2881: if (c1->IFL2 == FLconst && (signed char)c1->IEV2.Vuns == 0)
2882: { // to TEST reg,reg
2883: c1->Iop = (c1->Iop & 1) | 0x84;
2884: c1->Irm = modregrm(3,reg,reg);
2885: }
2886: continue;
2887: }
2888:
2889: }
2890:
2891: rmi = c->Irm;
2892:
2893: // Do:
2894: // MOV reg,[ESP] => PUSH reg
2895: // ADD ESP,4 => NOP
2896: if (I32 && c->Iop == 0x8B && (rmi & 0xC7) == modregrm(0,0,4) &&
2897: c->Isib == modregrm(0,4,SP) &&
2898: c1->Iop == 0x83 && (c1->Irm & 0xC7) == modregrm(3,0,SP) &&
2899: !(c1->Iflags & CFpsw) && c1->IFL2 == FLconst && c1->IEV2.Vint == 4)
2900: { unsigned reg = (rmi >> 3) & 7;
warning C6246: Local declaration of 'reg' hides declaration of the same name in outer scope. For additional information, see previous declaration at line '2840' of 'c:\projects\extern\d\dmd\src\backend\cgsched.c': Lines: 2840
2901: c->Iop = 0x58 + reg;
2902: c1->Iop = NOP;
2903: continue;
2904: }
2905:
2906: if ((rmi & 0xC0) != 0xC0)
2907: {
2908: continue;
2909: }
2910:
2911: // Combine two SUBs of the same register
2912: if (c->Iop == c1->Iop &&
2913: c->Iop == 0x83 &&
2914: (rmi & modregrm(3,0,7)) == (c1->Irm & modregrm(3,0,7)) &&
2915: !(c1->Iflags & CFpsw) &&
2916: c->IFL2 == FLconst && c1->IFL2 == FLconst
2917: )
2918: { int i = (signed char)c->IEV2.Vint;
2919: int i1 = (signed char)c1->IEV2.Vint;
2920: switch ((rmi & modregrm(0,7,0)) | ((c1->Irm & modregrm(0,7,0)) >> 3))
2921: {
2922: case (0 << 3) | 0: // ADD, ADD
2923: case (5 << 3) | 5: // SUB, SUB
2924: i += i1;
2925: goto Laa;
2926: case (0 << 3) | 5: // ADD, SUB
2927: case (5 << 3) | 0: // SUB, ADD
2928: i -= i1;
2929: goto Laa;
2930: Laa:
2931: if ((signed char)i != i)
2932: c->Iop &= ~2;
2933: c->IEV2.Vint = i;
2934: c1->Iop = NOP;
2935: if (i == 0)
2936: c->Iop = NOP;
2937: continue;
2938: }
2939: }
2940:
2941: if (c->Iop == 0x8B) // MOV r1,EA
2942: { r1 = (rmi >> 3) & 7;
2943: r2 = rmi & 7;
2944: }
2945: else if (c->Iop == 0x89) // MOV EA,r2
2946: { r1 = rmi & 7;
2947: r2 = (rmi >> 3) & 7;
2948: }
2949: else
2950: {
2951: continue;
2952: }
2953:
2954: rmn = c1->Irm;
2955: mod = rmn & 0xC0;
2956: reg = rmn & modregrm(0,7,0);
2957: rm = rmn & 7;
2958: if (cod3_EA(c1))
2959: repEA(c1,r1,r2);
2960: switch (c1->Iop)
2961: {
2962: case 0x50:
2963: case 0x51:
2964: case 0x52:
2965: case 0x53:
2966: case 0x54:
2967: case 0x55:
2968: case 0x56:
2969: case 0x57: // PUSH reg
2970: if ((c1->Iop & 7) == r1)
2971: { c1->Iop = 0x50 | r2;
2972: //printf("schedule PUSH reg\n");
2973: }
2974: break;
2975:
2976: case 0x81:
2977: case 0x83:
2978: // Look for CMP EA,imm
2979: if (reg == modregrm(0,7,0))
2980: {
2981: if (mod == 0xC0 && rm == r1)
2982: c1->Irm = mod | reg | r2;
2983: }
2984: break;
2985:
2986: case 0x84: // TEST reg,byte ptr EA
2987: if (r1 >= 4 || r2 >= 4) // if not a byte register
2988: break;
2989: if ((rmn & 0xC0) == 0xC0)
2990: {
2991: if ((rmn & 3) == r1)
2992: { c1->Irm = rmn = (rmn & modregrm(3,7,4)) | r2;
2993: //printf("schedule 1\n");
2994: }
2995: }
2996: if ((rmn & modregrm(0,3,0)) == modregrm(0,r1,0))
2997: { c1->Irm = (rmn & modregrm(3,4,7)) | modregrm(0,r2,0);
2998: //printf("schedule 2\n");
2999: }
3000: break;
3001: case 0x85: // TEST reg,word ptr EA
3002: if ((rmn & 0xC0) == 0xC0)
3003: {
3004: if ((rmn & 7) == r1)
3005: { c1->Irm = rmn = (rmn & modregrm(3,7,0)) | r2;
3006: //printf("schedule 3\n");
3007: }
3008: }
3009: if ((rmn & modregrm(0,7,0)) == modregrm(0,r1,0))
3010: { c1->Irm = (rmn & modregrm(3,0,7)) | modregrm(0,r2,0);
3011: //printf("schedule 4\n");
3012: }
3013: break;
3014:
3015: case 0x89: // MOV EA,reg
3016: if ((rmn & modregrm(0,7,0)) == modregrm(0,r1,0))
3017: { c1->Irm = (rmn & modregrm(3,0,7)) | modregrm(0,r2,0);
3018: //printf("schedule 5\n");
3019: if (c1->Irm == modregrm(3,r2,r2))
3020: goto Lnop;
3021: }
3022: break;
3023:
3024: case 0x8B: // MOV reg,EA
3025: if ((rmn & 0xC0) == 0xC0 &&
3026: (rmn & 7) == r1) // if EA == r1
3027: { c1->Irm = (rmn & modregrm(3,7,0)) | r2;
3028: //printf("schedule 6\n");
3029: if (c1->Irm == modregrm(3,r2,r2))
3030: goto Lnop;
3031: }
3032: break;
3033:
3034: case 0x3C: // CMP AL,imm8
3035: if (r1 == AX && r2 < 4)
3036: { c1->Iop = 0x80;
3037: c1->Irm = modregrm(3,7,r2);
3038: //printf("schedule 7, r2 = %d\n", r2);
3039: }
3040: break;
3041:
3042: case 0x3D: // CMP AX,imm16
3043: if (r1 == AX)
3044: { c1->Iop = 0x81;
3045: c1->Irm = modregrm(3,7,r2);
3046: if (c1->IFL2 == FLconst &&
3047: c1->IEV2.Vuns == (signed char)c1->IEV2.Vuns)
3048: c1->Iop = 0x83;
3049: //printf("schedule 8\n");
3050: }
3051: break;
3052: }
3053: continue;
3054: Lnop:
3055: c1->Iop = NOP;
3056: c1 = cnext(c1);
3057: goto Ln;
3058: }
3059: L1: ;
warning C4102: 'L1' : unreferenced label
3060: return cstart;
3061: }
3062:
3063: /*****************************************************************/
3064:
3065: /**********************************************
3066: * Replace complex instructions with simple ones more conducive
3067: * to scheduling.
3068: */
3069:
3070: code *simpleops(code *c,regm_t scratch)
3071: { code *cstart;
3072: code **pc;
3073: unsigned reg;
3074: code *c2;
3075:
3076: // Worry about using registers not saved yet by prolog
3077: scratch &= ~fregsaved;
3078:
3079: if (!(scratch & (scratch - 1))) // if 0 or 1 registers
3080: return c;
3081:
3082: reg = findreg(scratch);
3083:
3084: cstart = c;
3085: for (pc = &cstart; *pc; pc = &code_next(*pc))
3086: {
3087: c = *pc;
3088: if (c->Iflags & (CFtarg | CFtarg2 | CFopsize))
3089: continue;
3090: if (c->Iop == 0x83 &&
3091: (c->Irm & modregrm(0,7,0)) == modregrm(0,7,0) &&
3092: (c->Irm & modregrm(3,0,0)) != modregrm(3,0,0)
3093: )
3094: { // Replace CMP mem,imm with:
3095: // MOV reg,mem
3096: // CMP reg,imm
3097: targ_long imm;
3098:
3099: //printf("replacing CMP\n");
3100: c->Iop = 0x8B;
3101: c->Irm = (c->Irm & modregrm(3,0,7)) | modregrm(0,reg,0);
3102:
3103: c2 = code_calloc();
3104: if (reg == AX)
3105: c2->Iop = 0x3D;
3106: else
3107: { c2->Iop = 0x83;
3108: c2->Irm = modregrm(3,7,reg);
3109: }
3110: c2->IFL2 = c->IFL2;
3111: c2->IEV2 = c->IEV2;
3112:
3113: // See if c2 should be replaced by a TEST
3114: imm = c2->IEV2.Vuns;
3115: if (!(c2->Iop & 1))
3116: imm &= 0xFF;
3117: else if (I32 ? c->Iflags & CFopsize : !(c->Iflags & CFopsize))
3118: imm = (short) imm;
3119: if (imm == 0)
3120: {
3121: c2->Iop = 0x85; // TEST reg,reg
3122: c2->Irm = modregrm(3,reg,reg);
3123: }
3124: goto L1;
3125: }
3126: else if (c->Iop == 0xFF &&
3127: (c->Irm & modregrm(0,7,0)) == modregrm(0,6,0) &&
3128: (c->Irm & modregrm(3,0,0)) != modregrm(3,0,0)
3129: )
3130: { // Replace PUSH mem with:
3131: // MOV reg,mem
3132: // PUSH reg
3133:
3134: // printf("replacing PUSH\n");
3135: c->Iop = 0x8B;
3136: c->Irm = (c->Irm & modregrm(3,0,7)) | modregrm(0,reg,0);
3137:
3138: c2 = gen1(NULL,0x50 + reg);
3139: L1:
3140: //c->print();
3141: //c2->print();
3142: c2->next = c->next;
3143: c->next = c2;
3144:
3145: // Switch to another reg
3146: if (scratch & ~mask[reg])
3147: reg = findreg(scratch & ~mask[reg]);
3148: }
3149: }
3150: return cstart;
3151: }
3152:
#if DEBUG
// Names for Cinfo::fp_op values 1..3 (FPfstp, FPfld, FPfop), indexed by fp_op-1
static const char *fpops[] = {"fstp","fld","fop"};

/**********************************************
 * Debug aid: dump the fields of this Cinfo to stdout.
 * Prints "Cinfo 0" when invoked through a null pointer.
 * Flag bits outside the known CIFL* set are reported as "bad flag".
 * An fp_op outside the range of the fpops[] table is reported as
 * "bad fp_op" instead of indexing past the end of the table.
 */
void Cinfo::print()
{
    Cinfo *ci = this;

    if (ci == NULL)
    {
        printf("Cinfo 0\n");
        return;
    }

    // %p requires a void* argument
    printf("Cinfo %p: c %p, pair %x, sz %d, isz %d, flags - ",
        (void *)ci,(void *)c,pair,sz,isz);
    if (ci->flags & CIFLarraybounds)
        printf("arraybounds,");
    if (ci->flags & CIFLea)
        printf("ea,");
    if (ci->flags & CIFLnostage)
        printf("nostage,");
    if (ci->flags & CIFLpush)
        printf("push,");
    if (ci->flags & ~(CIFLarraybounds|CIFLnostage|CIFLpush|CIFLea))
        printf("bad flag,");
    printf("\n\tr %lx w %lx a %lx reg %x uops %x sibmodrm %x spadjust %ld\n",
        (long)r,(long)w,(long)a,reg,uops,sibmodrm,(long)spadjust);
    if (ci->fp_op)
    {
        // fp_op is nonzero here; make sure it also has a name in the table
        const char *opname = "bad fp_op";

        if (fp_op <= sizeof(fpops) / sizeof(fpops[0]))
            opname = fpops[fp_op-1];
        printf("\tfp_op %s, fxch_pre %x, fxch_post %x\n",
                opname,fxch_pre,fxch_post);
    }
}
#endif
3184: #endif
3185: