1: // Copyright (C) 1987-1995 by Symantec
2: // Copyright (C) 2000-2011 by Digital Mars
3: // All Rights Reserved
4: // http://www.digitalmars.com
5: // Written by Walter Bright
6: /*
7: * This source file is made available for personal use
8: * only. The license is in /dmd/src/dmd/backendlicense.txt
9: * or /dm/src/dmd/backendlicense.txt
10: * For any other uses, please contact Digital Mars.
11: */
12:
13: #if !SPP
14:
15: #include <stdio.h>
16: #include <string.h>
17: #include <time.h>
18: #include <math.h>
19: #include "cc.h"
20: #include "el.h"
21: #include "oper.h"
22: #include "code.h"
23: #include "global.h"
24:
25: static char __file__[] = __FILE__; /* for tassert.h */
26: #include "tassert.h"
27:
28: // Constants that the 8087 supports directly
29: // BUG: rewrite for 80 bit long doubles
30: #define PI 3.14159265358979323846
31: #define LOG2 0.30102999566398119521
32: #define LN2 0.6931471805599453094172321
33: #define LOG2T 3.32192809488736234787
34: #define LOG2E 1.4426950408889634074 /* 1/LN2 */
35:
36: #define FWAIT 0x9B /* FWAIT opcode */
37:
38: /* Mark variable referenced by e as not a register candidate */
39: #define notreg(e) ((e)->EV.sp.Vsym->Sflags &= ~GTregcand)
40:
41: /* Generate the appropriate ESC instruction */
42: #define ESC(MF,b) (0xD8 + ((MF) << 1) + (b))
// Memory-format (MF) field values consumed by the ESC(MF,b) macro, which
// computes 0xD8 + (MF << 1) + b.
enum MF
{       // Values for MF
        MFfloat         = 0,    // picked in fixresult87() when the operand size is FLOATSIZE
        MFlong          = 1,    // presumably a 32 bit integer operand -- confirm against callers
        MFdouble        = 2,    // picked in fixresult87() for the non-FLOATSIZE (double) case
        MFword          = 3     // presumably a 16 bit integer operand -- confirm against callers
};
50:
51: NDP _8087elems[8]; // 8087 stack
52: NDP ndp_zero;
53:
54: int stackused = 0; /* number of items on the 8087 stack */
55:
/*********************************
 * Record holding one int and two symbol pointers. A single file-static
 * instance, oldd, is declared right after this struct.
 * NOTE(review): the field names suggest a cached rounding mode plus the
 * symbols for the round-to-zero / round-to-nearest control words
 * (cf. CW_roundto0, CW_roundtonearest). The code that reads and writes
 * them is not in this part of the file -- confirm before relying on it.
 */

struct Dconst
{
    int round;
    symbol *roundto0;
    symbol *roundtonearest;
};
65:
66: static Dconst oldd;
67:
68: #define NDPP 0 // print out debugging info
69: #define NOSAHF I64 // can't use SAHF instruction
70:
71: code *loadComplex(elem *e);
72: code *opmod_complex87(elem *e,regm_t *pretregs);
73: code *opass_complex87(elem *e,regm_t *pretregs);
74: code * genf2(code *c,unsigned op,unsigned rm);
75:
76: #define CW_roundto0 0xFBF
77: #define CW_roundtonearest 0x3BF
78:
79: STATIC code *genrnd(code *c, short cw);
80:
/**********************************
 * When we need to temporarily save 8087 registers, we record information
 * about the save in an array of NDP structs:
 */
85:
86: NDP *NDP::save = NULL;
87: int NDP::savemax = 0; /* # of entries in NDP::save[] */
88: int NDP::savetop = 0; /* # of entries used in NDP::save[] */
89:
90: #ifdef DEBUG
91: #define NDPSAVEINC 2 /* flush reallocation bugs */
92: #else
93: #define NDPSAVEINC 8 /* allocation chunk sizes */
94: #endif
95:
96: /****************************************
97: * Store/load to ndp save location i
98: */
99:
100: code *ndp_fstp(code *c, int i, tym_t ty)
101: { unsigned grex = I64 ? (REX_W << 16) : 0;
102: switch (tybasic(ty))
103: {
104: case TYfloat:
105: case TYifloat:
106: case TYcfloat:
107: c = genc1(c,0xD9,grex | modregrm(2,3,BPRM),FLndp,i); // FSTP m32real i[BP]
108: break;
109:
110: case TYdouble:
111: case TYdouble_alias:
112: case TYidouble:
113: case TYcdouble:
114: c = genc1(c,0xDD,grex | modregrm(2,3,BPRM),FLndp,i); // FSTP m64real i[BP]
115: break;
116:
117: case TYldouble:
118: case TYildouble:
119: case TYcldouble:
120: c = genc1(c,0xDB,grex | modregrm(2,7,BPRM),FLndp,i); // FSTP m80real i[BP]
121: break;
122:
123: default:
124: assert(0);
125: }
126: return c;
127: }
128:
129: code *ndp_fld(code *c, int i, tym_t ty)
130: { unsigned grex = I64 ? (REX_W << 16) : 0;
131: switch (tybasic(ty))
132: {
133: case TYfloat:
134: case TYifloat:
135: case TYcfloat:
136: c = genc1(c,0xD9,grex | modregrm(2,0,BPRM),FLndp,i);
137: break;
138:
139: case TYdouble:
140: case TYdouble_alias:
141: case TYidouble:
142: case TYcdouble:
143: c = genc1(c,0xDD,grex | modregrm(2,0,BPRM),FLndp,i);
144: break;
145:
146: case TYldouble:
147: case TYildouble:
148: case TYcldouble:
149: c = genc1(c,0xDB,grex | modregrm(2,5,BPRM),FLndp,i); // FLD m80real i[BP]
150: break;
151:
152: default:
153: assert(0);
154: }
155: return c;
156: }
157:
158: /**************************
159: * Return index of empty slot in NDP::save[].
160: */
161:
162: STATIC int getemptyslot()
163: { int i;
164:
165: for (i = 0; i < NDP::savemax; i++)
166: if (NDP::save[i].e == NULL)
167: goto L1;
168: /* Out of room, reallocate NDP::save[] */
169: NDP::save = (NDP *)mem_realloc(NDP::save,
170: (NDP::savemax + NDPSAVEINC) * sizeof(*NDP::save));
171: /* clear out new portion of NDP::save[] */
172: memset(NDP::save + NDP::savemax,0,NDPSAVEINC * sizeof(*NDP::save));
173: i = NDP::savemax;
174: NDP::savemax += NDPSAVEINC;
175:
176: L1: if (i >= NDP::savetop)
177: NDP::savetop = i + 1;
178: return i;
179: }
180:
181: /*********************************
182: * Pop 8087 stack.
183: */
184:
185: #undef pop87
186:
187: void pop87(
188: #ifdef DEBUG
189: int line, const char *file
190: #endif
191: )
192: #ifdef DEBUG
193: #define pop87() pop87(__LINE__,__FILE__)
194: #endif
195: {
196: int i;
197:
198: #if NDPP
199: dbg_printf("pop87(%s(%d): stackused=%d)\n", file, line, stackused);
200: #endif
201: --stackused;
202: assert(stackused >= 0);
203: for (i = 0; i < arraysize(_8087elems) - 1; i++)
204: _8087elems[i] = _8087elems[i + 1];
205: /* end of stack is nothing */
206: _8087elems[arraysize(_8087elems) - 1] = ndp_zero;
207: }
208:
209: /*******************************
210: * Push 8087 stack. Generate and return any code
211: * necessary to preserve anything that might run off the end of the stack.
212: */
213:
214: #undef push87
215:
216: #ifdef DEBUG
217: code *push87(int line, const char *file);
218: code *push87() { return push87(__LINE__,__FILE__); }
219: #endif
220:
221: code *push87(
222: #ifdef DEBUG
223: int line, const char *file
224: #endif
225: )
226: #ifdef DEBUG
227: #define push87() push87(__LINE__,__FILE__)
228: #endif
229: {
230: code *c;
231: int i;
232:
233: c = CNIL;
234: // if we would lose the top register off of the stack
235: if (_8087elems[7].e != NULL)
236: {
237: i = getemptyslot();
238: NDP::save[i] = _8087elems[7];
239: c = genf2(c,0xD9,0xF6); // FDECSTP
240: c = genfwait(c);
241: c = ndp_fstp(c, i, _8087elems[7].e->Ety); // FSTP i[BP]
242: assert(stackused == 8);
243: if (NDPP) dbg_printf("push87() : overflow\n");
244: }
245: else
246: {
247: #ifdef DEBUG
248: if (NDPP) dbg_printf("push87(%s(%d): %d)\n", file, line, stackused);
249: #endif
250: stackused++;
251: assert(stackused <= 8);
252: }
253: // Shift the stack up
254: for (i = 7; i > 0; i--)
255: _8087elems[i] = _8087elems[i - 1];
256: _8087elems[0] = ndp_zero;
257: return c;
258: }
259:
260: /*****************************
261: * Note elem e as being in ST(i) as being a value we want to keep.
262: */
263:
264: #ifdef DEBUG
265: void note87(elem *e, unsigned offset, int i, int linnum);
266: void note87(elem *e, unsigned offset, int i)
267: {
268: return note87(e, offset, i, 0);
269: }
270: void note87(elem *e, unsigned offset, int i, int linnum)
271: #define note87(e,offset,i) note87(e,offset,i,__LINE__)
272: #else
273: void note87(elem *e, unsigned offset, int i)
274: #endif
275: {
276: #if NDPP
277: printf("note87(e = %p.%d, i = %d, stackused = %d, line = %d)\n",e,offset,i,stackused,linnum);
278: #endif
279: #if 0 && DEBUG
280: if (_8087elems[i].e)
281: printf("_8087elems[%d].e = %p\n",i,_8087elems[i].e);
282: #endif
283: //if (i >= stackused) *(char*)0=0;
284: assert(i < stackused);
285: _8087elems[i].e = e;
warning C6386: Buffer overrun: accessing '_8087elems', the writable size is '64' bytes, but '8388488' bytes might be written: Lines: 284, 285
286: _8087elems[i].offset = offset;
287: }
288:
289: /****************************************************
290: * Exchange two entries in 8087 stack.
291: */
292:
293: void xchg87(int i, int j)
294: {
295: NDP save;
296:
297: save = _8087elems[i];
298: _8087elems[i] = _8087elems[j];
299: _8087elems[j] = save;
300: }
301:
302: /****************************
303: * Make sure that elem e is in register ST(i). Reload it if necessary.
304: * Input:
305: * i 0..3 8087 register number
306: * flag 1 don't bother with FXCH
307: */
308:
309: #ifdef DEBUG
310: STATIC code * makesure87(elem *e,unsigned offset,int i,unsigned flag,int linnum)
311: #define makesure87(e,offset,i,flag) makesure87(e,offset,i,flag,__LINE__)
312: #else
313: STATIC code * makesure87(elem *e,unsigned offset,int i,unsigned flag)
314: #endif
315: {
316: code *c;
317: int j;
318:
319: #ifdef DEBUG
320: if (NDPP) printf("makesure87(e=%p, offset=%d, i=%d, flag=%d, line=%d)\n",e,offset,i,flag,linnum);
321: #endif
322: assert(e && i < 4);
323: c = CNIL;
324: L1:
325: if (_8087elems[i].e != e || _8087elems[i].offset != offset)
326: {
327: #ifdef DEBUG
328: if (_8087elems[i].e)
329: printf("_8087elems[%d].e = %p, .offset = %d\n",i,_8087elems[i].e,_8087elems[i].offset);
330: #endif
331: assert(_8087elems[i].e == NULL);
332: for (j = 0; 1; j++)
333: {
334: if (j >= NDP::savetop && e->Eoper == OPcomma)
335: {
336: e = e->E2; // try right side
337: goto L1;
338: }
339: #ifdef DEBUG
340: if (j >= NDP::savetop)
341: printf("e = %p, NDP::savetop = %d\n",e,NDP::savetop);
342: #endif
343: assert(j < NDP::savetop);
344: //printf("\tNDP::save[%d] = %p, .offset = %d\n", j, NDP::save[j].e, NDP::save[j].offset);
345: if (e == NDP::save[j].e && offset == NDP::save[j].offset)
346: break;
347: }
348: c = push87();
349: c = genfwait(c);
350: c = ndp_fld(c, j, e->Ety); // FLD j[BP]
351: if (!(flag & 1))
352: {
353: while (i != 0)
354: {
355: genf2(c,0xD9,0xC8 + i); // FXCH ST(i)
356: i--;
357: }
358: }
359: NDP::save[j] = ndp_zero; // back in 8087
360: }
361: //_8087elems[i].e = NULL;
362: return c;
363: }
364:
365: /****************************
366: * Save in memory any values in the 8087 that we want to keep.
367: */
368:
369: code *save87()
370: {
371: code *c;
372: int i;
373:
374: c = CNIL;
375: while (_8087elems[0].e && stackused)
376: {
377: /* Save it */
378: i = getemptyslot();
379: if (NDPP) printf("saving %p in temporary NDP::save[%d]\n",_8087elems[0].e,i);
380: NDP::save[i] = _8087elems[0];
381:
382: c = genfwait(c);
383: c = ndp_fstp(c,i,_8087elems[0].e->Ety); // FSTP i[BP]
384: pop87();
385: }
386: if (c) /* if any stores */
387: genfwait(c); /* wait for last one to finish */
388: return c;
389: }
390:
391: /******************************************
392: * Save any noted values that would be destroyed by n pushes
393: */
394:
395: code *save87regs(unsigned n)
396: {
397: unsigned j;
398: unsigned k;
399: code *c = NULL;
400:
401: assert(n <= 7);
402: j = 8 - n;
403: if (stackused > j)
warning C4018: '>' : signed/unsigned mismatch
404: {
405: for (k = 8; k > j; k--)
406: {
407: c = genf2(c,0xD9,0xF6); // FDECSTP
408: c = genfwait(c);
409: if (k <= stackused)
warning C4018: '<=' : signed/unsigned mismatch
410: { int i;
411:
412: i = getemptyslot();
413: c = ndp_fstp(c, i, _8087elems[k - 1].e->Ety); // FSTP i[BP]
414: NDP::save[i] = _8087elems[k - 1];
415: _8087elems[k - 1] = ndp_zero;
416: }
417: }
418:
419: for (k = 8; k > j; k--)
420: {
421: if (k > stackused)
warning C4018: '>' : signed/unsigned mismatch
422: { c = genf2(c,0xD9,0xF7); // FINCSTP
423: c = genfwait(c);
424: }
425: }
426: stackused = j;
427: }
428: return c;
429: }
430:
431: /*************************************
432: * Find which, if any, slot on stack holds elem e.
433: */
434:
435: STATIC int cse_get(elem *e, unsigned offset)
436: { int i;
437:
438: for (i = 0; 1; i++)
439: {
440: if (i == stackused)
441: {
442: i = -1;
443: //printf("cse not found\n");
444: //elem_print(e);
445: break;
446: }
447: if (_8087elems[i].e == e &&
448: _8087elems[i].offset == offset)
449: { //printf("cse found %d\n",i);
450: //elem_print(e);
451: break;
452: }
453: }
454: return i;
455: }
456:
457: /*************************************
458: * Reload common subexpression.
459: */
460:
461: code *comsub87(elem *e,regm_t *pretregs)
462: { code *c;
463:
464: //printf("comsub87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
465: // Look on 8087 stack
466: int i = cse_get(e, 0);
467:
468: if (tycomplex(e->Ety))
469: {
470: unsigned sz = tysize(e->Ety);
471: int j = cse_get(e, sz / 2);
472: if (i >= 0 && j >= 0)
473: {
474: c = push87();
475: c = cat(c, push87());
476: c = genf2(c,0xD9,0xC0 + i); // FLD ST(i)
477: c = genf2(c,0xD9,0xC0 + j + 1); // FLD ST(j + 1)
478: c = cat(c,fixresult_complex87(e,mST01,pretregs));
479: }
480: else
481: // Reload
482: c = loaddata(e,pretregs);
483: }
484: else
485: {
486: if (i >= 0)
487: {
488: c = push87();
489: c = genf2(c,0xD9,0xC0 + i); // FLD ST(i)
490: if (*pretregs & XMMREGS)
491: c = cat(c,fixresult87(e,mST0,pretregs));
492: else
493: c = cat(c,fixresult(e,mST0,pretregs));
494: }
495: else
496: // Reload
497: c = loaddata(e,pretregs);
498: }
499:
500: freenode(e);
501: return c;
502: }
503:
504:
505: /**************************
506: * Generate code to deal with floatreg.
507: */
508:
509: code * genfltreg(code *c,unsigned opcode,unsigned reg,targ_size_t offset)
510: {
511: floatreg = TRUE;
512: reflocal = TRUE;
513: if ((opcode & ~7) == 0xD8)
514: c = genfwait(c);
515: return genc1(c,opcode,modregxrm(2,reg,BPRM),FLfltreg,offset);
516: }
517:
518: /*******************************
519: * Decide if we need to gen an FWAIT.
520: */
521:
522: code *genfwait(code *c)
523: {
524: if (ADDFWAIT())
525: c = gen1(c,FWAIT);
526: return c;
527: }
528:
529: /***************************************
530: * Generate floating point instruction.
531: */
532:
533: code * genf2(code *c,unsigned op,unsigned rm)
534: {
535: return gen2(genfwait(c),op,rm);
536: }
537:
538: /***************************
539: * Put the 8087 flags into the CPU flags.
540: */
541:
542: STATIC code * cg87_87topsw(code *c)
543: {
544: /* Note that SAHF is not available on some early I64 processors
545: * and will cause a seg fault
546: */
547: c = cat(c,getregs(mAX));
548: if (config.target_cpu >= TARGET_80286)
549: c = genf2(c,0xDF,0xE0); // FSTSW AX
550: else
551: { c = genfltreg(c,0xD8+5,7,0); /* FSTSW floatreg[BP] */
552: genfwait(c); /* FWAIT */
553: genfltreg(c,0x8A,4,1); /* MOV AH,floatreg+1[BP] */
554: }
555: gen1(c,0x9E); // SAHF
556: code_orflag(c,CFpsw);
557: return c;
558: }
559:
560: /***************************
561: * Set the PSW based on the state of ST0.
562: * Input:
563: * pop if stack should be popped after test
564: * Returns:
565: * start of code appended to c.
566: */
567:
568: STATIC code * genftst(code *c,elem *e,int pop)
569: {
570: if (NOSAHF)
571: {
572: c = cat(c,push87());
573: c = gen2(c,0xD9,0xEE); // FLDZ
574: gen2(c,0xDF,0xE9); // FUCOMIP ST1
575: pop87();
576: if (pop)
577: { c = genf2(c,0xDD,modregrm(3,3,0)); // FPOP
578: pop87();
579: }
580: }
581: else if (config.flags4 & CFG4fastfloat) // if fast floating point
582: {
583: c = genf2(c,0xD9,0xE4); // FTST
584: c = cg87_87topsw(c); // put 8087 flags in CPU flags
585: if (pop)
586: { c = genf2(c,0xDD,modregrm(3,3,0)); // FPOP
587: pop87();
588: }
589: }
590: else if (config.target_cpu >= TARGET_80386)
591: {
592: // FUCOMP doesn't raise exceptions on QNANs, unlike FTST
593: c = cat(c,push87());
594: c = gen2(c,0xD9,0xEE); // FLDZ
595: gen2(c,pop ? 0xDA : 0xDD,0xE9); // FUCOMPP / FUCOMP
596: pop87();
597: if (pop)
598: pop87();
599: cg87_87topsw(c); // put 8087 flags in CPU flags
600: }
601: else
602: {
603: // Call library function which does not raise exceptions
604: regm_t regm = 0;
605:
606: c = cat(c,callclib(e,CLIBftest,®m,0));
607: if (pop)
608: { c = genf2(c,0xDD,modregrm(3,3,0)); // FPOP
609: pop87();
610: }
611: }
612: return c;
613: }
614:
615: /*************************************
616: * Determine if there is a special 8087 instruction to load
617: * constant e.
618: * Input:
619: * im 0 load real part
620: * 1 load imaginary part
621: * Returns:
622: * opcode if found
623: * 0 if not
624: */
625:
626: unsigned char loadconst(elem *e, int im)
627: #if __DMC__
628: __in
629: {
630: elem_debug(e);
631: assert(im == 0 || im == 1);
632: }
633: __body
634: #endif
635: {
636: static float fval[7] =
637: {0.0,1.0,PI,LOG2T,LOG2E,LOG2,LN2};
warning C4305: 'initializing' : truncation from 'double' to 'float'
warning C4305: 'initializing' : truncation from 'double' to 'float'
warning C4305: 'initializing' : truncation from 'double' to 'float'
warning C4305: 'initializing' : truncation from 'double' to 'float'
warning C4305: 'initializing' : truncation from 'double' to 'float'
638: static double dval[7] =
639: {0.0,1.0,PI,LOG2T,LOG2E,LOG2,LN2};
640: static long double ldval[7] =
641: #if __APPLE__ || __FreeBSD__ || __OpenBSD__ || __sun&&__SVR4
642: #define M_PIl 0x1.921fb54442d1846ap+1L // 3.14159 fldpi
643: #define M_LOG2T_L 0x1.a934f0979a3715fcp+1L // 3.32193 fldl2t
644: #define M_LOG2El 0x1.71547652b82fe178p+0L // 1.4427 fldl2e
645: #define M_LOG2_L 0x1.34413509f79fef32p-2L // 0.30103 fldlg2
646: #define M_LN2l 0x1.62e42fefa39ef358p-1L // 0.693147 fldln2
647: {0.0,1.0,M_PIl,M_LOG2T_L,M_LOG2El,M_LOG2_L,M_LN2l};
648: #elif __GNUC__
649: // BUG: should get proper 80 bit values for these
650: #define M_LOG2T_L LOG2T
651: #define M_LOG2_L LOG2
652: {0.0,1.0,M_PIl,M_LOG2T_L,M_LOG2El,M_LOG2_L,M_LN2l};
653: #elif _MSC_VER
654: // BUG: should get proper 80 bit values for these
655: #define M_LOG2T_L LOG2T
656: #define M_LOG2_L LOG2
657: {0.0,1.0,PI,M_LOG2T_L,LOG2E,M_LOG2_L,LN2};
658: #else
659: {0.0,1.0,M_PI_L,M_LOG2T_L,M_LOG2E_L,M_LOG2_L,M_LN2_L};
660: #endif
661: static char opcode[7 + 1] =
662: /* FLDZ,FLD1,FLDPI,FLDL2T,FLDL2E,FLDLG2,FLDLN2,0 */
663: {0xEE,0xE8,0xEB,0xE9,0xEA,0xEC,0xED,0};
warning C4309: 'initializing' : truncation of constant value
warning C4309: 'initializing' : truncation of constant value
warning C4309: 'initializing' : truncation of constant value
warning C4309: 'initializing' : truncation of constant value
warning C4309: 'initializing' : truncation of constant value
warning C4309: 'initializing' : truncation of constant value
warning C4309: 'initializing' : truncation of constant value
664: int i;
665: targ_float f;
666: targ_double d;
667: targ_ldouble ld;
668: int sz;
669: int zero;
670: void *p;
671: static char zeros[sizeof(long double)];
672:
673: if (im == 0)
674: {
675: switch (tybasic(e->Ety))
676: {
677: case TYfloat:
678: case TYifloat:
679: case TYcfloat:
680: f = e->EV.Vfloat;
681: sz = 4;
682: p = &f;
683: break;
684:
685: case TYdouble:
686: case TYdouble_alias:
687: case TYidouble:
688: case TYcdouble:
689: d = e->EV.Vdouble;
690: sz = 8;
691: p = &d;
692: break;
693:
694: case TYldouble:
695: case TYildouble:
696: case TYcldouble:
697: ld = e->EV.Vldouble;
698: sz = 10;
699: p = &ld;
700: break;
701:
702: default:
703: assert(0);
704: }
705: }
706: else
707: {
708: switch (tybasic(e->Ety))
709: {
710: case TYcfloat:
711: f = e->EV.Vcfloat.im;
712: sz = 4;
713: p = &f;
714: break;
715:
716: case TYcdouble:
717: d = e->EV.Vcdouble.im;
718: sz = 8;
719: p = &d;
720: break;
721:
722: case TYcldouble:
723: ld = e->EV.Vcldouble.im;
724: sz = 10;
725: p = &ld;
726: break;
727:
728: default:
729: assert(0);
730: }
731: }
732:
733: // Note that for this purpose, -0 is not regarded as +0,
734: // since FLDZ loads a +0
735: zero = (memcmp(p, zeros, sz) == 0);
warning C6385: Invalid data: accessing 'argument 2', the readable size is '8' bytes, but '10' bytes might be read: Lines: 636, 638, 640, 661, 664, 665, 666, 667, 668, 669, 670, 671, 673, 708, 722, 723, 724, 725, 735
736: if (zero && config.target_cpu >= TARGET_PentiumPro)
737: return 0xEE; // FLDZ is the only one with 1 micro-op
738:
739: // For some reason, these instructions take more clocks
740: if (config.flags4 & CFG4speed && config.target_cpu >= TARGET_Pentium)
741: return 0;
742:
743: if (zero)
744: return 0xEE;
745:
746: for (i = 1; i < arraysize(fval); i++)
747: {
748: switch (sz)
749: {
750: case 4:
751: if (fval[i] != f)
752: continue;
753: break;
754: case 8:
755: if (dval[i] != d)
756: continue;
757: break;
758: case 10:
759: if (ldval[i] != ld)
760: continue;
761: break;
762: default:
763: assert(0);
764: }
765: break;
766: }
767: return opcode[i];
768: }
769:
770: /******************************
771: * Given the result of an expression is in retregs,
772: * generate necessary code to return result in *pretregs.
773: */
774:
775:
776: code *fixresult87(elem *e,regm_t retregs,regm_t *pretregs)
777: {
778: regm_t regm;
779: tym_t tym;
780: code *c1,*c2;
781: unsigned sz;
782:
783: //printf("fixresult87(e = %p, retregs = x%x, *pretregs = x%x)\n", e,retregs,*pretregs);
784: //printf("fixresult87(e = %p, retregs = %s, *pretregs = %s)\n", e,regm_str(retregs),regm_str(*pretregs));
785: assert(!*pretregs || retregs);
786: c1 = CNIL;
787: c2 = CNIL;
788: tym = tybasic(e->Ety);
789: sz = tysize[tym];
790: //printf("tym = x%x, sz = %d\n", tym, sz);
791:
792: if (*pretregs & mST01)
793: return fixresult_complex87(e, retregs, pretregs);
794:
795: /* if retregs needs to be transferred into the 8087 */
796: if (*pretregs & mST0 && retregs & (mBP | ALLREGS))
797: {
798: assert(sz <= DOUBLESIZE);
799: if (!I16)
800: {
801:
802: if (*pretregs & mPSW)
803: { // Set flags
804: regm_t r = retregs | mPSW;
805: c1 = fixresult(e,retregs,&r);
806: }
807: c2 = push87();
808: if (sz == REGSIZE || (I64 && sz == 4))
809: {
810: unsigned reg = findreg(retregs);
811: c2 = genfltreg(c2,0x89,reg,0); // MOV fltreg,reg
812: genfltreg(c2,0xD9,0,0); // FLD float ptr fltreg
813: }
814: else
815: { unsigned msreg,lsreg;
816:
817: msreg = findregmsw(retregs);
818: lsreg = findreglsw(retregs);
819: c2 = genfltreg(c2,0x89,lsreg,0); // MOV fltreg,lsreg
820: genfltreg(c2,0x89,msreg,4); // MOV fltreg+4,msreg
821: genfltreg(c2,0xDD,0,0); // FLD double ptr fltreg
822: }
823: }
824: else
825: {
826: regm = (sz == FLOATSIZE) ? FLOATREGS : DOUBLEREGS;
827: regm |= *pretregs & mPSW;
828: c1 = fixresult(e,retregs,®m);
829: regm = 0; // don't worry about result from CLIBxxx
830: c2 = callclib(e,
831: ((sz == FLOATSIZE) ? CLIBfltto87 : CLIBdblto87),
832: ®m,0);
833: }
834: }
835: else if (*pretregs & (mBP | ALLREGS) && retregs & mST0)
836: { unsigned mf;
837: unsigned reg;
838:
839: assert(sz <= DOUBLESIZE);
840: mf = (sz == FLOATSIZE) ? MFfloat : MFdouble;
841: if (*pretregs & mPSW && !(retregs & mPSW))
842: c1 = genftst(c1,e,0);
843: /* FSTP floatreg */
844: pop87();
845: c1 = genfltreg(c1,ESC(mf,1),3,0);
846: genfwait(c1);
847: c2 = allocreg(pretregs,®,(sz == FLOATSIZE) ? TYfloat : TYdouble);
848: if (sz == FLOATSIZE)
849: {
850: if (!I16)
851: c2 = genfltreg(c2,0x8B,reg,0);
852: else
853: { c2 = genfltreg(c2,0x8B,reg,REGSIZE);
854: genfltreg(c2,0x8B,findreglsw(*pretregs),0);
855: }
856: }
857: else
858: { assert(sz == DOUBLESIZE);
859: if (I16)
860: { c2 = genfltreg(c2,0x8B,AX,6);
861: genfltreg(c2,0x8B,BX,4);
862: genfltreg(c2,0x8B,CX,2);
863: genfltreg(c2,0x8B,DX,0);
864: }
865: else if (I32)
866: { c2 = genfltreg(c2,0x8B,reg,REGSIZE);
867: genfltreg(c2,0x8B,findreglsw(*pretregs),0);
868: }
869: else // I64
870: {
871: c2 = genfltreg(c2,0x8B,reg,0);
872: code_orrex(c2, REX_W);
873: }
874: }
875: }
876: else if (*pretregs == 0 && retregs == mST0)
877: {
878: c1 = genf2(c1,0xDD,modregrm(3,3,0)); // FPOP
879: pop87();
880: }
881: else
882: { if (*pretregs & mPSW)
883: { if (!(retregs & mPSW))
884: { assert(retregs & mST0);
885: c1 = genftst(c1,e,!(*pretregs & mST0)); // FTST
886: }
887: }
888: if (*pretregs & mST0 && retregs & XMMREGS)
889: {
890: assert(sz <= DOUBLESIZE);
891: unsigned mf = (sz == FLOATSIZE) ? MFfloat : MFdouble;
892: // MOVD floatreg,XMM?
893: unsigned reg = findreg(retregs);
894: c1 = genfltreg(c1,0xF20F11,reg - XMM0,0);
895: c2 = push87();
896: c2 = genfltreg(c2,ESC(mf,1),0,0); // FLD float/double ptr fltreg
897: }
898: else if (retregs & mST0 && *pretregs & XMMREGS)
899: {
900: assert(sz <= DOUBLESIZE);
901: unsigned mf = (sz == FLOATSIZE) ? MFfloat : MFdouble;
902: // FSTP floatreg
903: pop87();
904: c1 = genfltreg(c1,ESC(mf,1),3,0);
905: genfwait(c1);
906: // MOVD XMM?,floatreg
907: unsigned reg;
908: c2 = allocreg(pretregs,®,(sz == FLOATSIZE) ? TYfloat : TYdouble);
909: c2 = genfltreg(c2,0xF20F10,reg -XMM0,0);
910: }
911: else
912: assert(!(*pretregs & mST0) || (retregs & mST0));
913: }
914: if (*pretregs & mST0)
915: note87(e,0,0);
916: return cat(c1,c2);
917: }
918:
919: /********************************
920: * Generate in-line 8087 code for the following operators:
921: * add
922: * min
923: * mul
924: * div
925: * cmp
926: */
927:
// Reverse the order that the op is done in.
// Each entry equals its own index minus 1, except that the values 4/5 and
// 6/7 are exchanged with each other.
// NOTE(review): presumably indexed with (op + 1), so that -1 maps to -1 and
// the subtract/divide ops (4 and 6 in orth87) map to their reversed forms;
// the use site is outside this part of the file -- confirm.
static const char oprev[9] = { -1,0,1,2,3,5,4,7,6 };
930:
931: code *orth87(elem *e,regm_t *pretregs)
932: {
933: unsigned op;
934: code *c1,*c2,*c3,*c4;
935: code *cx;
936: regm_t retregs;
937: regm_t resregm;
938: elem *e1;
939: elem *e2;
940: int e2oper;
941: int eoper;
942: unsigned sz2;
943: int clib = CLIBMAX; // initialize to invalid value
944: int reverse = 0;
945:
946: //printf("orth87(+e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
947: #if 1 // we could be evaluating / for side effects only
948: assert(*pretregs != 0);
949: #endif
950: retregs = mST0;
951: resregm = mST0;
952:
953: e1 = e->E1;
954: e2 = e->E2;
955: c3 = CNIL;
956: c4 = CNIL;
957: sz2 = tysize(e1->Ety);
958: if (tycomplex(e1->Ety))
959: sz2 /= 2;
960:
961: eoper = e->Eoper;
962: if (eoper == OPmul && e2->Eoper == OPconst && el_toldouble(e->E2) == 2.0L)
963: {
964: // Perform "mul 2.0" as fadd ST(0), ST
965: c1 = codelem(e1,&retregs,FALSE);
966: c1 = genf2(c1, 0xDC, 0xC0); // fadd ST(0), ST;
967: c2 = fixresult87(e,mST0,pretregs); // result is in ST(0).
968: freenode(e2);
969: return cat(c1,c2);
970: }
971:
972: if (OTrel(eoper))
973: eoper = OPeqeq;
974: #define X(op, ty1, ty2) (((op) << 16) + (ty1) * 256 + (ty2))
975: switch (X(eoper, tybasic(e1->Ety), tybasic(e2->Ety)))
976: {
977: case X(OPadd, TYfloat, TYfloat):
978: case X(OPadd, TYdouble, TYdouble):
979: case X(OPadd, TYdouble_alias, TYdouble_alias):
980: case X(OPadd, TYldouble, TYldouble):
981: case X(OPadd, TYldouble, TYdouble):
982: case X(OPadd, TYdouble, TYldouble):
983: case X(OPadd, TYifloat, TYifloat):
984: case X(OPadd, TYidouble, TYidouble):
985: case X(OPadd, TYildouble, TYildouble):
986: op = 0; // FADDP
987: break;
988:
989: case X(OPmin, TYfloat, TYfloat):
990: case X(OPmin, TYdouble, TYdouble):
991: case X(OPmin, TYdouble_alias, TYdouble_alias):
992: case X(OPmin, TYldouble, TYldouble):
993: case X(OPmin, TYldouble, TYdouble):
994: case X(OPmin, TYdouble, TYldouble):
995: case X(OPmin, TYifloat, TYifloat):
996: case X(OPmin, TYidouble, TYidouble):
997: case X(OPmin, TYildouble, TYildouble):
998: op = 4; // FSUBP
999: break;
1000:
1001: case X(OPmul, TYfloat, TYfloat):
1002: case X(OPmul, TYdouble, TYdouble):
1003: case X(OPmul, TYdouble_alias, TYdouble_alias):
1004: case X(OPmul, TYldouble, TYldouble):
1005: case X(OPmul, TYldouble, TYdouble):
1006: case X(OPmul, TYdouble, TYldouble):
1007: case X(OPmul, TYifloat, TYifloat):
1008: case X(OPmul, TYidouble, TYidouble):
1009: case X(OPmul, TYildouble, TYildouble):
1010: case X(OPmul, TYfloat, TYifloat):
1011: case X(OPmul, TYdouble, TYidouble):
1012: case X(OPmul, TYldouble, TYildouble):
1013: case X(OPmul, TYifloat, TYfloat):
1014: case X(OPmul, TYidouble, TYdouble):
1015: case X(OPmul, TYildouble, TYldouble):
1016: op = 1; // FMULP
1017: break;
1018:
1019: case X(OPdiv, TYfloat, TYfloat):
1020: case X(OPdiv, TYdouble, TYdouble):
1021: case X(OPdiv, TYdouble_alias, TYdouble_alias):
1022: case X(OPdiv, TYldouble, TYldouble):
1023: case X(OPdiv, TYldouble, TYdouble):
1024: case X(OPdiv, TYdouble, TYldouble):
1025: case X(OPdiv, TYifloat, TYifloat):
1026: case X(OPdiv, TYidouble, TYidouble):
1027: case X(OPdiv, TYildouble, TYildouble):
1028: op = 6; // FDIVP
1029: break;
1030:
1031: case X(OPmod, TYfloat, TYfloat):
1032: case X(OPmod, TYdouble, TYdouble):
1033: case X(OPmod, TYdouble_alias, TYdouble_alias):
1034: case X(OPmod, TYldouble, TYldouble):
1035: case X(OPmod, TYfloat, TYifloat):
1036: case X(OPmod, TYdouble, TYidouble):
1037: case X(OPmod, TYldouble, TYildouble):
1038: case X(OPmod, TYifloat, TYifloat):
1039: case X(OPmod, TYidouble, TYidouble):
1040: case X(OPmod, TYildouble, TYildouble):
1041: case X(OPmod, TYifloat, TYfloat):
1042: case X(OPmod, TYidouble, TYdouble):
1043: case X(OPmod, TYildouble, TYldouble):
1044: op = (unsigned) -1;
1045: break;
1046:
1047: case X(OPeqeq, TYfloat, TYfloat):
1048: case X(OPeqeq, TYdouble, TYdouble):
1049: case X(OPeqeq, TYdouble_alias, TYdouble_alias):
1050: case X(OPeqeq, TYldouble, TYldouble):
1051: case X(OPeqeq, TYifloat, TYifloat):
1052: case X(OPeqeq, TYidouble, TYidouble):
1053: case X(OPeqeq, TYildouble, TYildouble):
1054: assert(OTrel(e->Eoper));
1055: assert((*pretregs & mST0) == 0);
1056: c1 = codelem(e1,&retregs,FALSE);
1057: note87(e1,0,0);
1058: resregm = mPSW;
1059:
1060: if (rel_exception(e->Eoper) || config.flags4 & CFG4fastfloat)
1061: {
1062: if (cnst(e2) && !boolres(e2))
1063: {
1064: if (NOSAHF)
1065: {
1066: c1 = cat(c1,push87());
1067: c1 = gen2(c1,0xD9,0xEE); // FLDZ
1068: gen2(c1,0xDF,0xF1); // FCOMIP ST1
1069: pop87();
1070: }
1071: else
1072: { c1 = genf2(c1,0xD9,0xE4); // FTST
1073: c1 = cg87_87topsw(c1);
1074: }
1075: c2 = genf2(NULL,0xDD,modregrm(3,3,0)); // FPOP
1076: pop87();
1077: }
1078: else if (NOSAHF)
1079: {
1080: note87(e1,0,0);
1081: c2 = load87(e2,0,&retregs,e1,-1);
1082: c2 = cat(c2,makesure87(e1,0,1,0));
1083: resregm = 0;
1084: //c2 = genf2(c2,0xD9,0xC8 + 1); // FXCH ST1
1085: c2 = gen2(c2,0xDF,0xF1); // FCOMIP ST1
1086: pop87();
1087: genf2(c2,0xDD,modregrm(3,3,0)); // FPOP
1088: pop87();
1089: }
1090: else
1091: {
1092: c2 = load87(e2, 0, pretregs, e1, 3); // FCOMPP
1093: }
1094: }
1095: else
1096: {
1097: if (cnst(e2) && !boolres(e2) &&
1098: config.target_cpu < TARGET_80386)
1099: {
1100: regm_t regm = 0;
1101:
1102: c2 = callclib(e,CLIBftest0,®m,0);
1103: pop87();
1104: }
1105: else
1106: {
1107: note87(e1,0,0);
1108: c2 = load87(e2,0,&retregs,e1,-1);
1109: c2 = cat(c2,makesure87(e1,0,1,0));
1110: resregm = 0;
1111: if (NOSAHF)
1112: {
1113: c3 = gen2(CNIL,0xDF,0xE9); // FUCOMIP ST1
1114: pop87();
1115: genf2(c3,0xDD,modregrm(3,3,0)); // FPOP
1116: pop87();
1117: }
1118: else if (config.target_cpu >= TARGET_80386)
1119: {
1120: c3 = gen2(CNIL,0xDA,0xE9); // FUCOMPP
1121: c3 = cg87_87topsw(c3);
1122: pop87();
1123: pop87();
1124: }
1125: else
1126: // Call a function instead so that exceptions
1127: // are not generated.
1128: c3 = callclib(e,CLIBfcompp,&resregm,0);
1129: }
1130: }
1131:
1132: freenode(e2);
1133: return cat4(c1,c2,c3,c4);
1134:
1135: case X(OPadd, TYcfloat, TYcfloat):
1136: case X(OPadd, TYcdouble, TYcdouble):
1137: case X(OPadd, TYcldouble, TYcldouble):
1138: case X(OPadd, TYcfloat, TYfloat):
1139: case X(OPadd, TYcdouble, TYdouble):
1140: case X(OPadd, TYcldouble, TYldouble):
1141: case X(OPadd, TYfloat, TYcfloat):
1142: case X(OPadd, TYdouble, TYcdouble):
1143: case X(OPadd, TYldouble, TYcldouble):
1144: goto Lcomplex;
1145:
1146: case X(OPadd, TYifloat, TYcfloat):
1147: case X(OPadd, TYidouble, TYcdouble):
1148: case X(OPadd, TYildouble, TYcldouble):
1149: goto Lcomplex2;
1150:
1151: case X(OPmin, TYcfloat, TYcfloat):
1152: case X(OPmin, TYcdouble, TYcdouble):
1153: case X(OPmin, TYcldouble, TYcldouble):
1154: case X(OPmin, TYcfloat, TYfloat):
1155: case X(OPmin, TYcdouble, TYdouble):
1156: case X(OPmin, TYcldouble, TYldouble):
1157: case X(OPmin, TYfloat, TYcfloat):
1158: case X(OPmin, TYdouble, TYcdouble):
1159: case X(OPmin, TYldouble, TYcldouble):
1160: goto Lcomplex;
1161:
1162: case X(OPmin, TYifloat, TYcfloat):
1163: case X(OPmin, TYidouble, TYcdouble):
1164: case X(OPmin, TYildouble, TYcldouble):
1165: goto Lcomplex2;
1166:
1167: case X(OPmul, TYcfloat, TYcfloat):
1168: case X(OPmul, TYcdouble, TYcdouble):
1169: case X(OPmul, TYcldouble, TYcldouble):
1170: clib = CLIBcmul;
1171: goto Lcomplex;
1172:
1173: case X(OPdiv, TYcfloat, TYcfloat):
1174: case X(OPdiv, TYcdouble, TYcdouble):
1175: case X(OPdiv, TYcldouble, TYcldouble):
1176: case X(OPdiv, TYfloat, TYcfloat):
1177: case X(OPdiv, TYdouble, TYcdouble):
1178: case X(OPdiv, TYldouble, TYcldouble):
1179: case X(OPdiv, TYifloat, TYcfloat):
1180: case X(OPdiv, TYidouble, TYcdouble):
1181: case X(OPdiv, TYildouble, TYcldouble):
1182: clib = CLIBcdiv;
1183: goto Lcomplex;
1184:
1185: case X(OPdiv, TYifloat, TYfloat):
1186: case X(OPdiv, TYidouble, TYdouble):
1187: case X(OPdiv, TYildouble, TYldouble):
1188: op = 6; // FDIVP
1189: break;
1190:
1191: Lcomplex:
1192: c1 = loadComplex(e1);
1193: c2 = loadComplex(e2);
1194: c3 = makesure87(e1, sz2, 2, 0);
1195: c3 = cat(c3,makesure87(e1, 0, 3, 0));
1196: retregs = mST01;
1197: if (eoper == OPadd)
1198: {
1199: c4 = genf2(NULL, 0xDE, 0xC0+2); // FADDP ST(2),ST
1200: genf2(c4, 0xDE, 0xC0+2); // FADDP ST(2),ST
1201: pop87();
1202: pop87();
1203: }
1204: else if (eoper == OPmin)
1205: {
1206: c4 = genf2(NULL, 0xDE, 0xE8+2); // FSUBP ST(2),ST
1207: genf2(c4, 0xDE, 0xE8+2); // FSUBP ST(2),ST
1208: pop87();
1209: pop87();
1210: }
1211: else
1212: c4 = callclib(e, clib, &retregs, 0);
1213: c4 = cat(c4, fixresult_complex87(e, retregs, pretregs));
1214: return cat4(c1,c2,c3,c4);
1215:
1216: Lcomplex2:
1217: retregs = mST0;
1218: c1 = codelem(e1, &retregs, FALSE);
1219: note87(e1, 0, 0);
1220: c2 = loadComplex(e2);
1221: c3 = makesure87(e1, 0, 2, 0);
1222: retregs = mST01;
1223: if (eoper == OPadd)
1224: {
1225: c4 = genf2(NULL, 0xDE, 0xC0+2); // FADDP ST(2),ST
1226: }
1227: else if (eoper == OPmin)
1228: {
1229: c4 = genf2(NULL, 0xDE, 0xE8+2); // FSUBP ST(2),ST
1230: c4 = genf2(c4, 0xD9, 0xE0); // FCHS
1231: }
1232: else
1233: assert(0);
1234: pop87();
1235: c4 = genf2(c4, 0xD9, 0xC8 + 1); // FXCH ST(1)
1236: c4 = cat(c4, fixresult_complex87(e, retregs, pretregs));
1237: return cat4(c1,c2,c3,c4);
1238:
1239: case X(OPeqeq, TYcfloat, TYcfloat):
1240: case X(OPeqeq, TYcdouble, TYcdouble):
1241: case X(OPeqeq, TYcldouble, TYcldouble):
1242: case X(OPeqeq, TYcfloat, TYifloat):
1243: case X(OPeqeq, TYcdouble, TYidouble):
1244: case X(OPeqeq, TYcldouble, TYildouble):
1245: case X(OPeqeq, TYcfloat, TYfloat):
1246: case X(OPeqeq, TYcdouble, TYdouble):
1247: case X(OPeqeq, TYcldouble, TYldouble):
1248: case X(OPeqeq, TYifloat, TYcfloat):
1249: case X(OPeqeq, TYidouble, TYcdouble):
1250: case X(OPeqeq, TYildouble, TYcldouble):
1251: case X(OPeqeq, TYfloat, TYcfloat):
1252: case X(OPeqeq, TYdouble, TYcdouble):
1253: case X(OPeqeq, TYldouble, TYcldouble):
1254: case X(OPeqeq, TYfloat, TYifloat):
1255: case X(OPeqeq, TYdouble, TYidouble):
1256: case X(OPeqeq, TYldouble, TYildouble):
1257: case X(OPeqeq, TYifloat, TYfloat):
1258: case X(OPeqeq, TYidouble, TYdouble):
1259: case X(OPeqeq, TYildouble, TYldouble):
1260: c1 = loadComplex(e1);
1261: c2 = loadComplex(e2);
1262: c3 = makesure87(e1, sz2, 2, 0);
1263: c3 = cat(c3,makesure87(e1, 0, 3, 0));
1264: retregs = 0;
1265: c4 = callclib(e, CLIBccmp, &retregs, 0);
1266: return cat4(c1,c2,c3,c4);
1267:
1268:
1269: case X(OPadd, TYfloat, TYifloat):
1270: case X(OPadd, TYdouble, TYidouble):
1271: case X(OPadd, TYldouble, TYildouble):
1272: case X(OPadd, TYifloat, TYfloat):
1273: case X(OPadd, TYidouble, TYdouble):
1274: case X(OPadd, TYildouble, TYldouble):
1275:
1276: case X(OPmin, TYfloat, TYifloat):
1277: case X(OPmin, TYdouble, TYidouble):
1278: case X(OPmin, TYldouble, TYildouble):
1279: case X(OPmin, TYifloat, TYfloat):
1280: case X(OPmin, TYidouble, TYdouble):
1281: case X(OPmin, TYildouble, TYldouble):
1282: retregs = mST0;
1283: c1 = codelem(e1, &retregs, FALSE);
1284: note87(e1, 0, 0);
1285: c2 = codelem(e2, &retregs, FALSE);
1286: c3 = makesure87(e1, 0, 1, 0);
1287: if (eoper == OPmin)
1288: c3 = genf2(c3, 0xD9, 0xE0); // FCHS
1289: if (tyimaginary(e1->Ety))
1290: c3 = genf2(c3, 0xD9, 0xC8 + 1); // FXCH ST(1)
1291: retregs = mST01;
1292: c4 = fixresult_complex87(e, retregs, pretregs);
1293: return cat4(c1,c2,c3,c4);
1294:
1295: case X(OPadd, TYcfloat, TYifloat):
1296: case X(OPadd, TYcdouble, TYidouble):
1297: case X(OPadd, TYcldouble, TYildouble):
1298: op = 0;
1299: goto Lci;
1300:
1301: case X(OPmin, TYcfloat, TYifloat):
1302: case X(OPmin, TYcdouble, TYidouble):
1303: case X(OPmin, TYcldouble, TYildouble):
1304: op = 4;
1305: goto Lci;
1306:
1307: Lci:
1308: c1 = loadComplex(e1);
1309: retregs = mST0;
1310: c2 = load87(e2,sz2,&retregs,e1,op);
1311: freenode(e2);
1312: retregs = mST01;
1313: c3 = makesure87(e1,0,1,0);
1314: c4 = fixresult_complex87(e, retregs, pretregs);
1315: return cat4(c1,c2,c3,c4);
1316:
1317: case X(OPmul, TYcfloat, TYfloat):
1318: case X(OPmul, TYcdouble, TYdouble):
1319: case X(OPmul, TYcldouble, TYldouble):
1320: c1 = loadComplex(e1);
1321: goto Lcm1;
1322:
1323: case X(OPmul, TYcfloat, TYifloat):
1324: case X(OPmul, TYcdouble, TYidouble):
1325: case X(OPmul, TYcldouble, TYildouble):
1326: c1 = loadComplex(e1);
1327: c1 = genf2(c1, 0xD9, 0xE0); // FCHS
1328: genf2(c1,0xD9,0xC8 + 1); // FXCH ST(1)
1329: if (elemisone(e2))
1330: {
1331: freenode(e2);
1332: c2 = NULL;
1333: c3 = NULL;
1334: goto Lcd4;
1335: }
1336: goto Lcm1;
1337:
1338: Lcm1:
1339: retregs = mST0;
1340: c2 = codelem(e2, &retregs, FALSE);
1341: c3 = makesure87(e1, sz2, 1, 0);
1342: c3 = cat(c3,makesure87(e1, 0, 2, 0));
1343: goto Lcm2;
1344:
1345: case X(OPmul, TYfloat, TYcfloat):
1346: case X(OPmul, TYdouble, TYcdouble):
1347: case X(OPmul, TYldouble, TYcldouble):
1348: retregs = mST0;
1349: c1 = codelem(e1, &retregs, FALSE);
1350: note87(e1, 0, 0);
1351: c2 = loadComplex(e2);
1352: c3 = makesure87(e1, 0, 2, 0);
1353: c3 = genf2(c3,0xD9,0xC8 + 1); // FXCH ST(1)
1354: genf2(c3,0xD9,0xC8 + 2); // FXCH ST(2)
1355: goto Lcm2;
1356:
1357: case X(OPmul, TYifloat, TYcfloat):
1358: case X(OPmul, TYidouble, TYcdouble):
1359: case X(OPmul, TYildouble, TYcldouble):
1360: retregs = mST0;
1361: c1 = codelem(e1, &retregs, FALSE);
1362: note87(e1, 0, 0);
1363: c2 = loadComplex(e2);
1364: c3 = makesure87(e1, 0, 2, 0);
1365: c3 = genf2(c3, 0xD9, 0xE0); // FCHS
1366: genf2(c3,0xD9,0xC8 + 2); // FXCH ST(2)
1367: goto Lcm2;
1368:
1369: Lcm2:
1370: c3 = genf2(c3,0xDC,0xC8 + 2); // FMUL ST(2), ST
1371: genf2(c3,0xDE,0xC8 + 1); // FMULP ST(1), ST
1372: goto Lcd3;
1373:
1374: case X(OPdiv, TYcfloat, TYfloat):
1375: case X(OPdiv, TYcdouble, TYdouble):
1376: case X(OPdiv, TYcldouble, TYldouble):
1377: c1 = loadComplex(e1);
1378: retregs = mST0;
1379: c2 = codelem(e2, &retregs, FALSE);
1380: c3 = makesure87(e1, sz2, 1, 0);
1381: c3 = cat(c3,makesure87(e1, 0, 2, 0));
1382: goto Lcd1;
1383:
1384: case X(OPdiv, TYcfloat, TYifloat):
1385: case X(OPdiv, TYcdouble, TYidouble):
1386: case X(OPdiv, TYcldouble, TYildouble):
1387: c1 = loadComplex(e1);
1388: c1 = genf2(c1,0xD9,0xC8 + 1); // FXCH ST(1)
1389: xchg87(0, 1);
1390: genf2(c1, 0xD9, 0xE0); // FCHS
1391: retregs = mST0;
1392: c2 = codelem(e2, &retregs, FALSE);
1393: c3 = makesure87(e1, 0, 1, 0);
1394: c3 = cat(c3,makesure87(e1, sz2, 2, 0));
1395: Lcd1:
1396: c3 = genf2(c3,0xDC,0xF8 + 2); // FDIV ST(2), ST
1397: genf2(c3,0xDE,0xF8 + 1); // FDIVP ST(1), ST
1398: Lcd3:
1399: pop87();
1400: Lcd4:
1401: retregs = mST01;
1402: c4 = fixresult_complex87(e, retregs, pretregs);
1403: return cat4(c1, c2, c3, c4);
1404:
1405: case X(OPmod, TYcfloat, TYfloat):
1406: case X(OPmod, TYcdouble, TYdouble):
1407: case X(OPmod, TYcldouble, TYldouble):
1408: case X(OPmod, TYcfloat, TYifloat):
1409: case X(OPmod, TYcdouble, TYidouble):
1410: case X(OPmod, TYcldouble, TYildouble):
1411: /*
1412: fld E1.re
1413: fld E1.im
1414: fld E2
1415: fxch ST(1)
1416: FM1: fprem
1417: fstsw word ptr sw
1418: fwait
1419: mov AH, byte ptr sw+1
1420: jp FM1
1421: fxch ST(2)
1422: FM2: fprem
1423: fstsw word ptr sw
1424: fwait
1425: mov AH, byte ptr sw+1
1426: jp FM2
1427: fstp ST(1)
1428: fxch ST(1)
1429: */
1430: c1 = loadComplex(e1);
1431: retregs = mST0;
1432: c2 = codelem(e2, &retregs, FALSE);
1433: c3 = makesure87(e1, sz2, 1, 0);
1434: c3 = cat(c3,makesure87(e1, 0, 2, 0));
1435: c3 = genf2(c3, 0xD9, 0xC8 + 1); // FXCH ST(1)
1436:
1437: cx = gen2(NULL, 0xD9, 0xF8); // FPREM
1438: cx = cg87_87topsw(cx);
1439: cx = genjmp(cx, JP, FLcode, (block *)cx); // JP FM1
1440: cx = genf2(cx, 0xD9, 0xC8 + 2); // FXCH ST(2)
1441: c3 = cat(c3,cx);
1442:
1443: cx = gen2(NULL, 0xD9, 0xF8); // FPREM
1444: cx = cg87_87topsw(cx);
1445: cx = genjmp(cx, JP, FLcode, (block *)cx); // JP FM2
1446: cx = genf2(cx,0xDD,0xD8 + 1); // FSTP ST(1)
1447: cx = genf2(cx, 0xD9, 0xC8 + 1); // FXCH ST(1)
1448: c3 = cat(c3,cx);
1449:
1450: goto Lcd3;
1451:
1452: default:
1453: #ifdef DEBUG
1454: elem_print(e);
1455: #endif
1456: assert(0);
1457: break;
1458: }
1459: #undef X
1460:
1461: e2oper = e2->Eoper;
1462:
1463: /* Move double-sized operand into the second position if there's a chance
1464: * it will allow combining a load with an operation (DMD Bugzilla 2905)
1465: */
1466: if ( ((tybasic(e1->Ety) == TYdouble)
1467: && ((e1->Eoper == OPvar) || (e1->Eoper == OPconst))
1468: && (tybasic(e2->Ety) != TYdouble)) ||
1469: (e1->Eoper == OPconst) ||
1470: (e1->Eoper == OPvar &&
1471: ((e1->Ety & (mTYconst | mTYimmutable) && !OTleaf(e2oper)) ||
1472: (e2oper == OPd_f &&
1473: (e2->E1->Eoper == OPs32_d || e2->E1->Eoper == OPs64_d || e2->E1->Eoper == OPs16_d) &&
1474: e2->E1->E1->Eoper == OPvar
1475: ) ||
1476: ((e2oper == OPs32_d || e2oper == OPs64_d || e2oper == OPs16_d) &&
1477: e2->E1->Eoper == OPvar
1478: )
1479: )
1480: )
1481: )
1482: { // Reverse order of evaluation
1483: e1 = e->E2;
1484: e2 = e->E1;
1485: op = oprev[op + 1];
1486: reverse ^= 1;
1487: }
1488:
1489: c1 = codelem(e1,&retregs,FALSE);
1490: note87(e1,0,0);
1491:
1492: if (config.flags4 & CFG4fdivcall && e->Eoper == OPdiv)
1493: {
1494: regm_t retregs = mST0;
warning C6246: Local declaration of 'retregs' hides declaration of the same name in outer scope. For additional information, see previous declaration at line '936' of 'c:\projects\extern\d\dmd\src\backend\cg87.c': Lines: 936
1495: c2 = load87(e2,0,&retregs,e1,-1);
1496: c2 = cat(c2,makesure87(e1,0,1,0));
1497: if (op == 7) // if reverse divide
1498: c2 = genf2(c2,0xD9,0xC8 + 1); // FXCH ST(1)
1499: c2 = cat(c2,callclib(e,CLIBfdiv87,&retregs,0));
1500: pop87();
1501: resregm = mST0;
1502: freenode(e2);
1503: c4 = fixresult87(e,resregm,pretregs);
1504: }
1505: else if (e->Eoper == OPmod)
1506: {
1507: /*
1508: * fld tbyte ptr y
1509: * fld tbyte ptr x // ST = x, ST1 = y
1510: * FM1: // We don't use fprem1 because for some inexplicable
1511: * // reason we get -5 when we do _modulo(15, 10)
1512: * fprem // ST = ST % ST1
1513: * fstsw word ptr sw
1514: * fwait
1515: * mov AH,byte ptr sw+1 // get msb of status word in AH
1516: * sahf // transfer to flags
1517: * jp FM1 // continue till ST < ST1
1518: * fstp ST(1) // leave remainder on stack
1519: */
1520: regm_t retregs = mST0;
warning C6246: Local declaration of 'retregs' hides declaration of the same name in outer scope. For additional information, see previous declaration at line '936' of 'c:\projects\extern\d\dmd\src\backend\cg87.c': Lines: 936
1521: c2 = load87(e2,0,&retregs,e1,-1);
1522: c2 = cat(c2,makesure87(e1,0,1,0)); // now have x,y on stack; need y,x
1523: if (!reverse) // if not reverse modulo
1524: c2 = genf2(c2,0xD9,0xC8 + 1); // FXCH ST(1)
1525:
1526: c3 = gen2(NULL, 0xD9, 0xF8); // FM1: FPREM
1527: c3 = cg87_87topsw(c3);
1528: c3 = genjmp(c3, JP, FLcode, (block *)c3); // JP FM1
1529: c3 = genf2(c3,0xDD,0xD8 + 1); // FSTP ST(1)
1530:
1531: pop87();
1532: resregm = mST0;
1533: freenode(e2);
1534: c4 = fixresult87(e,resregm,pretregs);
1535: }
1536: else
1537: { c2 = load87(e2,0,pretregs,e1,op);
1538: freenode(e2);
1539: }
1540: if (*pretregs & mST0)
1541: note87(e,0,0);
1542: //printf("orth87(-e = %p, *pretregs = x%x)\n", e, *pretregs);
1543: return cat4(c1,c2,c3,c4);
1544: }
1545:
1546: /*****************************
1547: * Load e into ST01.
1548: */
1549:
1550: code *loadComplex(elem *e)
1551: { int sz;
1552: regm_t retregs;
1553: code *c;
1554:
1555: sz = tysize(e->Ety);
1556: switch (tybasic(e->Ety))
1557: {
1558: case TYfloat:
1559: case TYdouble:
1560: case TYldouble:
1561: retregs = mST0;
1562: c = codelem(e,&retregs,FALSE);
1563: // Convert to complex with a 0 for the imaginary part
1564: c = cat(c, push87());
1565: c = gen2(c,0xD9,0xEE); // FLDZ
1566: break;
1567:
1568: case TYifloat:
1569: case TYidouble:
1570: case TYildouble:
1571: // Convert to complex with a 0 for the real part
1572: c = push87();
1573: c = gen2(c,0xD9,0xEE); // FLDZ
1574: retregs = mST0;
1575: c = cat(c, codelem(e,&retregs,FALSE));
1576: break;
1577:
1578: case TYcfloat:
1579: case TYcdouble:
1580: case TYcldouble:
1581: sz /= 2;
1582: retregs = mST01;
1583: c = codelem(e,&retregs,FALSE);
1584: break;
1585:
1586: default:
1587: assert(0);
1588: }
1589: note87(e, 0, 1);
1590: note87(e, sz, 0);
1591: return c;
1592: }
1593:
1594: /*************************
1595: * If op == -1, load expression e into ST0.
1596: * else compute (eleft op e), eleft is in ST0.
1597: * Must follow same logic as cmporder87();
1598: */
1599:
1600: code *load87(elem *e,unsigned eoffset,regm_t *pretregs,elem *eleft,int op)
1601: {
1602: code *ccomma,*c,*c2,*cpush;
1603: code cs;
1604: regm_t retregs;
1605: unsigned reg,mf,mf1;
1606: int opr;
1607: unsigned char ldop;
1608: tym_t ty;
1609: int i;
1610:
1611: #if NDPP
1612: printf("+load87(e=%p, eoffset=%d, *pretregs=%s, eleft=%p, op=%d, stackused = %d)\n",e,eoffset,regm_str(*pretregs),eleft,op,stackused);
1613: #endif
1614: elem_debug(e);
1615: ccomma = NULL;
1616: cpush = NULL;
1617: if (ADDFWAIT())
1618: cs.Iflags = CFwait;
1619: else
1620: cs.Iflags = 0;
1621: cs.Irex = 0;
1622: opr = oprev[op + 1];
1623: ty = tybasic(e->Ety);
1624: if ((ty == TYldouble || ty == TYildouble) &&
1625: op != -1 && e->Eoper != OPd_ld)
1626: goto Ldefault;
1627: mf = (ty == TYfloat || ty == TYifloat || ty == TYcfloat) ? MFfloat : MFdouble;
1628: L5:
1629: switch (e->Eoper)
1630: {
1631: case OPcomma:
1632: ccomma = docommas(&e);
1633: // if (op != -1)
1634: // ccomma = cat(ccomma,makesure87(eleft,eoffset,0,0));
1635: goto L5;
1636:
1637: case OPvar:
1638: notreg(e);
1639: case OPind:
1640: L2:
warning C4102: 'L2' : unreferenced label
1641: if (op != -1)
1642: {
1643: if (e->Ecount && e->Ecount != e->Ecomsub &&
1644: (i = cse_get(e, 0)) >= 0)
1645: { static unsigned char b2[8] = {0xC0,0xC8,0xD0,0xD8,0xE0,0xE8,0xF0,0xF8};
1646:
1647: c = genf2(NULL,0xD8,b2[op] + i); // Fop ST(i)
1648: }
1649: else
1650: {
1651: c = getlvalue(&cs,e,0);
1652: if (I64)
1653: cs.Irex &= ~REX_W; // don't use for x87 ops
1654: c = cat(c,makesure87(eleft,eoffset,0,0));
1655: cs.Iop = ESC(mf,0);
1656: cs.Irm |= modregrm(0,op,0);
1657: c = gen(c,&cs);
1658: }
1659: }
1660: else
1661: {
1662: cpush = push87();
1663: switch (ty)
1664: {
1665: case TYfloat:
1666: case TYdouble:
1667: case TYifloat:
1668: case TYidouble:
1669: case TYcfloat:
1670: case TYcdouble:
1671: case TYdouble_alias:
1672: c = loadea(e,&cs,ESC(mf,1),0,0,0,0); // FLD var
1673: break;
1674: case TYldouble:
1675: case TYildouble:
1676: case TYcldouble:
1677: c = loadea(e,&cs,0xDB,5,0,0,0); // FLD var
1678: break;
1679: default:
1680: // __debug printf("ty = x%x\n", ty);
1681: assert(0);
1682: break;
1683: }
1684: note87(e,0,0);
1685: }
1686: break;
1687: case OPd_f:
1688: case OPf_d:
1689: case OPd_ld:
1690: mf1 = (tybasic(e->E1->Ety) == TYfloat || tybasic(e->E1->Ety) == TYifloat)
1691: ? MFfloat : MFdouble;
1692: if (op != -1 && stackused)
1693: note87(eleft,eoffset,0); // don't trash this value
1694: if (e->E1->Eoper == OPvar || e->E1->Eoper == OPind)
1695: {
1696: #if 1
1697: L4:
1698: c = getlvalue(&cs,e->E1,0);
1699: cs.Iop = ESC(mf1,0);
1700: if (ADDFWAIT())
1701: cs.Iflags |= CFwait;
1702: if (!I16)
1703: cs.Iflags &= ~CFopsize;
1704: if (op != -1)
1705: { cs.Irm |= modregrm(0,op,0);
1706: c = cat(c,makesure87(eleft,eoffset,0,0));
1707: }
1708: else
1709: { cs.Iop |= 1;
1710: c = cat(c,push87());
1711: }
1712: c = gen(c,&cs); /* FLD / Fop */
1713: #else
1714: c = loadea(e->E1,&cs,ESC(mf1,1),0,0,0,0); /* FLD e->E1 */
1715: #endif
1716: /* Variable cannot be put into a register anymore */
1717: if (e->E1->Eoper == OPvar)
1718: notreg(e->E1);
1719: freenode(e->E1);
1720: }
1721: else
1722: {
1723: retregs = mST0;
1724: c = codelem(e->E1,&retregs,FALSE);
1725: if (op != -1)
1726: { c = cat(c,makesure87(eleft,eoffset,1,0));
1727: c = genf2(c,0xDE,modregrm(3,opr,1)); // FopRP
1728: pop87();
1729: }
1730: }
1731: break;
1732:
1733: case OPs64_d:
1734: if (e->E1->Eoper == OPvar ||
1735: (e->E1->Eoper == OPind && e->E1->Ecount == 0))
1736: {
1737: c = getlvalue(&cs,e->E1,0);
1738: cs.Iop = 0xDF;
1739: if (ADDFWAIT())
1740: cs.Iflags |= CFwait;
1741: if (!I16)
1742: cs.Iflags &= ~CFopsize;
1743: c = cat(c,push87());
1744: cs.Irm |= modregrm(0,5,0);
1745: c = gen(c,&cs); // FILD m64
1746: // Variable cannot be put into a register anymore
1747: if (e->E1->Eoper == OPvar)
1748: notreg(e->E1);
1749: freenode(e->E1);
1750: }
1751: else if (I64)
1752: {
1753: retregs = ALLREGS;
1754: c = codelem(e->E1,&retregs,FALSE);
1755: reg = findreg(retregs);
1756: c = genfltreg(c,0x89,reg,0); // MOV floatreg,reg
1757: code_orrex(c, REX_W);
1758: c = cat(c,push87());
1759: c = genfltreg(c,0xDF,5,0); // FILD long long ptr floatreg
1760: }
1761: else
1762: {
1763: retregs = ALLREGS;
1764: c = codelem(e->E1,&retregs,FALSE);
1765: reg = findreglsw(retregs);
1766: c = genfltreg(c,0x89,reg,0); // MOV floatreg,reglsw
1767: reg = findregmsw(retregs);
1768: c = genfltreg(c,0x89,reg,4); // MOV floatreg+4,regmsw
1769: c = cat(c,push87());
1770: c = genfltreg(c,0xDF,5,0); // FILD long long ptr floatreg
1771: }
1772: if (op != -1)
1773: { c = cat(c,makesure87(eleft,eoffset,1,0));
1774: c = genf2(c,0xDE,modregrm(3,opr,1)); // FopRP
1775: pop87();
1776: }
1777: break;
1778:
1779: case OPconst:
1780: ldop = loadconst(e, 0);
1781: if (ldop)
1782: {
1783: cpush = push87();
1784: c = genf2(NULL,0xD9,ldop); // FLDx
1785: if (op != -1)
1786: { genf2(c,0xDE,modregrm(3,opr,1)); // FopRP
1787: pop87();
1788: }
1789: }
1790: else
1791: {
1792: assert(0);
1793: }
1794: break;
1795:
1796: case OPu16_d:
1797: {
1798: /* This opcode should never be generated */
1799: /* (probably shouldn't be for 16 bit code too) */
1800: assert(!I32);
1801:
1802: if (op != -1)
1803: note87(eleft,eoffset,0); // don't trash this value
1804: retregs = ALLREGS & mLSW;
1805: c = codelem(e->E1,&retregs,FALSE);
1806: c = regwithvalue(c,ALLREGS & mMSW,0,®,0); // 0-extend
1807: retregs |= mask[reg];
1808: mf1 = MFlong;
1809: goto L3;
1810: }
1811: case OPs16_d: mf1 = MFword; goto L6;
1812: case OPs32_d: mf1 = MFlong; goto L6;
1813: L6:
1814: if (op != -1)
1815: note87(eleft,eoffset,0); // don't trash this value
1816: if (e->E1->Eoper == OPvar ||
1817: (e->E1->Eoper == OPind && e->E1->Ecount == 0))
1818: {
1819: goto L4;
1820: }
1821: else
1822: {
1823: retregs = ALLREGS;
1824: c = codelem(e->E1,&retregs,FALSE);
1825: L3:
1826: if (I16 && e->Eoper != OPs16_d)
1827: {
1828: /* MOV floatreg+2,reg */
1829: reg = findregmsw(retregs);
1830: c = genfltreg(c,0x89,reg,REGSIZE);
1831: retregs &= mLSW;
1832: }
1833: reg = findreg(retregs);
1834: c = genfltreg(c,0x89,reg,0); /* MOV floatreg,reg */
1835: if (op != -1)
1836: { c = cat(c,makesure87(eleft,eoffset,0,0));
1837: genfltreg(c,ESC(mf1,0),op,0); /* Fop floatreg */
1838: }
1839: else
1840: {
1841: /* FLD long ptr floatreg */
1842: c = cat(c,push87());
1843: c = genfltreg(c,ESC(mf1,1),0,0);
1844: }
1845: }
1846: break;
1847: default:
1848: Ldefault:
1849: retregs = mST0;
1850: #if 1 /* Do this instead of codelem() to avoid the freenode(e).
1851: We also lose CSE capability */
1852: if (e->Eoper == OPconst)
1853: {
1854: c = load87(e, 0, &retregs, NULL, -1);
1855: }
1856: else
1857: c = (*cdxxx[e->Eoper])(e,&retregs);
1858: #else
1859: c = codelem(e,&retregs,FALSE);
1860: #endif
1861: if (op != -1)
1862: {
1863: c = cat(c,makesure87(eleft,eoffset,1,(op == 0 || op == 1)));
1864: pop87();
1865: if (op == 4 || op == 6) // sub or div
1866: { code *cl;
1867:
1868: cl = code_last(c);
1869: if (cl && cl->Iop == 0xD9 && cl->Irm == 0xC9) // FXCH ST(1)
1870: { cl->Iop = NOP;
1871: opr = op; // reverse operands
1872: }
1873: }
1874: c = genf2(c,0xDE,modregrm(3,opr,1)); // FopRP
1875: }
1876: break;
1877: }
1878: if (op == 3) // FCOMP
1879: { pop87(); // extra pop was done
1880: cg87_87topsw(c);
1881: }
1882: c2 = fixresult87(e,((op == 3) ? mPSW : mST0),pretregs);
1883: #if NDPP
1884: printf("-load87(e=%p, eoffset=%d, *pretregs=%s, eleft=%p, op=%d, stackused = %d)\n",e,eoffset,regm_str(*pretregs),eleft,op,stackused);
1885: #endif
1886: return cat4(ccomma,cpush,c,c2);
1887: }
1888:
1889: /********************************
1890: * Determine if a compare is to be done forwards (return 0)
1891: * or backwards (return 1).
1892: * Must follow same logic as load87().
1893: */
1894:
1895: int cmporder87(elem *e)
1896: {
1897: //printf("cmporder87(%p)\n",e);
1898: L1:
1899: switch (e->Eoper)
1900: {
1901: case OPcomma:
1902: e = e->E2;
1903: goto L1;
1904:
1905: case OPd_f:
1906: case OPf_d:
1907: case OPd_ld:
1908: if (e->E1->Eoper == OPvar || e->E1->Eoper == OPind)
1909: goto ret0;
1910: else
1911: goto ret1;
1912:
1913: case OPconst:
1914: if (loadconst(e, 0) || tybasic(e->Ety) == TYldouble
1915: || tybasic(e->Ety) == TYildouble)
1916: {
1917: //printf("ret 1, loadconst(e) = %d\n", loadconst(e));
1918: goto ret1;
1919: }
1920: goto ret0;
1921:
1922: case OPvar:
1923: case OPind:
1924: if (tybasic(e->Ety) == TYldouble ||
1925: tybasic(e->Ety) == TYildouble)
1926: goto ret1;
1927: case OPu16_d:
1928: case OPs16_d:
1929: case OPs32_d:
1930: goto ret0;
1931:
1932: case OPs64_d:
1933: goto ret1;
1934:
1935: default:
1936: goto ret1;
1937: }
1938:
1939: ret1: return 1;
1940: ret0: return 0;
1941: }
1942:
1943: /*******************************
1944: * Perform an assignment to a long double/double/float.
1945: */
1946:
1947: code *eq87(elem *e,regm_t *pretregs)
1948: {
1949: regm_t retregs;
1950: code *c1,*c2;
1951: code cs;
1952: unsigned op1;
1953: unsigned op2;
1954: tym_t ty1;
1955:
1956: //printf("+eq87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
1957: assert(e->Eoper == OPeq);
1958: retregs = mST0 | (*pretregs & mPSW);
1959: c1 = codelem(e->E2,&retregs,FALSE);
1960: ty1 = tybasic(e->E1->Ety);
1961: switch (ty1)
1962: { case TYdouble_alias:
1963: case TYidouble:
1964: case TYdouble: op1 = ESC(MFdouble,1); op2 = 3; break;
1965: case TYifloat:
1966: case TYfloat: op1 = ESC(MFfloat,1); op2 = 3; break;
1967: case TYildouble:
1968: case TYldouble: op1 = 0xDB; op2 = 7; break;
1969: default:
1970: assert(0);
1971: }
1972: if (*pretregs & (mST0 | ALLREGS | mBP | XMMREGS)) // if want result on stack too
1973: {
1974: if (ty1 == TYldouble || ty1 == TYildouble)
1975: {
1976: c1 = cat(c1,push87());
1977: c1 = genf2(c1,0xD9,0xC0); // FLD ST(0)
1978: pop87();
1979: }
1980: else
1981: op2 = 2; // FST e->E1
1982: }
1983: else
1984: { // FSTP e->E1
1985: pop87();
1986: }
1987: #if 0
1988: // Doesn't work if ST(0) gets saved to the stack by getlvalue()
1989: c2 = loadea(e->E1,&cs,op1,op2,0,0,0);
1990: #else
1991: cs.Irex = 0;
1992: cs.Iflags = 0;
1993: cs.Iop = op1;
1994: if (*pretregs & (mST0 | ALLREGS | mBP)) // if want result on stack too
1995: { // Make sure it's still there
1996: elem *e2 = e->E2;
1997: while (e2->Eoper == OPcomma)
1998: e2 = e2->E2;
1999: note87(e2,0,0);
2000: c2 = getlvalue(&cs, e->E1, 0);
2001: c2 = cat(c2,makesure87(e2,0,0,1));
2002: }
2003: else
2004: {
2005: c2 = getlvalue(&cs, e->E1, 0);
2006: }
2007: cs.Irm |= modregrm(0,op2,0); // OR in reg field
2008: if (I32)
2009: cs.Iflags &= ~CFopsize;
2010: else if (ADDFWAIT())
2011: cs.Iflags |= CFwait;
2012: else if (I64)
2013: cs.Irex &= ~REX_W;
2014: c2 = gen(c2, &cs);
2015: #if LNGDBLSIZE == 12
2016: if (tysize[TYldouble] == 12)
2017: {
2018: /* This deals with the fact that 10 byte reals really
2019: * occupy 12 bytes by zeroing the extra 2 bytes.
2020: */
2021: if (op1 == 0xDB)
2022: {
2023: cs.Iop = 0xC7; // MOV EA+10,0
2024: NEWREG(cs.Irm, 0);
2025: cs.IEV1.sp.Voffset += 10;
2026: cs.IFL2 = FLconst;
2027: cs.IEV2.Vint = 0;
2028: cs.Iflags |= CFopsize;
2029: c2 = gen(c2, &cs);
2030: }
2031: }
2032: #endif
2033: if (tysize[TYldouble] == 16)
2034: {
2035: /* This deals with the fact that 10 byte reals really
2036: * occupy 16 bytes by zeroing the extra 6 bytes.
2037: */
2038: if (op1 == 0xDB)
2039: {
2040: cs.Irex &= ~REX_W;
2041: cs.Iop = 0xC7; // MOV EA+10,0
2042: NEWREG(cs.Irm, 0);
2043: cs.IEV1.sp.Voffset += 10;
2044: cs.IFL2 = FLconst;
2045: cs.IEV2.Vint = 0;
2046: cs.Iflags |= CFopsize;
2047: c2 = gen(c2, &cs);
2048:
2049: cs.IEV1.sp.Voffset += 2;
2050: cs.Iflags &= ~CFopsize;
2051: c2 = gen(c2, &cs);
2052: }
2053: }
2054: #endif
2055: c2 = genfwait(c2);
2056: freenode(e->E1);
2057: c1 = cat3(c1,c2,fixresult87(e,mST0 | mPSW,pretregs));
2058: return c1;
2059: }
2060:
2061: /*******************************
2062: * Perform an assignment to a long double/double/float.
2063: */
2064:
2065: code *complex_eq87(elem *e,regm_t *pretregs)
2066: {
2067: regm_t retregs;
2068: code *c1,*c2;
2069: code cs;
2070: unsigned op1;
2071: unsigned op2;
2072: unsigned sz;
2073: tym_t ty1;
2074: int fxch = 0;
2075:
2076: //printf("complex_eq87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
2077: assert(e->Eoper == OPeq);
2078: cs.Iflags = ADDFWAIT() ? CFwait : 0;
2079: cs.Irex = 0;
2080: retregs = mST01 | (*pretregs & mPSW);
2081: c1 = codelem(e->E2,&retregs,FALSE);
2082: ty1 = tybasic(e->E1->Ety);
2083: switch (ty1)
2084: {
2085: case TYcdouble: op1 = ESC(MFdouble,1); op2 = 3; break;
2086: case TYcfloat: op1 = ESC(MFfloat,1); op2 = 3; break;
2087: case TYcldouble: op1 = 0xDB; op2 = 7; break;
2088: default:
2089: assert(0);
2090: }
2091: if (*pretregs & (mST01 | mXMM0 | mXMM1)) // if want result on stack too
2092: {
2093: if (ty1 == TYcldouble)
2094: {
2095: c1 = cat(c1,push87());
2096: c1 = cat(c1,push87());
2097: c1 = genf2(c1,0xD9,0xC0 + 1); // FLD ST(1)
2098: genf2(c1,0xD9,0xC0 + 1); // FLD ST(1)
2099: pop87();
2100: pop87();
2101: }
2102: else
2103: { op2 = 2; // FST e->E1
2104: fxch = 1;
2105: }
2106: }
2107: else
2108: { // FSTP e->E1
2109: pop87();
2110: pop87();
2111: }
2112: sz = tysize(ty1) / 2;
2113: if (*pretregs & (mST01 | mXMM0 | mXMM1))
2114: {
2115: cs.Iflags = 0;
2116: cs.Irex = 0;
2117: cs.Iop = op1;
2118: c2 = getlvalue(&cs, e->E1, 0);
2119: cs.IEVoffset1 += sz;
2120: cs.Irm |= modregrm(0, op2, 0);
2121: c2 = cat(c2, makesure87(e->E2, sz, 0, 0));
2122: c2 = gen(c2, &cs);
2123: c2 = genfwait(c2);
2124: c2 = cat(c2, makesure87(e->E2, 0, 1, 0));
2125: }
2126: else
2127: {
2128: c2 = loadea(e->E1,&cs,op1,op2,sz,0,0);
2129: c2 = genfwait(c2);
2130: }
2131: if (fxch)
2132: c2 = genf2(c2,0xD9,0xC8 + 1); // FXCH ST(1)
2133: cs.IEVoffset1 -= sz;
2134: gen(c2, &cs);
2135: if (fxch)
2136: genf2(c2,0xD9,0xC8 + 1); // FXCH ST(1)
2137: if (tysize[TYldouble] == 12)
2138: {
2139: if (op1 == 0xDB)
2140: {
2141: cs.Iop = 0xC7; // MOV EA+10,0
2142: NEWREG(cs.Irm, 0);
2143: cs.IEV1.sp.Voffset += 10;
2144: cs.IFL2 = FLconst;
2145: cs.IEV2.Vint = 0;
2146: cs.Iflags |= CFopsize;
2147: c2 = gen(c2, &cs);
2148: cs.IEVoffset1 += 12;
2149: c2 = gen(c2, &cs); // MOV EA+22,0
2150: }
2151: }
2152: if (tysize[TYldouble] == 16)
2153: {
2154: if (op1 == 0xDB)
2155: {
2156: cs.Iop = 0xC7; // MOV EA+10,0
2157: NEWREG(cs.Irm, 0);
2158: cs.IEV1.sp.Voffset += 10;
2159: cs.IFL2 = FLconst;
2160: cs.IEV2.Vint = 0;
2161: cs.Iflags |= CFopsize;
2162: c2 = gen(c2, &cs);
2163:
2164: cs.IEV1.sp.Voffset += 2;
2165: cs.Iflags &= ~CFopsize;
2166: c2 = gen(c2, &cs);
2167:
2168: cs.IEV1.sp.Voffset += 14;
2169: cs.Iflags |= CFopsize;
2170: c2 = gen(c2, &cs);
2171:
2172: cs.IEV1.sp.Voffset += 2;
2173: cs.Iflags &= ~CFopsize;
2174: c2 = gen(c2, &cs);
2175: }
2176: }
2177: c2 = genfwait(c2);
2178: freenode(e->E1);
2179: return cat3(c1,c2,fixresult_complex87(e,mST01 | mPSW,pretregs));
2180: }
2181:
2182: /*******************************
2183: * Perform an assignment while converting to integral type,
2184: * i.e. handle (e1 = (int) e2)
2185: */
2186:
2187: code *cnvteq87(elem *e,regm_t *pretregs)
2188: {
2189: regm_t retregs;
2190: code *c1,*c2;
2191: code cs;
2192: unsigned op1;
2193: unsigned op2;
2194:
2195: assert(e->Eoper == OPeq);
2196: assert(!*pretregs);
2197: retregs = mST0;
2198: elem_debug(e->E2);
2199: c1 = codelem(e->E2->E1,&retregs,FALSE);
2200:
2201: switch (e->E2->Eoper)
2202: { case OPd_s16:
2203: op1 = ESC(MFword,1);
2204: op2 = 3;
2205: break;
2206: case OPd_s32:
2207: case OPd_u16:
2208: op1 = ESC(MFlong,1);
2209: op2 = 3;
2210: break;
2211: case OPd_s64:
2212: op1 = 0xDF;
2213: op2 = 7;
2214: break;
2215: default:
2216: assert(0);
2217: }
2218: freenode(e->E2);
2219:
2220: c1 = genfwait(c1);
2221: c1 = genrnd(c1, CW_roundto0); // FLDCW roundto0
2222:
2223: pop87();
2224: cs.Iflags = ADDFWAIT() ? CFwait : 0;
2225: if (e->E1->Eoper == OPvar)
2226: notreg(e->E1); // cannot be put in register anymore
2227: c2 = loadea(e->E1,&cs,op1,op2,0,0,0);
2228:
2229: c2 = genfwait(c2);
2230: c2 = genrnd(c2, CW_roundtonearest); // FLDCW roundtonearest
2231:
2232: freenode(e->E1);
2233: return cat(c1,c2);
2234: }
2235:
2236: /**********************************
2237: * Perform +=, -=, *= and /= for doubles.
2238: */
2239:
2240: code *opass87(elem *e,regm_t *pretregs)
2241: {
2242: regm_t retregs;
2243: code *cl,*cr,*c;
2244: code cs;
2245: unsigned op;
2246: unsigned opld;
2247: unsigned op1;
2248: unsigned op2;
2249: tym_t ty1;
2250:
2251: ty1 = tybasic(e->E1->Ety);
2252: switch (ty1)
2253: { case TYdouble_alias:
2254: case TYidouble:
2255: case TYdouble: op1 = ESC(MFdouble,1); op2 = 3; break;
2256: case TYifloat:
2257: case TYfloat: op1 = ESC(MFfloat,1); op2 = 3; break;
2258: case TYildouble:
2259: case TYldouble: op1 = 0xDB; op2 = 7; break;
2260:
2261: case TYcfloat:
2262: case TYcdouble:
2263: case TYcldouble:
2264: return (e->Eoper == OPmodass)
2265: ? opmod_complex87(e, pretregs)
2266: : opass_complex87(e, pretregs);
2267:
2268: default:
2269: assert(0);
2270: }
2271: switch (e->Eoper)
2272: { case OPpostinc:
2273: case OPaddass: op = 0 << 3; opld = 0xC1; break; // FADD
2274: case OPpostdec:
2275: case OPminass: op = 5 << 3; opld = 0xE1; /*0xE9;*/ break; // FSUBR
2276: case OPmulass: op = 1 << 3; opld = 0xC9; break; // FMUL
2277: case OPdivass: op = 7 << 3; opld = 0xF1; break; // FDIVR
2278: case OPmodass: break;
2279: default: assert(0);
2280: }
2281: retregs = mST0;
2282: cr = codelem(e->E2,&retregs,FALSE); // evaluate rvalue
2283: note87(e->E2,0,0);
2284: cl = getlvalue(&cs,e->E1,0);
2285: cl = cat(cl,makesure87(e->E2,0,0,0));
2286: cs.Iflags |= ADDFWAIT() ? CFwait : 0;
2287: if (I32)
2288: cs.Iflags &= ~CFopsize;
2289: if (config.flags4 & CFG4fdivcall && e->Eoper == OPdivass)
2290: {
2291: c = push87();
2292: cs.Iop = op1;
2293: if (ty1 == TYldouble || ty1 == TYildouble)
2294: cs.Irm |= modregrm(0, 5, 0); // FLD tbyte ptr ...
2295: c = gen(c,&cs);
2296: c = genf2(c,0xD9,0xC8 + 1); // FXCH ST(1)
2297: c = cat(c,callclib(e,CLIBfdiv87,&retregs,0));
2298: pop87();
2299: }
2300: else if (e->Eoper == OPmodass)
2301: {
2302: /*
2303: * fld tbyte ptr y
2304: * fld tbyte ptr x // ST = x, ST1 = y
2305: * FM1: // We don't use fprem1 because for some inexplicable
2306: * // reason we get -5 when we do _modulo(15, 10)
2307: * fprem // ST = ST % ST1
2308: * fstsw word ptr sw
2309: * fwait
2310: * mov AH,byte ptr sw+1 // get msb of status word in AH
2311: * sahf // transfer to flags
2312: * jp FM1 // continue till ST < ST1
2313: * fstp ST(1) // leave remainder on stack
2314: */
2315: code *c1;
2316:
2317: c = push87();
2318: cs.Iop = op1;
2319: if (ty1 == TYldouble || ty1 == TYildouble)
2320: cs.Irm |= modregrm(0, 5, 0); // FLD tbyte ptr ...
2321: c = gen(c,&cs); // FLD e->E1
2322:
2323: c1 = gen2(NULL, 0xD9, 0xF8); // FPREM
2324: c1 = cg87_87topsw(c1);
2325: c1 = genjmp(c1, JP, FLcode, (block *)c1); // JP FM1
2326: c1 = genf2(c1,0xDD,0xD8 + 1); // FSTP ST(1)
2327: c = cat(c,c1);
2328:
2329: pop87();
2330: }
2331: else if (ty1 == TYldouble || ty1 == TYildouble)
2332: {
2333: c = push87();
2334: cs.Iop = op1;
2335: cs.Irm |= modregrm(0, 5, 0); // FLD tbyte ptr ...
2336: c = gen(c,&cs); // FLD e->E1
2337: genf2(c,0xDE,opld); // FopP ST(1)
2338: pop87();
2339: }
2340: else
2341: { cs.Iop = op1 & ~1;
2342: cs.Irm |= op;
2343: c = gen(CNIL,&cs); // Fop e->E1
2344: }
2345: if (*pretregs & mPSW)
2346: genftst(c,e,0); // FTST ST0
2347: /* if want result in registers */
2348: if (*pretregs & (mST0 | ALLREGS | mBP))
2349: {
2350: if (ty1 == TYldouble || ty1 == TYildouble)
2351: {
2352: c = cat(c,push87());
2353: c = genf2(c,0xD9,0xC0); // FLD ST(0)
2354: pop87();
2355: }
2356: else
2357: op2 = 2; // FST e->E1
2358: }
2359: else
2360: { // FSTP
2361: pop87();
2362: }
2363: cs.Iop = op1;
2364: NEWREG(cs.Irm,op2); // FSTx e->E1
2365: freenode(e->E1);
2366: gen(c,&cs);
2367: genfwait(c);
2368: return cat4(cr,cl,c,fixresult87(e,mST0 | mPSW,pretregs));
2369: }
2370:
2371: /***********************************
2372: * Perform %= where E1 is complex and E2 is real or imaginary.
2373: */
2374:
2375: code *opmod_complex87(elem *e,regm_t *pretregs)
2376: {
2377: regm_t retregs;
2378: code *cl,*cr,*c;
2379: code cs;
2380: tym_t ty1;
2381: unsigned sz2;
2382:
2383: /* fld E2
2384: fld E1.re
2385: FM1: fprem
2386: fstsw word ptr sw
2387: fwait
2388: mov AH, byte ptr sw+1
2389: jp FM1
2390: fxch ST(1)
2391: fld E1.im
2392: FM2: fprem
2393: fstsw word ptr sw
2394: fwait
2395: mov AH, byte ptr sw+1
2396: jp FM2
2397: fstp ST(1)
2398: */
2399:
2400: ty1 = tybasic(e->E1->Ety);
2401: sz2 = tysize[ty1] / 2;
2402:
2403: retregs = mST0;
2404: cr = codelem(e->E2,&retregs,FALSE); // FLD E2
2405: note87(e->E2,0,0);
2406: cl = getlvalue(&cs,e->E1,0);
2407: cl = cat(cl,makesure87(e->E2,0,0,0));
2408: cs.Iflags |= ADDFWAIT() ? CFwait : 0;
2409: if (!I16)
2410: cs.Iflags &= ~CFopsize;
2411:
2412: c = push87();
2413: switch (ty1)
2414: {
2415: case TYcdouble: cs.Iop = ESC(MFdouble,1); break;
2416: case TYcfloat: cs.Iop = ESC(MFfloat,1); break;
2417: case TYcldouble: cs.Iop = 0xDB; cs.Irm |= modregrm(0, 5, 0); break;
2418: default:
2419: assert(0);
2420: }
2421: c = gen(c,&cs); // FLD E1.re
2422:
2423: code *c1;
2424:
2425: c1 = gen2(NULL, 0xD9, 0xF8); // FPREM
2426: c1 = cg87_87topsw(c1);
2427: c1 = genjmp(c1, JP, FLcode, (block *)c1); // JP FM1
2428: c1 = genf2(c1, 0xD9, 0xC8 + 1); // FXCH ST(1)
2429: c = cat(c,c1);
2430:
2431: c = cat(c, push87());
2432: cs.IEVoffset1 += sz2;
2433: gen(c, &cs); // FLD E1.im
2434:
2435: c1 = gen2(NULL, 0xD9, 0xF8); // FPREM
2436: c1 = cg87_87topsw(c1);
2437: c1 = genjmp(c1, JP, FLcode, (block *)c1); // JP FM2
2438: c1 = genf2(c1,0xDD,0xD8 + 1); // FSTP ST(1)
2439: c = cat(c,c1);
2440:
2441: pop87();
2442:
2443: if (*pretregs & (mST01 | mPSW))
2444: {
2445: cs.Irm |= modregrm(0, 2, 0);
2446: gen(c, &cs); // FST mreal.im
2447: cs.IEVoffset1 -= sz2;
2448: gen(c, &cs); // FST mreal.re
2449: retregs = mST01;
2450: }
2451: else
2452: {
2453: cs.Irm |= modregrm(0, 3, 0);
2454: gen(c, &cs); // FSTP mreal.im
2455: cs.IEVoffset1 -= sz2;
2456: gen(c, &cs); // FSTP mreal.re
2457: pop87();
2458: pop87();
2459: retregs = 0;
2460: }
2461: freenode(e->E1);
2462: genfwait(c);
2463: return cat4(cr,cl,c,fixresult_complex87(e,retregs,pretregs));
2464: }
2465:
2466: /**********************************
2467: * Perform +=, -=, *= and /= for the lvalue being complex.
2468: */
2469:
2470: code *opass_complex87(elem *e,regm_t *pretregs)
2471: {
2472: regm_t retregs;
2473: regm_t idxregs;
2474: code *cl,*cr,*c;
2475: code cs;
2476: unsigned op;
2477: unsigned op2;
2478: tym_t ty1;
2479: unsigned sz2;
2480:
2481: ty1 = tybasic(e->E1->Ety);
2482: sz2 = tysize[ty1] / 2;
2483: switch (e->Eoper)
2484: { case OPpostinc:
2485: case OPaddass: op = 0 << 3; // FADD
2486: op2 = 0xC0; // FADDP ST(i),ST
2487: break;
2488: case OPpostdec:
2489: case OPminass: op = 5 << 3; // FSUBR
2490: op2 = 0xE0; // FSUBRP ST(i),ST
2491: break;
2492: case OPmulass: op = 1 << 3; // FMUL
2493: op2 = 0xC8; // FMULP ST(i),ST
2494: break;
2495: case OPdivass: op = 7 << 3; // FDIVR
2496: op2 = 0xF0; // FDIVRP ST(i),ST
2497: break;
2498: default: assert(0);
2499: }
2500:
2501: if (!tycomplex(e->E2->Ety) &&
2502: (e->Eoper == OPmulass || e->Eoper == OPdivass))
2503: {
2504: retregs = mST0;
2505: cr = codelem(e->E2, &retregs, FALSE);
2506: note87(e->E2, 0, 0);
2507: cl = getlvalue(&cs, e->E1, 0);
2508: cl = cat(cl,makesure87(e->E2,0,0,0));
2509: cl = cat(cl,push87());
2510: cl = genf2(cl,0xD9,0xC0); // FLD ST(0)
2511: goto L1;
2512: }
2513: else
2514: {
2515: cr = loadComplex(e->E2);
2516: cl = getlvalue(&cs,e->E1,0);
2517: cl = cat(cl,makesure87(e->E2,sz2,0,0));
2518: cl = cat(cl,makesure87(e->E2,0,1,0));
2519: }
2520: cs.Iflags |= ADDFWAIT() ? CFwait : 0;
2521: if (!I16)
2522: cs.Iflags &= ~CFopsize;
2523:
2524: switch (e->Eoper)
2525: {
2526: case OPpostinc:
2527: case OPaddass:
2528: case OPpostdec:
2529: case OPminass:
2530: L1:
2531: if (ty1 == TYcldouble)
2532: {
2533: c = push87();
2534: c = cat(c, push87());
2535: cs.Iop = 0xDB;
2536: cs.Irm |= modregrm(0, 5, 0); // FLD tbyte ptr ...
2537: c = gen(c,&cs); // FLD e->E1.re
2538: cs.IEVoffset1 += sz2;
2539: gen(c,&cs); // FLD e->E1.im
2540: genf2(c, 0xDE, op2 + 2); // FADDP/FSUBRP ST(2),ST
2541: genf2(c, 0xDE, op2 + 2); // FADDP/FSUBRP ST(2),ST
2542: pop87();
2543: pop87();
2544: if (tyimaginary(e->E2->Ety))
2545: {
2546: if (e->Eoper == OPmulass)
2547: {
2548: genf2(c, 0xD9, 0xE0); // FCHS
2549: genf2(c, 0xD9, 0xC8+1); // FXCH ST(1)
2550: }
2551: else if (e->Eoper == OPdivass)
2552: {
2553: genf2(c, 0xD9, 0xC8+1); // FXCH ST(1)
2554: genf2(c, 0xD9, 0xE0); // FCHS
2555: }
2556: }
2557: L2:
2558: if (*pretregs & (mST01 | mPSW))
2559: {
2560: c = cat(c,push87());
2561: c = cat(c,push87());
2562: c = genf2(c,0xD9,0xC1); // FLD ST(1)
2563: c = genf2(c,0xD9,0xC1); // FLD ST(1)
2564: retregs = mST01;
2565: }
2566: else
2567: retregs = 0;
2568: cs.Iop = 0xDB;
2569: cs.Irm |= modregrm(0,7,0);
2570: gen(c,&cs); // FSTP e->E1.im
2571: cs.IEVoffset1 -= sz2;
2572: gen(c,&cs); // FSTP e->E1.re
2573: pop87();
2574: pop87();
2575:
2576: }
2577: else
2578: { unsigned char rmop = cs.Irm | op;
2579: unsigned char rmfst = cs.Irm | modregrm(0,2,0);
2580: unsigned char rmfstp = cs.Irm | modregrm(0,3,0);
2581: unsigned char iopfst = (ty1 == TYcfloat) ? 0xD9 : 0xDD;
2582: unsigned char iop = (ty1 == TYcfloat) ? 0xD8 : 0xDC;
2583:
2584: cs.Iop = iop;
2585: cs.Irm = rmop;
2586: cs.IEVoffset1 += sz2;
2587: c = gen(NULL, &cs); // FSUBR mreal.im
2588: if (tyimaginary(e->E2->Ety) && (e->Eoper == OPmulass || e->Eoper == OPdivass))
2589: {
2590: if (e->Eoper == OPmulass)
2591: genf2(c, 0xD9, 0xE0); // FCHS
2592: genf2(c,0xD9,0xC8 + 1); // FXCH ST(1)
2593: cs.IEVoffset1 -= sz2;
2594: gen(c, &cs); // FMUL mreal.re
2595: if (e->Eoper == OPdivass)
2596: genf2(c, 0xD9, 0xE0); // FCHS
2597: if (*pretregs & (mST01 | mPSW))
2598: {
2599: cs.Iop = iopfst;
2600: cs.Irm = rmfst;
2601: cs.IEVoffset1 += sz2;
2602: gen(c, &cs); // FST mreal.im
2603: genf2(c,0xD9,0xC8 + 1); // FXCH ST(1)
2604: cs.IEVoffset1 -= sz2;
2605: gen(c, &cs); // FST mreal.re
2606: genf2(c,0xD9,0xC8 + 1); // FXCH ST(1)
2607: retregs = mST01;
2608: }
2609: else
2610: {
2611: cs.Iop = iopfst;
2612: cs.Irm = rmfstp;
2613: cs.IEVoffset1 += sz2;
2614: gen(c, &cs); // FSTP mreal.im
2615: pop87();
2616: cs.IEVoffset1 -= sz2;
2617: gen(c, &cs); // FSTP mreal.re
2618: pop87();
2619: retregs = 0;
2620: }
2621: goto L3;
2622: }
2623:
2624: if (*pretregs & (mST01 | mPSW))
2625: {
2626: cs.Iop = iopfst;
2627: cs.Irm = rmfst;
2628: gen(c, &cs); // FST mreal.im
2629: genf2(c,0xD9,0xC8 + 1); // FXCH ST(1)
2630: cs.Iop = iop;
2631: cs.Irm = rmop;
2632: cs.IEVoffset1 -= sz2;
2633: gen(c, &cs); // FSUBR mreal.re
2634: cs.Iop = iopfst;
2635: cs.Irm = rmfst;
2636: gen(c, &cs); // FST mreal.re
2637: genf2(c,0xD9,0xC8 + 1); // FXCH ST(1)
2638: retregs = mST01;
2639: }
2640: else
2641: {
2642: cs.Iop = iopfst;
2643: cs.Irm = rmfstp;
2644: gen(c, &cs); // FSTP mreal.im
2645: pop87();
2646: cs.Iop = iop;
2647: cs.Irm = rmop;
2648: cs.IEVoffset1 -= sz2;
2649: gen(c, &cs); // FSUBR mreal.re
2650: cs.Iop = iopfst;
2651: cs.Irm = rmfstp;
2652: gen(c, &cs); // FSTP mreal.re
2653: pop87();
2654: retregs = 0;
2655: }
2656: }
2657: L3:
2658: freenode(e->E1);
2659: genfwait(c);
2660: return cat4(cr,cl,c,fixresult_complex87(e,retregs,pretregs));
2661:
2662: case OPmulass:
2663: c = push87();
2664: c = cat(c, push87());
2665: if (ty1 == TYcldouble)
2666: {
2667: cs.Iop = 0xDB;
2668: cs.Irm |= modregrm(0, 5, 0); // FLD tbyte ptr ...
2669: c = gen(c,&cs); // FLD e->E1.re
2670: cs.IEVoffset1 += sz2;
2671: gen(c,&cs); // FLD e->E1.im
2672: retregs = mST01;
2673: c = cat(c,callclib(e, CLIBcmul, &retregs, 0));
2674: goto L2;
2675: }
2676: else
2677: {
2678: cs.Iop = (ty1 == TYcfloat) ? 0xD9 : 0xDD;
2679: cs.Irm |= modregrm(0, 0, 0); // FLD tbyte ptr ...
2680: c = gen(c,&cs); // FLD e->E1.re
2681: cs.IEVoffset1 += sz2;
2682: gen(c,&cs); // FLD e->E1.im
2683: retregs = mST01;
2684: c = cat(c,callclib(e, CLIBcmul, &retregs, 0));
2685: if (*pretregs & (mST01 | mPSW))
2686: {
2687: cs.Irm |= modregrm(0, 2, 0);
2688: gen(c, &cs); // FST mreal.im
2689: cs.IEVoffset1 -= sz2;
2690: gen(c, &cs); // FST mreal.re
2691: retregs = mST01;
2692: }
2693: else
2694: {
2695: cs.Irm |= modregrm(0, 3, 0);
2696: gen(c, &cs); // FSTP mreal.im
2697: cs.IEVoffset1 -= sz2;
2698: gen(c, &cs); // FSTP mreal.re
2699: pop87();
2700: pop87();
2701: retregs = 0;
2702: }
2703: goto L3;
2704: }
2705:
2706: case OPdivass:
2707: c = push87();
2708: c = cat(c, push87());
2709: idxregs = idxregm(&cs); // mask of index regs used
2710: if (ty1 == TYcldouble)
2711: {
2712: cs.Iop = 0xDB;
2713: cs.Irm |= modregrm(0, 5, 0); // FLD tbyte ptr ...
2714: c = gen(c,&cs); // FLD e->E1.re
2715: genf2(c,0xD9,0xC8 + 2); // FXCH ST(2)
2716: cs.IEVoffset1 += sz2;
2717: gen(c,&cs); // FLD e->E1.im
2718: genf2(c,0xD9,0xC8 + 2); // FXCH ST(2)
2719: retregs = mST01;
2720: c = cat(c,callclib(e, CLIBcdiv, &retregs, idxregs));
2721: goto L2;
2722: }
2723: else
2724: {
2725: cs.Iop = (ty1 == TYcfloat) ? 0xD9 : 0xDD;
2726: cs.Irm |= modregrm(0, 0, 0); // FLD tbyte ptr ...
2727: c = gen(c,&cs); // FLD e->E1.re
2728: genf2(c,0xD9,0xC8 + 2); // FXCH ST(2)
2729: cs.IEVoffset1 += sz2;
2730: gen(c,&cs); // FLD e->E1.im
2731: genf2(c,0xD9,0xC8 + 2); // FXCH ST(2)
2732: retregs = mST01;
2733: c = cat(c,callclib(e, CLIBcdiv, &retregs, idxregs));
2734: if (*pretregs & (mST01 | mPSW))
2735: {
2736: cs.Irm |= modregrm(0, 2, 0);
2737: gen(c, &cs); // FST mreal.im
2738: cs.IEVoffset1 -= sz2;
2739: gen(c, &cs); // FST mreal.re
2740: retregs = mST01;
2741: }
2742: else
2743: {
2744: cs.Irm |= modregrm(0, 3, 0);
2745: gen(c, &cs); // FSTP mreal.im
2746: cs.IEVoffset1 -= sz2;
2747: gen(c, &cs); // FSTP mreal.re
2748: pop87();
2749: pop87();
2750: retregs = 0;
2751: }
2752: goto L3;
2753: }
2754:
2755: default:
2756: assert(0);
2757: }
2758: return NULL;
2759: }
2760:
2761: /**************************
2762: * OPnegass
2763: */
2764:
2765: code *cdnegass87(elem *e,regm_t *pretregs)
2766: { regm_t retregs;
2767: tym_t tyml;
2768: unsigned op;
2769: code *cl,*cr,*c,cs;
2770: elem *e1;
2771: int sz;
2772:
2773: //printf("cdnegass87(e = %p, *pretregs = x%x)\n", e, *pretregs);
2774: e1 = e->E1;
2775: tyml = tybasic(e1->Ety); // type of lvalue
2776: sz = tysize[tyml];
2777:
2778: cl = getlvalue(&cs,e1,0);
2779: cr = modEA(&cs);
2780: cs.Irm |= modregrm(0,6,0);
2781: cs.Iop = 0x80;
2782: cs.Irex = 0;
2783: #if LNGDBLSIZE > 10
2784: if (tyml == TYldouble || tyml == TYildouble)
2785: cs.IEVoffset1 += 10 - 1;
2786: else if (tyml == TYcldouble)
2787: cs.IEVoffset1 += tysize[TYldouble] + 10 - 1;
2788: else
2789: #endif
2790: cs.IEVoffset1 += sz - 1;
2791: cs.IFL2 = FLconst;
2792: cs.IEV2.Vuns = 0x80;
2793: c = gen(NULL,&cs); // XOR 7[EA],0x80
2794: if (tycomplex(tyml))
2795: {
2796: cs.IEVoffset1 -= sz / 2;
2797: gen(c,&cs); // XOR 7[EA],0x80
2798: }
2799: c = cat3(cl,cr,c);
2800:
2801: if (*pretregs)
2802: {
2803: switch (tyml)
2804: {
2805: case TYifloat:
2806: case TYfloat: cs.Iop = 0xD9; op = 0; break;
2807: case TYidouble:
2808: case TYdouble:
2809: case TYdouble_alias: cs.Iop = 0xDD; op = 0; break;
2810: case TYildouble:
2811: case TYldouble: cs.Iop = 0xDB; op = 5; break;
2812: default:
2813: assert(0);
2814: }
2815: NEWREG(cs.Irm,op);
2816: cs.IEVoffset1 -= sz - 1;
2817: c = cat(c, push87());
2818: c = gen(c,&cs); // FLD EA
2819: retregs = mST0;
2820: }
2821: else
2822: retregs = 0;
2823:
2824: freenode(e1);
2825: return cat(c,fixresult87(e,retregs,pretregs));
2826: }
2827:
2828: /************************
2829: * Take care of OPpostinc and OPpostdec.
2830: */
2831:
2832: code *post87(elem *e,regm_t *pretregs)
2833: {
2834: regm_t retregs;
2835: code *cl,*cr,*c;
2836: code cs;
2837: unsigned op;
2838: unsigned op1;
2839: unsigned reg;
2840: tym_t ty1;
2841:
2842: //printf("post87()\n");
2843: assert(*pretregs);
2844: cl = getlvalue(&cs,e->E1,0);
2845: cs.Iflags |= ADDFWAIT() ? CFwait : 0;
2846: if (!I16)
2847: cs.Iflags &= ~CFopsize;
2848: ty1 = tybasic(e->E1->Ety);
2849: switch (ty1)
2850: { case TYdouble_alias:
2851: case TYidouble:
2852: case TYdouble:
2853: case TYcdouble: op1 = ESC(MFdouble,1); reg = 0; break;
2854: case TYifloat:
2855: case TYfloat:
2856: case TYcfloat: op1 = ESC(MFfloat,1); reg = 0; break;
2857: case TYildouble:
2858: case TYldouble:
2859: case TYcldouble: op1 = 0xDB; reg = 5; break;
2860: default:
2861: assert(0);
2862: }
2863: NEWREG(cs.Irm, reg);
2864: if (reg == 5)
2865: reg = 7;
2866: else
2867: reg = 3;
2868: cs.Iop = op1;
2869: cl = cat(cl,push87());
2870: cl = gen(cl,&cs); // FLD e->E1
2871: if (tycomplex(ty1))
2872: { unsigned sz = tysize[ty1] / 2;
2873:
2874: cl = cat(cl,push87());
2875: cs.IEVoffset1 += sz;
2876: cl = gen(cl,&cs); // FLD e->E1
2877: retregs = mST0; // note kludge to only load real part
2878: cr = codelem(e->E2,&retregs,FALSE); // load rvalue
2879: c = genf2(NULL,0xD8, // FADD/FSUBR ST,ST2
2880: (e->Eoper == OPpostinc) ? 0xC0 + 2 : 0xE8 + 2);
2881: NEWREG(cs.Irm,reg);
2882: pop87();
2883: cs.IEVoffset1 -= sz;
2884: gen(c,&cs); // FSTP e->E1
2885: genfwait(c);
2886: freenode(e->E1);
2887: return cat4(cl, cr, c, fixresult_complex87(e, mST01, pretregs));
2888: }
2889:
2890: if (*pretregs & (mST0 | ALLREGS | mBP))
2891: { // Want the result in a register
2892: cl = cat(cl,push87());
2893: genf2(cl,0xD9,0xC0); // FLD ST0
2894: }
2895: if (*pretregs & mPSW) /* if result in flags */
2896: genftst(cl,e,0); // FTST ST0
2897: retregs = mST0;
2898: cr = codelem(e->E2,&retregs,FALSE); /* load rvalue */
2899: pop87();
2900: op = (e->Eoper == OPpostinc) ? modregrm(3,0,1) : modregrm(3,5,1);
2901: c = genf2(NULL,0xDE,op); // FADDP/FSUBRP ST1
2902: NEWREG(cs.Irm,reg);
2903: pop87();
2904: gen(c,&cs); /* FSTP e->E1 */
2905: genfwait(c);
2906: freenode(e->E1);
2907: return cat4(cl,cr,c,fixresult87(e,mPSW | mST0,pretregs));
2908: }
2909:
2910: /************************
2911: * Do the following opcodes:
2912: * OPd_s16
2913: * OPd_s32
2914: * OPd_u16
2915: * OPd_s64
2916: */
2917:
2918: code *cnvt87(elem *e,regm_t *pretregs)
2919: {
2920: regm_t retregs;
2921: code *c1,*c2;
2922: unsigned mf,rf,reg;
2923: tym_t tym;
2924: int clib;
2925: int sz;
2926: int szoff;
2927:
2928: //printf("cnvt87(e = %p, *pretregs = x%x)\n", e, *pretregs);
2929: assert(*pretregs);
2930: tym = e->Ety;
2931: sz = tysize(tym);
2932: szoff = sz;
2933: unsigned grex = I64 ? REX_W << 16 : 0;
2934:
2935: switch (e->Eoper)
2936: { case OPd_s16:
2937: clib = CLIBdblint87;
2938: mf = ESC(MFword,1);
2939: rf = 3;
2940: break;
2941:
2942: case OPd_u16:
2943: szoff = 4;
2944: case OPd_s32:
2945: clib = CLIBdbllng87;
2946: mf = ESC(MFlong,1);
2947: rf = 3;
2948: break;
2949:
2950: case OPd_s64:
2951: clib = CLIBdblllng;
2952: mf = 0xDF;
2953: rf = 7;
2954: break;
2955:
2956: default:
2957: assert(0);
2958: }
2959:
2960: if (I16) // C may change the default control word
2961: {
2962: if (clib == CLIBdblllng)
2963: { retregs = I32 ? DOUBLEREGS_32 : DOUBLEREGS_16;
2964: c1 = codelem(e->E1,&retregs,FALSE);
2965: c2 = callclib(e,clib,pretregs,0);
2966: }
2967: else
2968: { retregs = mST0; //I32 ? DOUBLEREGS_32 : DOUBLEREGS_16;
2969: c1 = codelem(e->E1,&retregs,FALSE);
2970: c2 = callclib(e,clib,pretregs,0);
2971: pop87();
2972: }
2973: }
2974: else if (1)
2975: { // Generate:
2976: // sub ESP,12
2977: // fstcw 8[ESP]
2978: // fldcw roundto0
2979: // fistp long64 ptr [ESP]
2980: // fldcw 8[ESP]
2981: // pop lsw
2982: // pop msw
2983: // add ESP,4
2984:
2985: unsigned szpush = szoff + 2;
2986: if (config.flags3 & CFG3pic)
2987: szpush += 2;
2988: szpush = (szpush + REGSIZE - 1) & ~(REGSIZE - 1);
2989:
2990: retregs = mST0;
2991: c1 = codelem(e->E1,&retregs,FALSE);
2992:
2993: if (szpush == REGSIZE)
2994: c1 = gen1(c1,0x50 + AX); // PUSH EAX
2995: else
2996: c1 = genc2(c1,0x81,grex | modregrm(3,5,SP), szpush); // SUB ESP,12
2997: c1 = genfwait(c1);
2998: genc1(c1,0xD9,modregrm(2,7,4) + 256*modregrm(0,4,SP),FLconst,szoff); // FSTCW szoff[ESP]
2999:
3000: c1 = genfwait(c1);
3001:
3002: if (config.flags3 & CFG3pic)
3003: {
3004: genc(c1,0xC7,modregrm(2,0,4) + 256*modregrm(0,4,SP),FLconst,szoff+2,FLconst,CW_roundto0); // MOV szoff+2[ESP], CW_roundto0
3005: code_orflag(c1, CFopsize);
3006: genc1(c1,0xD9,modregrm(2,5,4) + 256*modregrm(0,4,SP),FLconst,szoff+2); // FLDCW szoff+2[ESP]
3007: }
3008: else
3009: c1 = genrnd(c1, CW_roundto0); // FLDCW roundto0
3010:
3011: pop87();
3012:
3013: c1 = genfwait(c1);
3014: gen2sib(c1,mf,grex | modregrm(0,rf,4),modregrm(0,4,SP)); // FISTP [ESP]
3015:
3016: retregs = *pretregs & (ALLREGS | mBP);
3017: if (!retregs)
3018: retregs = ALLREGS;
3019: c2 = allocreg(&retregs,®,tym);
3020:
3021: c2 = genfwait(c2); // FWAIT
3022: c2 = genc1(c2,0xD9,grex | modregrm(2,5,4) + 256*modregrm(0,4,SP),FLconst,szoff); // FLDCW szoff[ESP]
3023:
3024: if (szoff > REGSIZE)
3025: { szpush -= REGSIZE;
3026: c2 = genpop(c2,findreglsw(retregs)); // POP lsw
3027: }
3028: szpush -= REGSIZE;
3029: c2 = genpop(c2,reg); // POP reg
3030:
3031: if (szpush)
3032: genc2(c2,0x81,grex | modregrm(3,0,SP), szpush); // ADD ESP,4
3033: c2 = cat(c2,fixresult(e,retregs,pretregs));
3034: }
3035: else
3036: {
3037: // This is incorrect. For -inf and nan, the 8087 returns the largest
3038: // negative int (0x80000....). For -inf, 0x7FFFF... should be returned,
3039: // and for nan, 0 should be returned.
3040: retregs = mST0;
3041: c1 = codelem(e->E1,&retregs,FALSE);
3042:
3043: c1 = genfwait(c1);
3044: c1 = genrnd(c1, CW_roundto0); // FLDCW roundto0
3045:
3046: pop87();
3047: c1 = genfltreg(c1,mf,rf,0); // FISTP floatreg
3048: retregs = *pretregs & (ALLREGS | mBP);
3049: if (!retregs)
3050: retregs = ALLREGS;
3051: c2 = allocreg(&retregs,®,tym);
3052:
3053: c2 = genfwait(c2);
3054:
3055: if (sz > REGSIZE)
3056: { c2 = genfltreg(c2,0x8B,reg,REGSIZE); // MOV reg,floatreg + REGSIZE
3057: // MOV lsreg,floatreg
3058: genfltreg(c2,0x8B,findreglsw(retregs),0);
3059: }
3060: else
3061: c2 = genfltreg(c2,0x8B,reg,0); // MOV reg,floatreg
3062: c2 = genrnd(c2, CW_roundtonearest); // FLDCW roundtonearest
3063: c2 = cat(c2,fixresult(e,retregs,pretregs));
3064: }
3065: return cat(c1,c2);
3066: }
3067:
3068: /************************
3069: * Do OPrndtol.
3070: */
3071:
3072: code *cdrndtol(elem *e,regm_t *pretregs)
3073: {
3074: regm_t retregs;
3075: code *c1,*c2;
3076: unsigned reg;
3077: tym_t tym;
3078: unsigned sz;
3079: unsigned char op1,op2;
3080:
3081: if (*pretregs == 0)
3082: return codelem(e->E1,pretregs,FALSE);
3083: tym = e->Ety;
3084: retregs = mST0;
3085: c1 = codelem(e->E1,&retregs,FALSE);
3086:
3087: sz = tysize(tym);
3088: switch (sz)
3089: { case 2:
3090: op1 = 0xDF;
3091: op2 = 3;
3092: break;
3093: case 4:
3094: op1 = 0xDB;
3095: op2 = 3;
3096: break;
3097: case 8:
3098: op1 = 0xDF;
3099: op2 = 7;
3100: break;
3101: default:
3102: assert(0);
3103: }
3104:
3105: pop87();
3106: c1 = genfltreg(c1,op1,op2,0); // FISTP floatreg
3107: retregs = *pretregs & (ALLREGS | mBP);
3108: if (!retregs)
3109: retregs = ALLREGS;
3110: c2 = allocreg(&retregs,®,tym);
3111: c2 = genfwait(c2); // FWAIT
3112: if (tysize(tym) > REGSIZE)
3113: { c2 = genfltreg(c2,0x8B,reg,REGSIZE); // MOV reg,floatreg + REGSIZE
3114: // MOV lsreg,floatreg
3115: genfltreg(c2,0x8B,findreglsw(retregs),0);
3116: }
3117: else
3118: {
3119: c2 = genfltreg(c2,0x8B,reg,0); // MOV reg,floatreg
3120: if (tysize(tym) == 8 && I64)
3121: code_orrex(c2, REX_W);
3122: }
3123: c2 = cat(c2,fixresult(e,retregs,pretregs));
3124:
3125: return cat(c1,c2);
3126: }
3127:
3128: /*************************
3129: * Do OPscale, OPyl2x, OPyl2xp1.
3130: */
3131:
3132: code *cdscale(elem *e,regm_t *pretregs)
3133: {
3134: regm_t retregs;
3135: code *c1,*c2,*c3;
3136:
3137: assert(*pretregs != 0);
3138:
3139: retregs = mST0;
3140: c1 = codelem(e->E1,&retregs,FALSE);
3141: note87(e->E1,0,0);
3142: c2 = codelem(e->E2,&retregs,FALSE);
3143: c2 = cat(c2,makesure87(e->E1,0,1,0)); // now have x,y on stack; need y,x
3144: switch (e->Eoper)
3145: {
3146: case OPscale:
3147: c2 = genf2(c2,0xD9,0xFD); // FSCALE
3148: genf2(c2,0xDD,0xD8 + 1); // FSTP ST(1)
3149: break;
3150:
3151: case OPyl2x:
3152: c2 = genf2(c2,0xD9,0xF1); // FYL2X
3153: break;
3154:
3155: case OPyl2xp1:
3156: c2 = genf2(c2,0xD9,0xF9); // FYL2XP1
3157: break;
3158: }
3159: pop87();
3160: c3 = fixresult87(e,mST0,pretregs);
3161: return cat3(c1,c2,c3);
3162: }
3163:
3164:
3165: /**********************************
3166: * Unary -, absolute value, square root, sine, cosine
3167: */
3168:
3169: code *neg87(elem *e,regm_t *pretregs)
3170: {
3171: regm_t retregs;
3172: code *c1,*c2;
3173: int op;
3174:
3175: assert(*pretregs);
3176: switch (e->Eoper)
3177: { case OPneg: op = 0xE0; break;
3178: case OPabs: op = 0xE1; break;
3179: case OPsqrt: op = 0xFA; break;
3180: case OPsin: op = 0xFE; break;
3181: case OPcos: op = 0xFF; break;
3182: case OPrint: op = 0xFC; break; // FRNDINT
3183: default:
3184: assert(0);
3185: }
3186: retregs = mST0;
3187: c1 = codelem(e->E1,&retregs,FALSE);
3188: c1 = genf2(c1,0xD9,op); // FCHS/FABS/FSQRT/FSIN/FCOS/FRNDINT
3189: c2 = fixresult87(e,mST0,pretregs);
3190: return cat(c1,c2);
3191: }
3192:
3193: /**********************************
3194: * Unary - for complex operands
3195: */
3196:
3197: code *neg_complex87(elem *e,regm_t *pretregs)
3198: {
3199: regm_t retregs;
3200: code *c1,*c2;
3201:
3202: assert(e->Eoper == OPneg);
3203: retregs = mST01;
3204: c1 = codelem(e->E1,&retregs,FALSE);
3205: c1 = genf2(c1,0xD9,0xE0); // FCHS
3206: genf2(c1,0xD9,0xC8 + 1); // FXCH ST(1)
3207: genf2(c1,0xD9,0xE0); // FCHS
3208: genf2(c1,0xD9,0xC8 + 1); // FXCH ST(1)
3209: c2 = fixresult_complex87(e,mST01,pretregs);
3210: return cat(c1,c2);
3211: }
3212:
3213: /*********************************
3214: */
3215:
3216: code *cdind87(elem *e,regm_t *pretregs)
3217: { code *c,*ce,cs;
3218:
3219: //printf("cdind87(e = %p, *pretregs = x%x)\n",e,*pretregs);
3220:
3221: c = getlvalue(&cs,e,0); // get addressing mode
3222: if (*pretregs)
3223: {
3224: switch (tybasic(e->Ety))
3225: { case TYfloat:
3226: case TYifloat:
3227: cs.Iop = 0xD9;
3228: break;
3229:
3230: case TYidouble:
3231: case TYdouble:
3232: case TYdouble_alias:
3233: cs.Iop = 0xDD;
3234: break;
3235:
3236: case TYildouble:
3237: case TYldouble:
3238: cs.Iop = 0xDB;
3239: cs.Irm |= modregrm(0,5,0);
3240: break;
3241:
3242: default:
3243: assert(0);
3244: }
3245: c = cat(c,push87());
3246: c = gen(c,&cs); // FLD EA
3247: ce = fixresult87(e,mST0,pretregs);
3248: c = cat(c,ce);
3249: }
3250: return c;
3251: }
3252:
3253: /************************************
3254: * Reset statics for another .obj file.
3255: */
3256:
3257: void cg87_reset()
3258: {
3259: memset(&oldd,0,sizeof(oldd));
3260: }
3261:
3262:
3263: /*****************************************
3264: * Initialize control word constants.
3265: */
3266:
3267: STATIC code *genrnd(code *c, short cw)
3268: {
3269: if (config.flags3 & CFG3pic)
3270: { code *c1;
3271:
3272: c1 = genfltreg(NULL, 0xC7, 0, 0); // MOV floatreg, cw
3273: c1->IFL2 = FLconst;
3274: c1->IEV2.Vuns = cw;
3275:
3276: c1 = genfltreg(c1, 0xD9, 5, 0); // FLDCW floatreg
3277: c = cat(c, c1);
3278: }
3279: else
3280: {
3281: if (!oldd.round) // if not initialized
3282: { short cwi;
3283:
3284: oldd.round = 1;
3285:
3286: cwi = CW_roundto0; // round to 0
3287: oldd.roundto0 = out_readonly_sym(TYshort,&cwi,2);
3288: cwi = CW_roundtonearest; // round to nearest
3289: oldd.roundtonearest = out_readonly_sym(TYshort,&cwi,2);
3290: }
3291: symbol *rnddir = (cw == CW_roundto0) ? oldd.roundto0 : oldd.roundtonearest;
3292: code cs;
3293: cs.Iop = 0xD9;
3294: cs.Iflags = CFoff;
3295: cs.Irex = 0;
3296: cs.IEVsym1 = rnddir;
3297: cs.IFL1 = rnddir->Sfl;
3298: cs.IEVoffset1 = 0;
3299: cs.Irm = modregrm(0,5,BPRM);
3300: c = gen(c,&cs);
3301: }
3302: return c;
3303: }
3304:
3305: /************************* Complex Numbers *********************/
3306:
3307: /***************************
3308: * Set the PSW based on the state of ST01.
3309: * Input:
3310: * pop if stack should be popped after test
3311: * Returns:
3312: * start of code appended to c.
3313: */
3314:
3315: STATIC code * genctst(code *c,elem *e,int pop)
3316: #if __DMC__
3317: __in
3318: {
3319: assert(pop == 0 || pop == 1);
3320: }
3321: __body
3322: #endif
3323: {
3324: // Generate:
3325: // if (pop)
3326: // FLDZ
3327: // FUCOMPP
3328: // FSTSW AX
3329: // SAHF
3330: // FLDZ
3331: // FUCOMPP
3332: // JNE L1
3333: // JP L1 // if NAN
3334: // FSTSW AX
3335: // SAHF
3336: // L1:
3337: // else
3338: // FLDZ
3339: // FUCOM
3340: // FSTSW AX
3341: // SAHF
3342: // FUCOMP ST(2)
3343: // JNE L1
3344: // JP L1 // if NAN
3345: // FSTSW AX
3346: // SAHF
3347: // L1:
3348: // FUCOMP doesn't raise exceptions on QNANs, unlike FTST
3349:
3350: code *cnop;
3351:
3352: cnop = gennop(CNIL);
3353: c = cat(c,push87());
3354: c = gen2(c,0xD9,0xEE); // FLDZ
3355: if (pop)
3356: {
3357: gen2(c,0xDA,0xE9); // FUCOMPP
3358: pop87();
3359: pop87();
3360: cg87_87topsw(c); // put 8087 flags in CPU flags
3361: gen2(c,0xD9,0xEE); // FLDZ
3362: gen2(c,0xDA,0xE9); // FUCOMPP
3363: pop87();
3364: genjmp(c,JNE,FLcode,(block *) cnop); // JNE L1
3365: genjmp(c,JP, FLcode,(block *) cnop); // JP L1
3366: cg87_87topsw(c); // put 8087 flags in CPU flags
3367: }
3368: else
3369: {
3370: gen2(c,0xDD,0xE1); // FUCOM
3371: cg87_87topsw(c); // put 8087 flags in CPU flags
3372: gen2(c,0xDD,0xEA); // FUCOMP ST(2)
3373: pop87();
3374: genjmp(c,JNE,FLcode,(block *) cnop); // JNE L1
3375: genjmp(c,JP, FLcode,(block *) cnop); // JP L1
3376: cg87_87topsw(c); // put 8087 flags in CPU flags
3377: }
3378: return cat(c, cnop);
3379: }
3380:
3381: /******************************
3382: * Given the result of an expression is in retregs,
3383: * generate necessary code to return result in *pretregs.
3384: */
3385:
3386:
3387: code *fixresult_complex87(elem *e,regm_t retregs,regm_t *pretregs)
3388: {
3389: tym_t tym;
3390: code *c1,*c2;
3391: unsigned sz;
3392:
3393: #if 0
3394: printf("fixresult_complex87(e = %p, retregs = %s, *pretregs = %s)\n",
3395: e,regm_str(retregs),regm_str(*pretregs));
3396: #endif
3397: assert(!*pretregs || retregs);
3398: c1 = CNIL;
3399: c2 = CNIL;
3400: tym = tybasic(e->Ety);
3401: sz = tysize[tym];
3402:
3403: if (*pretregs == 0 && retregs == mST01)
3404: {
3405: c1 = genf2(c1,0xDD,modregrm(3,3,0)); // FPOP
3406: pop87();
3407: c1 = genf2(c1,0xDD,modregrm(3,3,0)); // FPOP
3408: pop87();
3409: }
3410: else if (tym == TYcfloat && *pretregs & (mAX|mDX) && retregs & mST01)
3411: {
3412: if (*pretregs & mPSW && !(retregs & mPSW))
3413: c1 = genctst(c1,e,0); // FTST
3414: pop87();
3415: c1 = genfltreg(c1, ESC(MFfloat,1),3,0); // FSTP floatreg
3416: genfwait(c1);
3417: c2 = getregs(mDX|mAX);
3418: c2 = genfltreg(c2, 0x8B, DX, 0); // MOV EDX,floatreg
3419:
3420: pop87();
3421: c2 = genfltreg(c2, ESC(MFfloat,1),3,0); // FSTP floatreg
3422: genfwait(c2);
3423: c2 = genfltreg(c2, 0x8B, AX, 0); // MOV EAX,floatreg
3424: }
3425: else if (tym == TYcfloat && retregs & (mAX|mDX) && *pretregs & mST01)
3426: {
3427: c1 = push87();
3428: c1 = genfltreg(c1, 0x89, AX, 0); // MOV floatreg, EAX
3429: genfltreg(c1, 0xD9, 0, 0); // FLD float ptr floatreg
3430:
3431: c2 = push87();
3432: c2 = genfltreg(c2, 0x89, DX, 0); // MOV floatreg, EDX
3433: genfltreg(c2, 0xD9, 0, 0); // FLD float ptr floatreg
3434:
3435: if (*pretregs & mPSW)
3436: c2 = genctst(c2,e,0); // FTST
3437: }
3438: else if ((tym == TYcfloat || tym == TYcdouble) &&
3439: *pretregs & (mXMM0|mXMM1) && retregs & mST01)
3440: {
3441: if (*pretregs & mPSW && !(retregs & mPSW))
3442: c1 = genctst(c1,e,0); // FTST
3443: pop87();
3444: c1 = genfltreg(c1, ESC(MFdouble,1),3,0); // FSTP floatreg
3445: genfwait(c1);
3446: c2 = getregs(mXMM0|mXMM1);
3447: c2 = genfltreg(c2, 0xF20F10, XMM1 - XMM0, 0); // MOVD XMM1,floatreg
3448:
3449: pop87();
3450: c2 = genfltreg(c2, ESC(MFdouble,1),3,0); // FSTP floatreg
3451: genfwait(c2);
3452: c2 = genfltreg(c2, 0xF20F10, XMM0 - XMM0, 0); // MOVD XMM0,floatreg
3453: }
3454: else if ((tym == TYcfloat || tym == TYcdouble) &&
3455: retregs & (mXMM0|mXMM1) && *pretregs & mST01)
3456: {
3457: c1 = push87();
3458: c1 = genfltreg(c1, 0xF20F11, XMM0-XMM0, 0); // MOVD floatreg, XMM0
3459: genfltreg(c1, 0xDD, 0, 0); // FLD double ptr floatreg
3460:
3461: c2 = push87();
3462: c2 = genfltreg(c2, 0xF20F11, XMM1-XMM0, 0); // MOV floatreg, XMM1
3463: genfltreg(c2, 0xDD, 0, 0); // FLD double ptr floatreg
3464:
3465: if (*pretregs & mPSW)
3466: c2 = genctst(c2,e,0); // FTST
3467: }
3468: else
3469: { if (*pretregs & mPSW)
3470: { if (!(retregs & mPSW))
3471: { assert(retregs & mST01);
3472: c1 = genctst(c1,e,!(*pretregs & mST01)); // FTST
3473: }
3474: }
3475: assert(!(*pretregs & mST01) || (retregs & mST01));
3476: }
3477: if (*pretregs & mST01)
3478: { note87(e,0,1);
3479: note87(e,sz/2,0);
3480: }
3481: return cat(c1,c2);
3482: }
3483:
3484: /*****************************************
3485: * Operators OPc_r and OPc_i
3486: */
3487:
3488: code *cdconvt87(elem *e, regm_t *pretregs)
3489: {
3490: regm_t retregs;
3491: code *c;
3492:
3493: retregs = mST01;
3494: c = codelem(e->E1, &retregs, FALSE);
3495: switch (e->Eoper)
3496: {
3497: case OPc_r:
3498: c = genf2(c,0xDD,0xD8 + 0); // FPOP
3499: pop87();
3500: break;
3501:
3502: case OPc_i:
3503: c = genf2(c,0xDD,0xD8 + 1); // FSTP ST(1)
3504: pop87();
3505: break;
3506:
3507: default:
3508: assert(0);
3509: }
3510: retregs = mST0;
3511: c = cat(c, fixresult87(e, retregs, pretregs));
3512: return c;
3513: }
3514:
3515: /**************************************
3516: * Load complex operand into ST01 or flags or both.
3517: */
3518:
3519: code *cload87(elem *e, regm_t *pretregs)
3520: #if __DMC__
3521: __in
3522: {
3523: assert(I32 && config.inline8087);
3524: elem_debug(e);
3525: assert(*pretregs & (mST01 | mPSW));
3526: assert(!(*pretregs & ~(mST01 | mPSW)));
3527: }
3528: __out (result)
3529: {
3530: }
3531: __body
3532: #endif
3533: {
3534: tym_t ty = tybasic(e->Ety);
3535: code *c = NULL;
3536: code *cpush = NULL;
3537: code cs;
3538: unsigned mf;
3539: unsigned sz;
3540: unsigned char ldop;
3541: regm_t retregs;
3542: int i;
3543:
3544: //printf("cload87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
3545: sz = tysize[ty] / 2;
3546: memset(&cs, 0, sizeof(cs));
3547: if (ADDFWAIT())
3548: cs.Iflags = CFwait;
3549: switch (ty)
3550: {
3551: case TYcfloat: mf = MFfloat; break;
3552: case TYcdouble: mf = MFdouble; break;
3553: case TYcldouble: break;
3554: default: assert(0);
3555: }
3556: switch (e->Eoper)
3557: {
3558: case OPvar:
3559: notreg(e); // never enregister this variable
3560: case OPind:
3561: cpush = cat(push87(), push87());
3562: switch (ty)
3563: {
3564: case TYcfloat:
3565: case TYcdouble:
3566: c = loadea(e,&cs,ESC(mf,1),0,0,0,0); // FLD var
3567: cs.IEVoffset1 += sz;
3568: c = gen(c, &cs);
3569: break;
3570:
3571: case TYcldouble:
3572: c = loadea(e,&cs,0xDB,5,0,0,0); // FLD var
3573: cs.IEVoffset1 += sz;
3574: c = gen(c, &cs);
3575: break;
3576:
3577: default:
3578: assert(0);
3579: }
3580: retregs = mST01;
3581: break;
3582:
3583: case OPd_ld:
3584: case OPld_d:
3585: case OPf_d:
3586: case OPd_f:
3587: c = cload87(e->E1, pretregs);
3588: freenode(e->E1);
3589: return c;
3590:
3591: case OPconst:
3592: cpush = cat(push87(), push87());
3593: for (i = 0; i < 2; i++)
3594: {
3595: ldop = loadconst(e, i);
3596: if (ldop)
3597: {
3598: c = genf2(c,0xD9,ldop); // FLDx
3599: }
3600: else
3601: {
3602: assert(0);
3603: }
3604: }
3605: retregs = mST01;
3606: break;
3607:
3608: default:
3609: #ifdef DEBUG
3610: elem_print(e);
3611: #endif
3612: assert(0);
3613: }
3614: return cat4(cpush,c,fixresult_complex87(e, retregs, pretregs), NULL);
3615: }
3616:
3617: #endif // !SPP
3618: