1: // Copyright (C) 1985-1998 by Symantec
   2: // Copyright (C) 2000-2011 by Digital Mars
   3: // All Rights Reserved
   4: // http://www.digitalmars.com
   5: // Written by Walter Bright
   6: /*
   7:  * This source file is made available for personal use
   8:  * only. The license is in /dmd/src/dmd/backendlicense.txt
   9:  * or /dm/src/dmd/backendlicense.txt
  10:  * For any other uses, please contact Digital Mars.
  11:  */
  12: 
  13: 
  14: #if (SCPP || MARS) && !HTOD
  15: 
  16: #include        <stdio.h>
  17: #include        <string.h>
  18: #include        <time.h>
  19: 
  20: #include        "cc.h"
  21: #include        "el.h"
  22: #include        "go.h"
  23: #include        "oper.h"
  24: #include        "global.h"
  25: #include        "type.h"
  26: 
  27: static char __file__[] = __FILE__;      /* for tassert.h                */
  28: #include        "tassert.h"
  29: 
  30: /*#define vec_copy(t,f) (dbg_printf("line %d\n",__LINE__),vec_copy((t),(f)))*/
  31: 
  32: extern mftype mfoptim;
  33: 
  34: struct Iv;
  35: 
  36: /*********************************
  37:  * Loop data structure.
  38:  */
  39: 
  40: struct loop
  41: {   loop *Lnext;        // Next loop in list (startloop -> start of list)
  42:     vec_t Lloop;        // Vector of blocks in this loop
  43:     vec_t Lexit;        // Vector of exit blocks of loop
  44:     block *Lhead;       // Pointer to header of loop
  45:     block *Ltail;       // Pointer to tail
  46:     block *Lpreheader;  // Pointer to preheader (if any)
  47:     list_t Llis;        // loop invariant elems moved to Lpreheader, so
  48:                         // redundant temporaries aren't created
  49:     Iv *Livlist;        // basic induction variables
  50:     Iv *Lopeqlist;      // list of other op= variables
  51:     void print();
  52:     static loop *mycalloc();
  53: 
  54:     static loop *freelist;
  55: };
  56: 
  57: struct famlist
  58: {       elem **FLpelem;         /* parent of elem in the family         */
  59:         elem *c1,*c2;           /* c1*(basic IV) + c2                   */
  60: #define FLELIM  ((symbol *)-1)
  61:         symbol *FLtemp;         // symbol index of temporary (FLELIM if */
  62:                                 /* this entry has no temporary)         */
  63:         tym_t FLty;             /* type of this induction variable      */
  64:         tym_t FLivty;           /* type of the basic IV elem (which is  */
  65:                                 /* not necessarilly the type of the IV  */
  66:                                 /* elem!)                               */
  67:         famlist *FLnext;        // next in list
  68:         void print();
  69: 
  70:     static famlist *mycalloc();
  71: 
  72:     static famlist *freelist;
  73: };
  74: 
  75: struct Iv
  76: {
  77:         symbol *IVbasic;        // symbol of basic IV
  78:         elem **IVincr;          // pointer to parent of IV increment elem
  79:         famlist *IVfamily;      // variables in this family
  80:         Iv *IVnext;             // next iv in list
  81:         void print();
  82: 
  83:     static Iv *mycalloc();
  84: 
  85:     static Iv *freelist;
  86: };
  87: 
  88: STATIC void freeloop(loop **pl);
  89: STATIC void buildloop(loop **pl, block *head, block *tail);
  90: STATIC void insert(block *b , vec_t lv);
  91: STATIC void movelis(elem *n,block *b,loop *l,int *pdomexit);
  92: STATIC int looprotate(loop *l);
  93: STATIC void markinvar(elem *n , vec_t rd);
  94: STATIC bool refs(symbol *v , elem *n , elem *nstop);
  95: STATIC void appendelem(elem *n , elem **pn);
  96: STATIC void freeivlist(Iv *biv);
  97: STATIC void unmarkall(elem *e);
  98: void filterrd(vec_t f,vec_t rd,symbol *s);
  99: STATIC void filterrdind(vec_t f,vec_t rd,elem *e);
 100: STATIC famlist * simfl(famlist *fl , tym_t tym);
 101: STATIC famlist * newfamlist(tym_t ty);
 102: STATIC void loopiv(loop *l);
 103: STATIC void findbasivs(loop *l);
 104: STATIC void findopeqs(loop *l);
 105: STATIC void findivfams(loop *l);
 106: STATIC void ivfamelems(Iv *biv , elem **pn);
 107: STATIC void elimfrivivs(loop *l);
 108: STATIC void intronvars(loop *l);
 109: STATIC bool funcprev(Iv *biv , famlist *fl);
 110: STATIC void elimbasivs(loop *l);
 111: STATIC void elimopeqs(loop *l);
 112: STATIC famlist * flcmp(famlist *f1 , famlist *f2);
 113: STATIC elem ** onlyref(symbol *x , loop *l , elem *incn , int *prefcount);
 114: STATIC void countrefs(elem **pn , bool flag);
 115: STATIC int countrefs2(elem *e);
 116: STATIC void elimspec(loop *l);
 117: STATIC void elimspecwalk(elem **pn);
 118: 
 119: static  bool addblk;                    /* if TRUE, then we added a block */
 120: 
 121: /* is elem loop invariant?      */
 122: #define isLI(n) ((n)->Nflags & NFLli)
 123: 
 124: /* make elem loop invariant     */
 125: #define makeLI(n) ((n)->Nflags |= NFLli)
 126: 
 127: /******************************
 128:  * UNAMBIG being defined means that:
 129:  *      Only variables that could only be unambiguously defined
 130:  *      are candidates for loop invariant removal and induction
 131:  *      variables.
 132:  *      This means only variables that have the SFLunambig flag
 133:  *      set for them.
 134:  *      Doing this will still cover 90% (I hope) of the cases, and
 135:  *      is a lot faster to compute.
 136:  */
 137: 
 138: #define UNAMBIG 1
 139: 
 140: /****************************
 141:  */
 142: 
 143: void famlist::print()
 144: {
 145: #ifdef DEBUG
 146:     dbg_printf("famlist:\n");
 147:     dbg_printf("*FLpelem:\n");
 148:     elem_print(*FLpelem);
 149:     dbg_printf("c1:");
 150:     elem_print(c1);
 151:     dbg_printf("c2:");
 152:     elem_print(c2);
 153:     dbg_printf("FLty = "); WRTYxx(FLty);
 154:     dbg_printf("\nFLivty = "); WRTYxx(FLivty);
 155:     dbg_printf("\n");
 156: #endif
 157: }
 158: 
 159: 
 160: /****************************
 161:  */
 162: 
 163: void Iv::print()
 164: {
 165: #ifdef DEBUG
 166:     dbg_printf("IV: '%s'\n",IVbasic->Sident);
 167:     dbg_printf("*IVincr:\n");
 168:     elem_print(*IVincr);
 169: #endif
 170: }
 171: 
 172: /***********************
 173:  * Write loop.
 174:  */
 175: 
 176: void loop::print()
 177: {
 178: #ifdef DEBUG
 179:   loop *l = this;
 180:   dbg_printf("loop %p, next = %p\n",l,(l) ? l->Lnext : (loop *) NULL);
 181:   if (!l)
 182:         return;
 183:   dbg_printf("\thead: B%d, tail: B%d, prehead: B%d\n",l->Lhead->Bdfoidx,
 184:         l->Ltail->Bdfoidx,(l->Lpreheader ) ? l->Lpreheader->Bdfoidx :
 185:                                                         (unsigned)-1);
 186:   dbg_printf("\tLloop "); vec_println(l->Lloop);
 187:   dbg_printf("\tLexit "); vec_println(l->Lexit);
 188: #endif
 189: }
 190: 
 191: /***************************
 192:  * Allocate loop.
 193:  */
 194: 
 195: loop *loop::freelist = NULL;
 196: 
 197: loop *loop::mycalloc()
 198: {   loop *l;
 199: 
 200:     if (freelist)
 201:     {
 202:         l = freelist;
 203:         freelist = l->Lnext;
 204:         memset(l,0,sizeof(loop));
 205:     }
 206:     else
 207:         l = (loop *) mem_calloc(sizeof(loop));
 208:     return l;
 209: }
 210: 
 211: /*************
 212:  * Free loops.
 213:  */
 214: 
 215: STATIC void freeloop(loop **pl)
 216: { loop *ln;
 217:   loop *l;
 218: 
 219:   for (l = *pl; l; l = ln)
 220:   {     ln = l->Lnext;
 221:         vec_free(l->Lloop);
 222:         vec_free(l->Lexit);
 223:         list_free(&l->Llis);
 224:         l->Lnext = loop::freelist;
 225:         loop::freelist = l;
 226:   }
 227:   *pl = NULL;
 228: }
 229: 
 230: /**********************************
 231:  * Initialize block information.
 232:  * Returns:
 233:  *      !=0     contains BCasm block
 234:  */
 235: 
 236: int blockinit()
 237: { register unsigned i;
 238:   register block *b;
 239:   int hasasm = 0;
 240: 
 241:   assert(dfo);
 242:   for (i = 0, b = startblock; b; i++, b = b->Bnext)
 243:   {
 244: #ifdef DEBUG                    /* check integrity of Bpred and Bsucc   */
 245:         register list_t blp;
 246: 
 247:         for (blp = b->Bpred; blp; blp = list_next(blp))
 248:         {       register list_t bls;
 249: 
 250:                 for (bls = list_block(blp)->Bsucc; bls; bls = list_next(bls))
 251:                         if (list_block(bls) == b)
 252:                                 goto L1;
 253:                 assert(0);
 254:             L1: ;
 255:         }
 256: #endif
 257:         if (b->BC == BCasm)
 258:             hasasm = 1;
 259:         ;                               /* compute number of blocks     */
 260:   }
 261:   assert(numblks == i && maxblks);
 262:   assert(i <= maxblks);
 263:   for (i = 0; i < dfotop; i++)
 264:   {     assert(dfo[i]->Bdfoidx == i);
 265:         if (!dfo[i]->Bdom)
 266:                 dfo[i]->Bdom = vec_calloc(maxblks); /* alloc Bdom vectors */
 267:   }
 268:   return hasasm;
 269: }
 270: 
 271: /****************************************
 272:  * Compute dominators (Bdom) for each block.
 273:  * See Aho & Ullman Fig. 13.5.
 274:  * Note that flow graph is reducible if there is only one
 275:  * pass through the loop.
 276:  * Input:
 277:  *      dfo[]
 278:  * Output:
 279:  *      fills in the Bdom vector for each block
 280:  */
 281: 
 282: void compdom()
 283: { unsigned i;
 284:   unsigned cntr;
 285:   vec_t t1;
 286:   list_t bl;
 287:   bool chgs;
 288:   block *sb;
 289: 
 290:   assert(dfo);
 291:   sb = dfo[0];                          // starting block
 292:   t1 = vec_calloc(vec_numbits(sb->Bdom));       // allocate a temporary
 293:   vec_clear(sb->Bdom);
 294:   vec_setbit(0,sb->Bdom);               // starting block only doms itself
 295:   for (i = 1; i < dfotop; i++)          // for all except startblock
 296:         vec_set(dfo[i]->Bdom);          // dominate all blocks
 297:   cntr = 0;                             // # of times thru loop
 298:   do
 299:   {     chgs = FALSE;
 300:         for (i = 1; i < dfotop; ++i)    // for each block in dfo[]
 301:         {                               // except startblock
 302:                 bl = dfo[i]->Bpred;
 303:                 if (bl)                 // if there are predecessors
 304:                 {       vec_copy(t1,list_block(bl)->Bdom);
 305:                         while ((bl = list_next(bl)) != NULL)
 306:                             vec_andass(t1,list_block(bl)->Bdom);
 307:                 }
 308:                 else
 309:                         vec_clear(t1);  // no predecessors to dominate
 310:                 vec_setbit(i,t1);       // each block doms itself
 311:                 if (chgs)
 312:                         vec_copy(dfo[i]->Bdom,t1);
 313:                 else if (!vec_equal(dfo[i]->Bdom,t1))   // if any changes
 314:                 {       vec_copy(dfo[i]->Bdom,t1);
 315:                         chgs = TRUE;
 316:                 }
 317:         }
 318:         cntr++;
 319:         assert(cntr < 50);              // should have converged by now
 320:   } while (chgs);
 321:   vec_free(t1);
 322:   if (cntr <= 2)
 323:         cmes("Flow graph is reducible\n");
 324:   else
 325:         cmes("Flow graph is not reducible\n");
 326: }
 327: 
 328: /***************************
 329:  * Return !=0 if block A dominates block B.
 330:  */
 331: 
 332: HINT dom(block *A,block *B)
 333: {
 334:   assert(A && B && dfo && dfo[A->Bdfoidx] == A);
 335:   return vec_testbit(A->Bdfoidx,B->Bdom);
 336: }
 337: 
 338: /**********************
 339:  * Find all the loops.
 340:  */
 341: 
 342: STATIC void findloops(loop **ploops)
 343: { unsigned i;
 344:   list_t bl;
 345:   block *b,*s;
 346: 
 347:   freeloop(ploops);
 348: 
 349:   //printf("findloops()\n");
 350:   for (i = 0; i < dfotop; i++)
 351:         dfo[i]->Bweight = 1;            /* reset Bweights               */
 352:   for (i = dfotop; i--;)                /* for each block (note reverse */
 353:                                         /* dfo order, so most nested    */
 354:                                         /* loops are found first)       */
 355:   {     b = dfo[i];
 356:         assert(b);
 357:         for (bl = b->Bsucc; bl; bl = list_next(bl))
 358:         {       s = list_block(bl);             /* each successor s to b */
 359:                 assert(s);
 360:                 if (dom(s,b))                   /* if s dominates b     */
 361:                     buildloop(ploops,s,b);      // we found a loop
 362:         }
 363:   }
 364: 
 365: #ifdef DEBUG
 366:   if (debugc)
 367:   { loop *l;
 368: 
 369:     for (l = *ploops; l; l = l->Lnext)
 370:     {
 371:         l->print();
 372:     }
 373:   }
 374: #endif
 375: }
 376: 
 377: /********************************
 378:  */
 379: 
 380: STATIC void loop_weight(block *b,int factor)
 381: {
 382:     // Be careful not to overflow
 383:     if (b->Bweight < 0x10000)
 384:         b->Bweight *= 10 * factor;
 385:     else if (b->Bweight < 0x100000)
 386:         b->Bweight *= 2 * factor;
 387:     else
 388:         b->Bweight += factor;
 389: }
 390: 
 391: /*****************************
 392:  * Construct natural loop.
 393:  * Algorithm 13.1 from Aho & Ullman.
 394:  * Note that head dom tail.
 395:  */
 396: 
 397: STATIC void buildloop(loop **ploops,block *head,block *tail)
 398: { loop *l;
 399:   unsigned i;
 400:   list_t bl;
 401: 
 402:   //printf("buildloop()\n");
 403:   /* See if this is part of an existing loop. If so, merge the two.     */
 404:   for (l = *ploops; l; l = l->Lnext)
 405:         if (l->Lhead == head)           /* two loops with same header   */
 406:         {
 407:             vec_t v;
 408: 
 409:             // Calculate loop contents separately so we get the Bweights
 410:             // done accurately.
 411: 
 412:             v = vec_calloc(maxblks);
 413:             vec_setbit(head->Bdfoidx,v);
 414:             loop_weight(head,1);
 415:             insert(tail,v);
 416: 
 417:             vec_orass(l->Lloop,v);      // merge into existing loop
 418:             vec_free(v);
 419: 
 420:             vec_clear(l->Lexit);        // recompute exit blocks
 421:             goto L1;
 422:         }
 423: 
 424:   /* Allocate loop entry        */
 425:   l = loop::mycalloc();
 426:   l->Lnext = *ploops;
 427:   *ploops = l;                          // put l at beginning of list
 428: 
 429:   l->Lloop = vec_calloc(maxblks);       /* allocate loop bit vector     */
 430:   l->Lexit = vec_calloc(maxblks);       /* bit vector for exit blocks   */
 431:   l->Lhead = head;
 432:   l->Ltail = tail;
 433: 
 434:   vec_setbit(head->Bdfoidx,l->Lloop);   /* add head to the loop         */
 435:   loop_weight(head,2);                  // *20 usage for loop header
 436: 
 437:   insert(tail,l->Lloop);                /* insert tail in loop          */
 438: 
 439: L1:
 440:   /* Find all the exit blocks (those blocks with
 441:    * successors outside the loop).
 442:    */
 443: 
 444:   foreach (i,dfotop,l->Lloop)           /* for each block in this loop  */
 445:   {     if (dfo[i]->BC == BCret || dfo[i]->BC == BCretexp || dfo[i]->BC == BCexit)
 446:                 vec_setbit(i,l->Lexit); /* ret blocks are exit blocks */
 447:         else
 448:         {       for (bl = dfo[i]->Bsucc; bl; bl = list_next(bl))
 449:                         if (!vec_testbit(list_block(bl)->Bdfoidx,l->Lloop))
 450:                         {       vec_setbit(i,l->Lexit);
 451:                                 break;
 452:                         }
 453:         }
 454:   }
 455: 
 456:     /*  Find preheader, if any, to the loop.
 457:         The preheader is a block that has only the head as a successor.
 458:         All other predecessors of head must be inside the loop.
 459:      */
 460:     l->Lpreheader = NULL;
 461:     for (bl = head->Bpred; bl; bl = list_next(bl))
 462:     {   block *b = list_block(bl);
 463: 
 464:         if (!vec_testbit(b->Bdfoidx,l->Lloop))  /* if not in loop       */
 465:         {   if (l->Lpreheader)                  /* if already one       */
 466:             {   l->Lpreheader = NULL;           /* can only be one      */
 467:                 break;
 468:             }
 469:             else
 470:             {   if (list_next(b->Bsucc))        // if more than 1 successor
 471:                     break;                      // b can't be a preheader
 472:                 l->Lpreheader = b;
 473:             }
 474:         }
 475:     }
 476: }
 477: 
 478: /********************************
 479:  * Support routine for buildloop().
 480:  * Add a block b and all its predecessors to loop lv.
 481:  */
 482: 
 483: STATIC void insert(register block *b, register vec_t lv)
 484: { register list_t bl;
 485: 
 486:   assert(b && lv);
 487:   if (!vec_testbit(b->Bdfoidx,lv))      /* if block is not in loop      */
 488:   {     vec_setbit(b->Bdfoidx,lv);      /* add block to loop            */
 489:         loop_weight(b,1);               // *10 usage count
 490:         for (bl = b->Bpred; bl; bl = list_next(bl))
 491:             insert(list_block(bl),lv);  /* insert all its predecessors  */
 492:   }
 493: }
 494: 
 495: /**************************************
 496:  * Perform loop rotations.
 497:  * Loop starts as:
 498:  *
 499:  *         prehead
 500:  *          |
 501:  *          v
 502:  *      +->head---->
 503:  *      |   |
 504:  *      |   v
 505:  *      |  body
 506:  *      |   |
 507:  *      |   v
 508:  *      +--tail
 509:  *
 510:  * Two types are done:
 511:  *      1) Header is moved to be past the tail.
 512:  *
 513:  *         prehead
 514:  *          |
 515:  *      +---+
 516:  *      |
 517:  *      |  body<-+
 518:  *      |   |    |
 519:  *      |   v    |
 520:  *      |  tail  |
 521:  *      |   |    |
 522:  *      |   v    |
 523:  *      +->head--+
 524:  *          |
 525:  *          v
 526:  *
 527:  *      2) Header is copied past the tail (done only if MFtime is set).
 528:  *
 529:  *         prehead
 530:  *          |
 531:  *          v
 532:  *         head1-----+
 533:  *          |        |
 534:  *          v        |
 535:  *         body<--+  |
 536:  *          |     |  |
 537:  *          v     |  |
 538:  *         tail   |  |
 539:  *          |     |  |
 540:  *          v     |  |
 541:  *         head2--+  |
 542:  *          |        |
 543:  *          +--------+
 544:  *          v
 545:  *
 546:  * Input:
 547:  *      Loop information (do not depend on the preheader information)
 548:  * Output:
 549:  *      Revised list of blocks, a new dfo and new loop information
 550:  * Returns:
 551:  *      TRUE need to recompute loop data
 552:  */
 553: 
 554: STATIC int looprotate(loop *l)
 555: {
 556:     register    block *tail = l->Ltail;
 557:     register    block *head = l->Lhead;
 558:     register    block *b;
 559: 
 560:     //printf("looprotate(%p)\n",l);
 561: 
 562:     // Do not rotate loop if:
 563:     if (head == tail ||                         // loop is only one block big
 564:         !vec_testbit(head->Bdfoidx,l->Lexit))   // header is not an exit block
 565:         goto Lret;
 566: 
 567:     if (//iter != 1 &&
 568:         vec_testbit(tail->Bdfoidx,l->Lexit))    // tail is an exit block
 569:         goto Lret;
 570: 
 571:     // Do not rotate if already rotated
 572:     for (b = tail->Bnext; b; b = b->Bnext)
 573:         if (b == head)                  // if loop already rotated
 574:             goto Lret;
 575: 
 576: #if SCPP
 577:     if (head->BC == BCtry)
 578:          goto Lret;
 579: #endif
 580:     if (head->BC == BC_try)
 581:          goto Lret;
 582: #ifdef DEBUG
 583:     //if (debugc) { dbg_printf("looprotate: "); l->print(); }
 584: #endif
 585: 
 586:     if (
 587: #if !TX86
 588:        // too costly overall - Slows compiler by 5%-10%,
 589:        // excess code generated, little or no time saving
 590:        0 && config.optimized &&
 591: #endif
 592:        (mfoptim & MFtime) && head->BC != BCswitch && head->BC != BCasm)
 593:     {   // Duplicate the header past the tail (but doing
 594:         // switches would be too expensive in terms of code
 595:         // generated).
 596:         register    block *head2;
 597:         register    list_t bl, *pbl2, *pbl, *pbln;
 598: 
 599:         head2 = block_calloc(); // create new head block
 600:         numblks++;                      // number of blocks in existence
 601:         head2->Btry = head->Btry;
 602: #if 1
 603:         head2->Bflags = head->Bflags;
 604:         head->Bflags = BFLnomerg;       // move flags over to head2
 605:         head2->Bflags |= BFLnomerg;
 606: #else
 607:         head->Bflags = 0;               // move flags over to head2
 608: #endif
 609:         head2->BC = head->BC;
 610:         assert(head2->BC != BCswitch);
 611:         if (head->Belem)                // copy expression tree
 612:             head2->Belem = el_copytree(head->Belem);
 613:         head2->Bnext = tail->Bnext;
 614:         tail->Bnext = head2;
 615: 
 616:         // pred(head1) = pred(head) outside loop
 617:         // pred(head2) = pred(head) inside loop
 618:         pbl2 = &(head2->Bpred);
 619:         for (pbl = &(head->Bpred); *pbl; pbl = pbln)
 620:         {
 621:             if (vec_testbit(list_block(*pbl)->Bdfoidx, l->Lloop))
 622:             {   // if this predecessor is inside the loop
 623: 
 624:                 *pbl2 = *pbl;
 625:                 *pbl = list_next(*pbl);
 626:                 pbln = pbl;                     // don't skip this next one
 627:                 list_next(*pbl2) = NULL;
 628:                 bl = list_block(*pbl2)->Bsucc;
 629:                 pbl2 = &(list_next(*pbl2));
 630:                 for (; bl; bl = list_next(bl))
 631:                     if (list_block(bl) == head)
 632:                     {
 633:                         list_ptr(bl) = (void *)head2;
 634:                         goto L2;
 635:                     }
 636:                 assert(0);
 637:         L2:     ;
 638:             }
 639:             else
 640:                 pbln = &(list_next(*pbl));      // next predecessor in list
 641:         } // for each pred(head)
 642: 
 643:         // succ(head2) = succ(head)
 644:         for (bl = head->Bsucc; bl; bl = list_next(bl))
 645:         {
 646:             list_append(&(head2->Bsucc),list_block(bl));
 647:             list_append(&(list_block(bl)->Bpred),head2);
 648:         }
 649:         changes++;
 650:         return TRUE;
 651:     }
 652:     else if (startblock != head
 653:             /* This screws up the OPctor/OPdtor sequence for:
 654:              *   struct CString
 655:              *   {   CString();
 656:              *      ~CString();
 657:              *      int GetLength();
 658:              *   };
 659:              *
 660:              *   void f(void)
 661:              *   {  for(;;)
 662:              *      {   CString s ;
 663:              *    if(s.GetLength()!=0)
 664:              *       break ;
 665:              *      }
 666:              *   }
 667:              */
 668:             && !(config.flags3 & CFG3eh)
 669:             )
 670:     {   // optimize for space
 671:         // Simply position the header past the tail
 672:         for (b = startblock; b; b = b->Bnext)
 673:             if (b->Bnext == head)
 674:                 goto L1;                // found parent b of head
 675:         assert(0);
 676: 
 677:     L1:
 678:         b->Bnext = head->Bnext;
 679:         head->Bnext = tail->Bnext;
 680:         tail->Bnext = head;
 681:         cmes2( "Rotated loop %p\n", l);
 682:         changes++;
 683:     }
 684: Lret:
 685:     return FALSE;
 686: }
 687: 
 688: static int gref;                // parameter for markinvar()
 689: static block *gblock;           // parameter for markinvar()
 690: static vec_t lv;                // parameter for markinvar()
 691: static vec_t gin;               // parameter for markinvar()
 692: static bool doflow;             // TRUE if flow analysis has to be redone
 693: 
 694: /*********************************
 695:  * Loop invariant and induction variable elimination.
 696:  * Input:
 697:  *      iter    which optimization iteration we are on
 698:  */
 699: 
 700: void loopopt()
 701: {
 702:     list_t bl;
 703:     loop *l;
 704:     loop *ln;
 705:     vec_t rd;
 706:     loop *startloop;
 707: 
 708:     cmes("loopopt()\n");
 709:     startloop = NULL;
 710: restart:
 711:     file_progress();
 712:     if (blockinit())                    // init block data
 713:     {
 714:         findloops(&startloop);          // Compute Bweights
 715:         freeloop(&startloop);           // free existing loops
 716:         return;                         // can't handle ASM blocks
 717:     }
 718:     compdom();                          // compute dominators
 719:     findloops(&startloop);              // find the loops
 720: 
 721:     for (l = startloop; l; l = ln)
 722:     {
 723:         ln = l->Lnext;
 724:         if (looprotate(l))              // rotate the loop
 725:         {
 726:             compdfo();
 727:             blockinit();
 728:             compdom();
 729:             findloops(&startloop);      // may trash l->Lnext
 730:             if (ln)
 731:             {   ln = startloop;         // start over
 732:                 file_progress();
 733:             }
 734:         }
 735:     }
 736:     // Make sure there is a preheader for each loop.
 737: 
 738:     addblk = FALSE;                     /* assume no blocks added        */
 739:     for (l = startloop; l; l = l->Lnext)/* for each loop                 */
 740:     {
 741: #ifdef DEBUG
 742:         //if (debugc) l->print();
 743: #endif
 744:         if (!l->Lpreheader)             /* if no preheader               */
 745:         {   register block *h, *p;
 746: 
 747:             cmes("Generating preheader for loop\n");
 748:             addblk = TRUE;              /* add one                       */
 749:             p = block_calloc();         // the preheader
 750:             numblks++;
 751:             assert (numblks <= maxblks);
 752:             h = l->Lhead;               /* loop header                   */
 753: 
 754:             /* Find parent of h */
 755:             if (h == startblock)
 756:                 startblock = p;
 757:             else
 758:             {   register block *ph;
 759: 
 760:                 for (ph = startblock; 1; ph = ph->Bnext)
 761:                 {   assert(ph);         /* should have found it         */
 762:                     if (ph->Bnext == h)
 763:                             break;
 764:                 }
 765:                 /* Link p into block list between ph and h      */
 766:                 ph->Bnext = p;
 767:             }
 768:             p->Bnext = h;
 769: 
 770:             l->Lpreheader = p;
 771:             p->BC = BCgoto;
 772:             assert(p->Bsucc == NULL);
 773:             list_append(&(p->Bsucc),h); /* only successor is h          */
 774:             p->Btry = h->Btry;
 775: 
 776:             cmes3("Adding preheader %p to loop %p\n",p,l);
 777: 
 778:             // Move preds of h that aren't in the loop to preds of p
 779:             for (bl = h->Bpred; bl;)
 780:             {   register block *b = list_block(bl);
 781: 
 782:                 if (!vec_testbit (b->Bdfoidx, l->Lloop))
 783:                 {   register list_t bls;
 784: 
 785:                     list_append(&(p->Bpred), b);
 786:                     list_subtract(&(h->Bpred), b);
 787:                     bl = h->Bpred;      /* dunno what subtract did      */
 788: 
 789:                     /* Fix up successors of predecessors        */
 790:                     for (bls = b->Bsucc; bls; bls = list_next(bls))
 791:                         if (list_block(bls) == h)
 792:                                 list_ptr(bls) = (void *)p;
 793:                 }
 794:                 else
 795:                     bl = list_next(bl);
 796:             }
 797:             list_append(&(h->Bpred),p); /* p is a predecessor to h      */
 798:         }
 799:     } /* for */
 800:     if (addblk)                         /* if any blocks were added      */
 801:     {
 802:         compdfo();                      /* compute depth-first order    */
 803:         blockinit();
 804:         compdom();
 805:         findloops(&startloop);          // recompute block info
 806:         addblk = FALSE;
 807:     }
 808: 
 809:     /* Do the loop optimizations. Note that accessing the loops */
 810:     /* starting from startloop will access them in least nested */
 811:     /* one first, thus moving LIs out as far as possible.       */
 812: 
 813:     doflow = TRUE;                      /* do flow analysis             */
 814:     cmes("Starting loop invariants\n");
 815: 
 816:     for (l = startloop; l; l = l->Lnext)
 817:     {   register unsigned i,j;
 818: 
 819: #ifdef DEBUG
 820:         //if (debugc) l->print();
 821: #endif
 822:         file_progress();
 823:         assert(l->Lpreheader);
 824:         if (doflow)
 825:         {
 826:                 flowrd();               /* compute reaching definitions  */
 827:                 flowlv();               /* compute live variables        */
 828:                 flowae();               // compute available expressions
 829:                 doflow = FALSE;         /* no need to redo it           */
 830:                 if (deftop == 0)        /* if no definition elems       */
 831:                         break;          /* no need to optimize          */
 832:         }
 833:         lv = l->Lloop;
 834:         cmes2("...Loop %p start...\n",l);
 835: 
 836:         /* Unmark all elems in this loop         */
 837:         foreach (i,dfotop,lv)
 838:             if (dfo[i]->Belem)
 839:                 unmarkall(dfo[i]->Belem);       /* unmark all elems     */
 840: 
 841:         /* Find & mark all LIs   */
 842:         gin = vec_clone(l->Lpreheader->Bout);
 843:         rd = vec_calloc(deftop);        /* allocate our running RD vector */
 844:         foreach (i,dfotop,lv)           /* for each block in loop       */
 845:         {   block *b = dfo[i];
 846: 
 847:             cmes2("B%d\n",i);
 848:             if (b->Belem)
 849:             {
 850:                 vec_copy(rd, b->Binrd); // IN reaching defs
 851: #if 0
 852:                 dbg_printf("i = %d\n",i);
 853:                 {   int j;
 854:                     for (j = 0; j < deftop; j++)
 855:                         elem_print(defnod[j].DNelem);
 856:                 }
 857:                 dbg_printf("rd    : "); vec_println(rd);
 858: #endif
 859:                 gblock = b;
 860:                 gref = 0;
 861:                 if (b != l->Lhead)
 862:                     gref = 1;
 863:                 markinvar(b->Belem, rd);
 864: #if 0
 865:                 dbg_printf("i = %d\n",i);
 866:                 {   int j;
 867:                     for (j = 0; j < deftop; j++)
 868:                         elem_print(defnod[j].DNelem);
 869:                 }
 870:                 dbg_printf("rd    : "); vec_println(rd);
 871:                 dbg_printf("Boutrd: "); vec_println(b->Boutrd);
 872: #endif
 873:                 assert(vec_equal(rd, b->Boutrd));
 874:             }
 875:             else
 876:                 assert(vec_equal(b->Binrd, b->Boutrd));
 877:         }
 878:         vec_free(rd);
 879:         vec_free(gin);
 880: 
 881:         /* Move loop invariants  */
 882:         foreach (i,dfotop,lv)
 883:         {
 884:             int domexit;                // TRUE if this block dominates all
 885:                                         // exit blocks of the loop
 886: 
 887:             foreach (j,dfotop,l->Lexit) /* for each exit block  */
 888:             {
 889:                     if (!vec_testbit (i, dfo[j]->Bdom))
 890:                     {   domexit = 0;
 891:                         goto L1;                // break if !(i dom j)
 892:                     }
 893:             }
 894:             // if i dom (all exit blocks)
 895:             domexit = 1;
 896:         L1:     ;
 897:             if (dfo[i]->Belem)
 898:             {   // If there is any hope of making an improvement
 899:                 if (domexit || l->Llis)
 900:                 {   if (dfo[i] != l->Lhead)
 901:                         ; //domexit |= 2;
 902:                     movelis(dfo[i]->Belem, dfo[i], l, &domexit);
warning C4390: ';' : empty controlled statement found; is this the intent?
903: } 904: } 905: } 906: //list_free(&l->Llis,FPNULL); 907: cmes2("...Loop %p done...\n",l); 908: 909: #if TX86 910: if (mfoptim & MFliv) 911: #else 912: if (config.optimized && (mfoptim & MFliv)) 913: #endif 914: { loopiv(l); /* induction variables */ 915: if (addblk) /* if we added a block */ 916: { compdfo(); 917: goto restart; /* play it safe and start over */ 918: } 919: } 920: } /* for */ 921: freeloop(&startloop); 922: } 923: 924: /***************************** 925: * If elem is loop invariant, mark it. 926: * Input: 927: * lv = vector of all the blocks in this loop. 928: * rd = vector of loop invariants for this elem. This must be 929: * continually updated. 930: * Note that we do not iterate until no more LIs are found. The only 931: * thing this would buy us is stuff that depends on LI assignments. 932: */ 933: 934: STATIC void markinvar(elem *n,vec_t rd) 935: { vec_t tmp; 936: unsigned i; 937: symbol *v; 938: elem *n1; 939: 940: assert(n && rd); 941: assert(vec_numbits(rd) == deftop); 942: switch (n->Eoper) 943: { 944: case OPaddass: case OPminass: case OPmulass: case OPandass: 945: case OPorass: case OPxorass: case OPdivass: case OPmodass: 946: case OPshlass: case OPshrass: case OPashrass: 947: case OPpostinc: case OPpostdec: 948: case OPcall: 949: markinvar(n->E2,rd); 950: case OPnegass: 951: n1 = n->E1; 952: if (n1->Eoper == OPind) 953: markinvar(n1->E1,rd); 954: else if (OTbinary(n1->Eoper)) 955: { markinvar(n1->E1,rd); 956: markinvar(n1->E2,rd); 957: } 958: L2: 959: if (n->Eoper == OPcall || 960: gblock->Btry || 961: !(n1->Eoper == OPvar && 962: symbol_isintab(n1->EV.sp.Vsym))) 963: { 964: gref = 1; 965: } 966: 967: updaterd(n,rd,NULL); 968: break; 969: 970: case OPcallns: 971: markinvar(n->E2,rd); 972: markinvar(n->E1,rd); 973: break; 974: 975: #if TX86 976: case OPstrcpy: 977: case OPstrcat: 978: case OPmemcpy: 979: case OPmemset: 980: markinvar(n->E2,rd); 981: markinvar(n->E1,rd); 982: updaterd(n,rd,NULL); 983: break; 984: case OPbtc: 985: case OPbtr: 986: case OPbts: 987: markinvar(n->E1,rd); 988: markinvar(n->E2,rd); 989: updaterd(n,rd,NULL); 990: break; 991: #endif 992: case OPucall: 993: markinvar(n->E1,rd); 994: /* FALL-THROUGH */ 995: case OPasm: 996: gref = 1; 997: updaterd(n,rd,NULL); 998: break; 999: 1000: case OPucallns: 1001: case OPstrpar: 1002: case OPstrctor: 1003: #if TX86 1004: case OPvoid: 1005: case OPstrlen: 1006: case OPinp: 1007: #endif 1008: markinvar(n->E1,rd); 1009: break; 1010: case OPcond: 1011: case OPparam: 1012: #if TX86 1013: case OPoutp: 1014: case OPstrcmp: 1015: case OPmemcmp: 1016: case OPbt: // OPbt is like OPind, assume not LI 1017: #endif 1018: markinvar(n->E1,rd); 1019: markinvar(n->E2,rd); 1020: break; 1021: case OPandand: 1022: case OPoror: 1023: markinvar(n->E1,rd); 1024: tmp = vec_clone(rd); 1025: markinvar(n->E2,tmp); 1026: vec_orass(rd,tmp); /* rd |= tmp */ 1027: vec_free(tmp); 1028: break; 1029: case OPcolon: 1030: case OPcolon2: 1031: tmp = vec_clone(rd); 1032: markinvar(n->E1,rd); 1033: markinvar(n->E2,tmp); 1034: vec_orass(rd,tmp); /* rd |= tmp */ 1035: vec_free(tmp); 1036: break; 1037: case OPaddr: // mark addresses of OPvars as LI 1038: markinvar(n->E1,rd); 1039: if (n->E1->Eoper == OPvar || isLI(n->E1)) 1040: makeLI(n); 1041: break; 1042: case OPmsw: 1043: case OPneg: case OPbool: case OPnot: case OPcom: 1044: case OPshtlng: case OPd_s32: case OPs32_d: 1045: case OPdblint: case OPs16_d: case OPd_f: case OPf_d: 1046: case OPlngsht: case OPu8int: 1047: case OPld_d: case OPd_ld: 1048: case OPld_u64: 1049: case OPc_r: case OPc_i: 1050: case OParraylength: 1051: case OPnullcheck: 1052: #if TX86 1053: case OPu16_32: 1054: #else 1055: case OPunslng: 1056: #endif 1057: case OPu16_d: case OPdbluns: 1058: case OPs8int: case OPint8: 1059: case OPd_u32: case OPu32_d: 1060: 1061: #if LONGLONG 1062: case OPlngllng: case OPu32_64: 1063: case OP64_32: 1064: case OPd_s64: case OPd_u64: 1065: case OPs64_d: 1066: case OPu64_d: 1067: case OP128_64: 1068: case OPs64_128: 1069: case OPu64_128: 1070: #endif 1071: case OPabs: 1072: case OPsqrt: 1073: case OPrndtol: 1074: case OPsin: 1075: case OPcos: 1076: case OPrint: 1077: #if TX86 1078: case OPvptrfptr: /* BUG for MacHandles */ 1079: case OPtofar16: case OPfromfar16: case OPoffset: case OPptrlptr: 1080: case OPcvptrfptr: 1081: case OPsetjmp: 1082: case OPbsf: 1083: case OPbsr: 1084: case OPbswap: 1085: #endif 1086: markinvar(n->E1,rd); 1087: if (isLI(n->E1)) /* if child is LI */ 1088: makeLI(n); 1089: break; 1090: case OPeq: 1091: case OPstreq: 1092: markinvar(n->E2,rd); 1093: n1 = n->E1; 1094: markinvar(n1,rd); 1095: 1096: /* Determine if assignment is LI. Conditions are: */ 1097: /* 1) Rvalue is LI */ 1098: /* 2) Lvalue is a variable (simplifies things a lot) */ 1099: /* 3) Lvalue can only be affected by unambiguous defs */ 1100: /* 4) No rd's of lvalue that are within the loop (other */ 1101: /* than the current def) */ 1102: if (isLI(n->E2) && n1->Eoper == OPvar) /* 1 & 2 */ 1103: { v = n1->EV.sp.Vsym; 1104: #if UNAMBIG 1105: if (v->Sflags & SFLunambig) 1106: #endif 1107: { 1108: tmp = vec_calloc(deftop); 1109: //filterrd(tmp,rd,v); 1110: listrds(rd,n1,tmp); 1111: foreach (i,deftop,tmp) 1112: if (defnod[i].DNelem != n && 1113: vec_testbit(defnod[i].DNblock->Bdfoidx,lv)) 1114: goto L3; 1115: makeLI(n); // then the def is LI 1116: L3: vec_free(tmp); 1117: } 1118: } 1119: goto L2; 1120: 1121: case OPadd: case OPmin: case OPmul: case OPand: 1122: case OPor: case OPxor: case OPdiv: case OPmod: 1123: case OPshl: case OPshr: case OPeqeq: case OPne: 1124: case OPlt: case OPle: case OPgt: case OPge: 1125: case OPashr: 1126: case OPror: case OProl: 1127: 1128: case OPunord: case OPlg: case OPleg: case OPule: 1129: case OPul: case OPuge: case OPug: case OPue: 1130: case OPngt: case OPnge: case OPnlt: case OPnle: 1131: case OPord: case OPnlg: case OPnleg: case OPnule: 1132: case OPnul: case OPnuge: case OPnug: case OPnue: 1133: 1134: case OPinstanceof: 1135: case OPfinalinstanceof: 1136: case OPcheckcast: 1137: case OPcomma: 1138: case OPpair: 1139: case OPrpair: 1140: case OPscale: 1141: case OPremquo: 1142: case OPyl2x: 1143: case OPyl2xp1: 1144: markinvar(n->E1,rd); 1145: markinvar(n->E2,rd); 1146: if (isLI(n->E2) && isLI(n->E1)) 1147: makeLI(n); 1148: break; 1149: 1150: case OPind: /* must assume this is not LI */ 1151: markinvar(n->E1,rd); 1152: if (isLI(n->E1)) 1153: { 1154: #if 0 1155: // This doesn't work with C++, because exp2_ptrtocomtype() will 1156: // transfer const to where it doesn't belong. 1157: if (n->Ety & mTYconst) 1158: { 1159: makeLI(n); 1160: } 1161: #endif 1162: #if 0 1163: // This was disabled because it was marking as LI 1164: // the loop dimension for the [i] array if 1165: // a[j][i] was in a loop. This meant the a[j] array bounds 1166: // check for the a[j].length was skipped. 1167: else if (n->Ejty) 1168: { 1169: tmp = vec_calloc(deftop); 1170: filterrdind(tmp,rd,n); // only the RDs pertaining to n 1171: 1172: // if (no RDs within loop) 1173: // then it's loop invariant 1174: 1175: foreach (i,deftop,tmp) // for each RD 1176: if (vec_testbit(defnod[i].DNblock->Bdfoidx,lv)) 1177: goto L10; // found a RD in the loop 1178: 1179: // If gref has occurred, this can still be LI 1180: // if n is an AE that was also an AE at the 1181: // point of gref. 1182: // We can catch a subset of these cases by looking 1183: // at the AEs at the start of the loop. 1184: if (gref) 1185: { int j; 1186: 1187: //printf("\tn is: "); WReqn(n); printf("\n"); 1188: foreach (j,exptop,gin) 1189: { elem *e = expnod[j]; 1190: 1191: //printf("\t\texpnod[%d] = %p\n",j,e); 1192: //printf("\t\tAE is: "); WReqn(e); printf("\n"); 1193: if (el_match2(n,e)) 1194: { 1195: makeLI(n); 1196: //printf("Ind LI: "); WReqn(n); printf("\n"); 1197: break; 1198: } 1199: } 1200: } 1201: else 1202: makeLI(n); 1203: L10: vec_free(tmp); 1204: break; 1205: } 1206: #endif 1207: } 1208: break; 1209: case OPvar: 1210: v = n->EV.sp.Vsym; 1211: #if UNAMBIG 1212: if (v->Sflags & SFLunambig) // must be unambiguous to be LI 1213: #endif 1214: { 1215: tmp = vec_calloc(deftop); 1216: //filterrd(tmp,rd,v); // only the RDs pertaining to v 1217: listrds(rd,n,tmp); // only the RDs pertaining to v 1218: 1219: // if (no RDs within loop) 1220: // then it's loop invariant 1221: 1222: foreach (i,deftop,tmp) // for each RD 1223: if (vec_testbit(defnod[i].DNblock->Bdfoidx,lv)) 1224: goto L1; // found a RD in the loop 1225: makeLI(n); 1226: 1227: L1: vec_free(tmp); 1228: } 1229: break; 1230: case OPstring: 1231: case OPrelconst: 1232: case OPconst: /* constants are always LI */ 1233: case OPhstring: 1234: case OPframeptr: 1235: makeLI(n); 1236: break; 1237: case OPinfo: 1238: markinvar(n->E2,rd); 1239: break; 1240: 1241: case OPstrthis: 1242: case OPmark: 1243: case OPctor: 1244: case OPdtor: 1245: case OPdctor: 1246: case OPddtor: 1247: case OPhalt: 1248: case OPgot: // shouldn't OPgot be makeLI ? 1249: break; 1250: 1251: default: 1252: #ifdef DEBUG 1253: WROP(n->Eoper); 1254: #endif 1255: //printf("n->Eoper = %d, OPconst = %d\n", n->Eoper, OPconst); 1256: assert(0); 1257: } 1258: #ifdef DEBUG 1259: if (debugc && isLI(n)) 1260: { dbg_printf(" LI elem: "); 1261: WReqn(n); 1262: dbg_printf("\n"); 1263: } 1264: #endif 1265: } 1266: 1267: /******************** 1268: * Update rd vector. 1269: * Input: 1270: * n assignment elem or function call elem or OPasm elem 1271: * rd reaching def vector to update 1272: * (clear bits for defs we kill, set bit for n (which is the 1273: * def we are genning)) 1274: * vecdim deftop 1275: */ 1276: 1277: void updaterd(elem *n,vec_t GEN,vec_t KILL) 1278: { unsigned op = n->Eoper; 1279: unsigned i; 1280: unsigned ni; 1281: elem *t; 1282: 1283: assert(OTdef(op)); 1284: assert(GEN); 1285: elem_debug(n); 1286: 1287: // If unambiguous def 1288: if (OTassign(op) && (t = n->E1)->Eoper == OPvar) 1289: { symbol *d = t->EV.sp.Vsym; 1290: targ_size_t toff = t->EV.sp.Voffset; 1291: targ_size_t tsize; 1292: targ_size_t ttop; 1293: 1294: tsize = (op == OPstreq) ? type_size(n->ET) : tysize(t->Ety); 1295: ttop = toff + tsize; 1296: 1297: //printf("updaterd: "); WReqn(n); printf(" toff=%d, tsize=%d\n", toff, tsize); 1298: 1299: ni = (unsigned)-1; 1300: 1301: /* for all unambig defs in defnod[] */ 1302: for (i = 0; i < deftop; i++) 1303: { elem *tn = defnod[i].DNelem; 1304: elem *tn1; 1305: targ_size_t tn1size; 1306: 1307: if (tn == n) 1308: ni = i; 1309: 1310: if (!OTassign(tn->Eoper)) 1311: continue; 1312: 1313: // If def of same variable, kill that def 1314: tn1 = tn->E1; 1315: if (tn1->Eoper != OPvar || d != tn1->EV.sp.Vsym) 1316: continue; 1317: 1318: // If t completely overlaps tn1 1319: tn1size = (tn->Eoper == OPstreq) 1320: ? type_size(tn->ET) : tysize(tn1->Ety); 1321: if (toff <= tn1->EV.sp.Voffset && 1322: tn1->EV.sp.Voffset + tn1size <= ttop) 1323: { 1324: if (KILL) 1325: vec_setbit(i,KILL); 1326: vec_clearbit(i,GEN); 1327: } 1328: } 1329: assert(ni != -1); 1330: } 1331: #if 0 1332: else if (OTassign(op) && t->Eoper != OPvar && t->Ejty) 1333: { 1334: ni = -1; 1335: 1336: // for all unambig defs in defnod[] 1337: for (i = 0; i < deftop; i++) 1338: { elem *tn = defnod[i].DNelem; 1339: elem *tn1; 1340: 1341: if (tn == n) 1342: ni = i; 1343: 1344: if (!OTassign(tn->Eoper)) 1345: continue; 1346: 1347: // If def of same variable, kill that def 1348: tn1 = tn->E1; 1349: if (tn1->Eoper != OPind || t->Ejty != tn1->Ejty) 1350: continue; 1351: 1352: if (KILL) 1353: vec_setbit(i,KILL); 1354: vec_clearbit(i,GEN); 1355: } 1356: assert(ni != -1); 1357: } 1358: #endif 1359: else 1360: { 1361: /* Set bit in GEN for this def */ 1362: for (i = 0; 1; i++) 1363: { assert(i < deftop); // should find n in defnod[] 1364: if (defnod[i].DNelem == n) 1365: { ni = i; 1366: break; 1367: } 1368: } 1369: } 1370: 1371: vec_setbit(ni,GEN); // set bit in GEN for this def 1372: } 1373: 1374: /*************************** 1375: * Mark all elems as not being loop invariant. 1376: */ 1377: 1378: STATIC void unmarkall(elem *e) 1379: { 1380: for (; 1; e = e->E1) 1381: { 1382: assert(e); 1383: e->Nflags &= ~NFLli; /* unmark this elem */ 1384: if (OTunary(e->Eoper)) 1385: continue; 1386: else if (OTbinary(e->Eoper)) 1387: { unmarkall(e->E2); 1388: continue; 1389: } 1390: return; 1391: } 1392: } 1393: 1394: /******************************* 1395: * Take a RD vector and filter out all RDs but 1396: * ones that are defs of symbol s. 1397: * Output: 1398: * f 1399: */ 1400: 1401: #if 0 // replaced by listrds() 1402: void filterrd(vec_t f,vec_t rd,symbol *s) 1403: { 1404: register unsigned i; 1405: register elem *n; 1406: 1407: vec_copy(f,rd); 1408: #if UNAMBIG 1409: foreach (i,deftop,f) /* for each def in f */ 1410: { n = defnod[i].DNelem; /* the definition elem */ 1411: elem_debug(n); 1412: if (n->Eoper == OPasm) // OPasm defs always reach (sigh) 1413: continue; 1414: /* Clear bit if it's not an unambiguous def of si */ 1415: if (OTassign(n->Eoper)) /* if assignment elem */ 1416: { if (!(n->E1->Eoper == OPvar && n->E1->EV.sp.Vsym == s 1417: )) 1418: vec_clearbit(i,f); 1419: } 1420: else /* else ambiguous def */ 1421: vec_clearbit(i,f); // and couldn't def this var 1422: } 1423: #else 1424: assert(0); /* not implemented */ 1425: #endif 1426: } 1427: #endif 1428: 1429: /******************************* 1430: * Take a RD vector and filter out all RDs but 1431: * ones that are possible defs of OPind elem e. 1432: * Output: 1433: * f 1434: */ 1435: 1436: #if 0 1437: 1438: STATIC void filterrdind(vec_t f,vec_t rd,elem *e) 1439: { 1440: unsigned i; 1441: elem *n; 1442: tym_t jty = e->Ejty; 1443: 1444: vec_copy(f,rd); 1445: #if UNAMBIG 1446: foreach (i,deftop,f) // for each def in f 1447: { n = defnod[i].DNelem; // the definition elem 1448: elem_debug(n); 1449: if (n->Eoper == OPasm) // OPasm defs always reach (sigh) 1450: continue; 1451: // Clear bit if it's not an unambiguous def of si 1452: if (OTassign(n->Eoper)) // if assignment elem 1453: { elem *n1 = n->E1; 1454: 1455: if (n1->Eoper == OPind) 1456: { 1457: if (jty && n1->Ejty && jty != n1->Ejty) 1458: vec_clearbit(i,f); 1459: } 1460: else if (n1->Eoper == OPvar) 1461: { 1462: if (jty || n1->EV.sp.Vsym->Sflags & SFLunambig) 1463: vec_clearbit(i,f); 1464: } 1465: } 1466: else if (OTcall(n->Eoper) && el_noreturn(n)) 1467: vec_clearbit(i,f); 1468: } 1469: #else 1470: assert(0); // not implemented 1471: #endif 1472: } 1473: 1474: #endif 1475: 1476: /******************************** 1477: * Return TRUE if there are any refs of v in n before nstop is encountered. 1478: * Input: 1479: * refstop = -1 1480: */ 1481: 1482: static int refstop; /* flag to stop refs() */ 1483: 1484: STATIC bool refs(symbol *v,elem *n,elem *nstop) 1485: { register bool f; 1486: register unsigned op; 1487: 1488: symbol_debug(v); 1489: elem_debug(n); 1490: assert(symbol_isintab(v)); 1491: assert(v->Ssymnum < globsym.top); 1492: assert(n); 1493: 1494: op = n->Eoper; 1495: #if UNAMBIG 1496: if (refstop == 0) 1497: return FALSE; 1498: f = FALSE; 1499: if (OTunary(op)) 1500: f = refs(v,n->E1,nstop); 1501: else if (OTbinary(op)) 1502: { if (ERTOL(n)) /* watch order of evaluation */ 1503: { 1504: /* Note that (OPvar = e) is not a ref of OPvar, whereas */ 1505: /* ((OPbit OPvar) = e) is a ref of OPvar, and (OPvar op= e) is */ 1506: /* a ref of OPvar, etc. */ 1507: f = refs(v,n->E2,nstop); 1508: if (!f) 1509: { if (op == OPeq) 1510: { if (n->E1->Eoper != OPvar) 1511: f = refs(v,n->E1->E1,nstop); 1512: } 1513: else 1514: f = refs(v,n->E1,nstop); 1515: } 1516: } 1517: else 1518: f = refs(v,n->E1,nstop) || refs(v,n->E2,nstop); 1519: } 1520: 1521: if (n == nstop) 1522: refstop = 0; 1523: else if (n->Eoper == OPvar) /* if variable reference */ 1524: return v == n->EV.sp.Vsym; 1525: else if (op == OPasm) /* everything is referenced */ 1526: return TRUE; 1527: return f; 1528: #else 1529: assert(0); 1530: #endif 1531: } 1532: 1533: /************************* 1534: * Move LIs to preheader. 1535: * Conditions to be satisfied for code motion are: 1536: * 1) All exit blocks are dominated (TRUE before this is called). 1537: * -- OR -- 1538: * 2) Variable assigned by a statement is not live on entering 1539: * any successor outside the loop of any exit block of the 1540: * loop. 1541: * 1542: * 3) Cannot move assignment to variable if there are any other 1543: * assignments to that variable within the loop (TRUE or 1544: * assignment would not have been marked LI). 1545: * 4) Cannot move assignments to a variable if there is a use 1546: * of that variable in this loop that is reached by any other 1547: * def of it. 1548: * 5) Cannot move expressions that have side effects. 1549: * 6) Do not move assignments to variables that could be affected 1550: * by ambiguous defs. 1551: * 7) It is not worth it to move expressions of the form: 1552: * (var == const) 1553: * Input: 1554: * n the elem we're considering moving 1555: * b the block this elem is in 1556: * l the loop we're in 1557: * domexit flags 1558: * bit 0: 1 this branch is always executed 1559: * 0 this branch is only sometimes executed 1560: * bit 1: 1 do not move LIs that could throw exceptions 1561: * or cannot be moved past possibly thrown exceptions 1562: * Returns: 1563: * revised domexit 1564: */ 1565: 1566: STATIC void movelis(elem *n,block *b,loop *l,int *pdomexit) 1567: { register unsigned i,j,op; 1568: register vec_t tmp; 1569: register elem *ne,*t,*n2; 1570: register list_t nl; 1571: symbol *v; 1572: tym_t ty; 1573: 1574: Lnextlis: 1575: //if (isLI(n)) { printf("movelis("); WReqn(n); printf(")\n"); } 1576: assert(l && n); 1577: elem_debug(n); 1578: op = n->Eoper; 1579: switch (op) 1580: { 1581: case OPvar: 1582: case OPconst: 1583: case OPrelconst: 1584: goto Lret; 1585: 1586: case OPandand: 1587: case OPoror: 1588: case OPcond: 1589: { int domexit; 1590: 1591: movelis(n->E1,b,l,pdomexit); // always executed 1592: domexit = *pdomexit & ~1; // sometimes executed 1593: movelis(n->E2,b,l,&domexit); 1594: *pdomexit |= domexit & 2; 1595: goto Lret; 1596: } 1597: 1598: case OPeq: 1599: // Do loop invariant assignments 1600: if (isLI(n) && n->E1->Eoper == OPvar) 1601: { v = n->E1->EV.sp.Vsym; // variable index number 1602: 1603: #ifdef UNAMBIG 1604: if (!(v->Sflags & SFLunambig)) goto L3; // case 6 1605: #endif 1606: 1607: // If case 4 is not satisfied, return 1608: 1609: // Function parameters have an implied definition prior to the 1610: // first block of the function. Unfortunately, the rd vector 1611: // does not take this into account. Therefore, we assume the 1612: // worst and reject assignments to function parameters. 1613: if (v->Sclass == SCparameter || v->Sclass == SCregpar 1614: #if TX86 1615: || v->Sclass == SCfastpar 1616: #endif 1617: ) 1618: goto L3; 1619: 1620: if (el_sideeffect(n->E2)) goto L3; // case 5 1621: 1622: // If case 1 or case 2 is not satisfied, return 1623: 1624: if (!(*pdomexit & 1)) // if not case 1 1625: { 1626: foreach (i,dfotop,l->Lexit) // for each exit block 1627: { register list_t bl; 1628: 1629: for (bl = dfo[i]->Bsucc; bl; bl = list_next(bl)) 1630: { block *s; // successor to exit block 1631: 1632: s = list_block(bl); 1633: if (!vec_testbit(s->Bdfoidx,l->Lloop) && 1634: (!symbol_isintab(v) || 1635: vec_testbit(v->Ssymnum,s->Binlv))) // if v is live on exit 1636: goto L3; 1637: } 1638: } 1639: } 1640: 1641: tmp = vec_calloc(deftop); 1642: foreach (i,dfotop,l->Lloop) // for each block in loop 1643: { 1644: if (dfo[i] == b) // except this one 1645: continue; 1646: 1647: //<if there are any RDs of v in Binrd other than n> 1648: // <if there are any refs of v in that block> 1649: // return; 1650: 1651: //filterrd(tmp,dfo[i]->Binrd,v); 1652: listrds(dfo[i]->Binrd,n->E1,tmp); 1653: foreach (j,deftop,tmp) // for each RD of v in Binrd 1654: { if (defnod[j].DNelem == n) 1655: continue; 1656: refstop = -1; 1657: if (dfo[i]->Belem && 1658: refs(v,dfo[i]->Belem,(elem *)NULL)) //if refs of v 1659: { vec_free(tmp); 1660: goto L3; 1661: } 1662: break; 1663: } 1664: } // foreach 1665: 1666: // <if there are any RDs of v in b->Binrd other than n> 1667: // <if there are any references to v before the 1668: // assignment to v> 1669: // <can't move this assignment> 1670: 1671: //filterrd(tmp,b->Binrd,v); 1672: listrds(b->Binrd,n->E1,tmp); 1673: foreach (j,deftop,tmp) // for each RD of v in Binrd 1674: { if (defnod[j].DNelem == n) 1675: continue; 1676: refstop = -1; 1677: if (b->Belem && refs(v,b->Belem,n)) 1678: { vec_free(tmp); 1679: goto L3; // can't move it 1680: } 1681: break; // avoid redundant looping 1682: } 1683: vec_free(tmp); 1684: 1685: // We have an LI assignment, n. 1686: // Check to see if the rvalue is already in the preheader. 1687: for (nl = l->Llis; nl; nl = list_next(nl)) 1688: { 1689: if (el_match(n->E2,list_elem(nl)->E2)) 1690: { 1691: el_free(n->E2); 1692: n->E2 = el_calloc(); 1693: el_copy(n->E2,list_elem(nl)->E1); 1694: cmes("LI assignment rvalue was replaced\n"); 1695: doflow = TRUE; 1696: changes++; 1697: break; 1698: } 1699: } 1700: 1701: // move assignment elem to preheader 1702: cmes("Moved LI assignment "); 1703: #ifdef DEBUG 1704: if (debugc) 1705: { WReqn(n); 1706: dbg_printf(";\n"); 1707: } 1708: #endif 1709: changes++; 1710: doflow = TRUE; // redo flow analysis 1711: ne = el_calloc(); 1712: el_copy(ne,n); // create assignment elem 1713: assert(l->Lpreheader); // make sure there is one 1714: appendelem(ne,&(l->Lpreheader->Belem)); // append ne to preheader 1715: list_prepend(&l->Llis,ne); 1716: 1717: el_copy(n,ne->E1); // replace n with just a reference to v 1718: goto Lret; 1719: } // if 1720: break; 1721: 1722: case OPcall: 1723: case OPucall: 1724: *pdomexit |= 2; 1725: break; 1726: } 1727: 1728: L3: 1729: // Do leaves of non-LI expressions, leaves of = elems that didn't 1730: // meet the invariant assignment removal criteria, and don't do leaves 1731: if (OTleaf(op)) 1732: goto Lret; 1733: if (!isLI(n) || op == OPeq || op == OPcomma || OTrel(op) || op == OPnot || 1734: // These are usually addressing modes, so moving them is a net loss 1735: (I32 && op == OPshl && n->E2->Eoper == OPconst && el_tolong(n->E2) <= 3ull) 1736: ) 1737: { 1738: if (OTassign(op)) 1739: { elem *n1 = n->E1; 1740: elem *n11; 1741: 1742: if (OTbinary(op)) 1743: movelis(n->E2,b,l,pdomexit); 1744: 1745: // Do lvalue only if it is an expression 1746: if (n1->Eoper == OPvar) 1747: goto Lret; 1748: n11 = n1->E1; 1749: if (OTbinary(n1->Eoper)) 1750: { 1751: movelis(n11,b,l,pdomexit); 1752: n = n1->E2; 1753: } 1754: // If *(x + c), just make x the LI, not the (x + c). 1755: // The +c comes free with the addressing mode. 1756: else if (n1->Eoper == OPind && 1757: isLI(n11) && 1758: n11->Eoper == OPadd && 1759: n11->E2->Eoper == OPconst 1760: ) 1761: { 1762: n = n11->E1; 1763: } 1764: else 1765: n = n11; 1766: movelis(n,b,l,pdomexit); 1767: if (b->Btry || !(n1->Eoper == OPvar && symbol_isintab(n1->EV.sp.Vsym))) 1768: { 1769: //printf("assign to global => domexit |= 2\n"); 1770: *pdomexit |= 2; 1771: } 1772: } 1773: else if (OTunary(op)) 1774: { elem *e1 = n->E1; 1775: 1776: // If *(x + c), just make x the LI, not the (x + c). 1777: // The +c comes free with the addressing mode. 1778: if (op == OPind && 1779: isLI(e1) && 1780: e1->Eoper == OPadd && 1781: e1->E2->Eoper == OPconst 1782: ) 1783: { 1784: n = e1->E1; 1785: } 1786: else 1787: n = e1; 1788: } 1789: else if (OTbinary(op)) 1790: { movelis(n->E1,b,l,pdomexit); 1791: n = n->E2; 1792: } 1793: goto Lnextlis; 1794: } 1795: 1796: if (el_sideeffect(n)) 1797: goto Lret; 1798: 1799: #if 0 1800: printf("*pdomexit = %d\n",*pdomexit); 1801: if (*pdomexit & 2) 1802: { 1803: // If any indirections, can't LI it 1804: 1805: // If this operand has already been indirected, we can let 1806: // it pass. 1807: Symbol *s; 1808: 1809: printf("looking at:\n"); 1810: elem_print(n); 1811: s = el_basesym(n->E1); 1812: if (s) 1813: { 1814: for (nl = l->Llis; nl; nl = list_next(nl)) 1815: { elem *el; 1816: tym_t ty2; 1817: 1818: el = list_elem(nl); 1819: el = el->E2; 1820: elem_print(el); 1821: if (el->Eoper == OPind && el_basesym(el->E1) == s) 1822: { 1823: printf(" pass!\n"); 1824: goto Lpass; 1825: } 1826: } 1827: } 1828: printf(" skip!\n"); 1829: goto Lret; 1830: 1831: Lpass: 1832: ; 1833: } 1834: #endif 1835: 1836: // Move the LI expression to the preheader 1837: cmes("Moved LI expression "); 1838: #ifdef DEBUG 1839: if (debugc) 1840: { WReqn(n); 1841: dbg_printf(";\n"); 1842: } 1843: #endif 1844: 1845: // See if it's already been moved 1846: ty = n->Ety; 1847: for (nl = l->Llis; nl; nl = list_next(nl)) 1848: { elem *el; 1849: tym_t ty2; 1850: 1851: el = list_elem(nl); 1852: //printf("existing LI: "); WReqn(el); printf("\n"); 1853: ty2 = el->E2->Ety; 1854: if (tysize(ty) == tysize(ty2)) 1855: { el->E2->Ety = ty; 1856: if (el_match(n,el->E2)) 1857: { 1858: el->E2->Ety = ty2; 1859: if (!OTleaf(n->Eoper)) 1860: { el_free(n->E1); 1861: if (OTbinary(n->Eoper)) 1862: el_free(n->E2); 1863: } 1864: el_copy(n,el->E1); // make copy of temp 1865: n->Ety = ty; 1866: #ifdef DEBUG 1867: if (debugc) 1868: { dbg_printf("Already moved: LI expression replaced with "); 1869: WReqn(n); 1870: dbg_printf("\nPreheader %d expression %p ", 1871: l->Lpreheader->Bdfoidx,l->Lpreheader->Belem); 1872: WReqn(l->Lpreheader->Belem); 1873: dbg_printf("\n"); 1874: } 1875: #endif 1876: changes++; 1877: doflow = TRUE; // redo flow analysis 1878: goto Lret; 1879: } 1880: el->E2->Ety = ty2; 1881: } 1882: } 1883: 1884: if (!(*pdomexit & 1)) // if only sometimes executed 1885: { cmes(" doesn't dominate exit\n"); 1886: goto Lret; // don't move LI 1887: } 1888: 1889: if (tyaggregate(n->Ety)) 1890: goto Lret; 1891: 1892: changes++; 1893: doflow = TRUE; // redo flow analysis 1894: 1895: t = el_alloctmp(n->Ety); /* allocate temporary t */ 1896: #if DEBUG 1897: cmes2("movelis() introduced new variable '%s' of type ",t->EV.sp.Vsym->Sident); 1898: if (debugc) WRTYxx(t->Ety); 1899: cmes("\n"); 1900: #endif 1901: n2 = el_calloc(); 1902: el_copy(n2,n); /* create copy n2 of n */ 1903: ne = el_bin(OPeq,t->Ety,t,n2); /* create elem t=n2 */ 1904: assert(l->Lpreheader); /* make sure there is one */ 1905: appendelem(ne,&(l->Lpreheader->Belem)); /* append ne to preheader */ 1906: #ifdef DEBUG 1907: if (debugc) 1908: { dbg_printf("Preheader %d expression %p\n\t", 1909: l->Lpreheader->Bdfoidx,l->Lpreheader->Belem); 1910: WReqn(l->Lpreheader->Belem); 1911: dbg_printf("\nLI expression replaced with "); WReqn(t); 1912: dbg_printf("\n"); 1913: } 1914: #endif 1915: el_copy(n,t); /* replace this elem with t */ 1916: 1917: // Remember LI expression in elem list 1918: list_prepend(&l->Llis,ne); 1919: 1920: Lret: 1921: ; 1922: } 1923: 1924: /*************************** 1925: * Append elem to existing elem using an OPcomma elem. 1926: * Input: 1927: * n elem to append 1928: * *pn elem to append to 1929: */ 1930: 1931: STATIC void appendelem(register elem *n,elem **pn) 1932: { 1933: assert(n && pn); 1934: if (*pn) /* if this elem exists */ 1935: { while ((*pn)->Eoper == OPcomma) /* while we see OPcomma elems */ 1936: { (*pn)->Ety = n->Ety; 1937: pn = &((*pn)->E2); /* cruise down right side */ 1938: } 1939: *pn = el_bin(OPcomma,n->Ety,*pn,n); 1940: } 1941: else 1942: *pn = n; /* else create a elem */ 1943: } 1944: 1945: /************** LOOP INDUCTION VARIABLES **********************/ 1946: 1947: /*************************** 1948: * Allocate famlist. 1949: */ 1950: 1951: famlist *famlist::freelist = NULL; 1952: 1953: famlist *famlist::mycalloc() 1954: { famlist *fl; 1955: 1956: if (freelist) 1957: { 1958: fl = freelist; 1959: freelist = fl->FLnext; 1960: memset(fl,0,sizeof(famlist)); 1961: } 1962: else 1963: fl = (famlist *) mem_calloc(sizeof(famlist)); 1964: return fl; 1965: } 1966: 1967: /*************************** 1968: * Allocate Iv. 1969: */ 1970: 1971: Iv *Iv::freelist = NULL; 1972: 1973: Iv *Iv::mycalloc() 1974: { Iv *iv; 1975: 1976: if (freelist) 1977: { 1978: iv = freelist; 1979: freelist = iv->IVnext; 1980: memset(iv,0,sizeof(Iv)); 1981: } 1982: else 1983: iv = (Iv *) mem_calloc(sizeof(Iv)); 1984: return iv; 1985: } 1986: 1987: /********************* 1988: * Free iv list. 1989: */ 1990: 1991: STATIC void freeivlist(register Iv *biv) 1992: { register Iv *bivnext; 1993: 1994: #if TX86 1995: while (biv) 1996: { register famlist *fl,*fln; 1997: 1998: for (fl = biv->IVfamily; fl; fl = fln) 1999: { el_free(fl->c1); 2000: el_free(fl->c2); 2001: fln = fl->FLnext; 2002: 2003: fl->FLnext = famlist::freelist; 2004: famlist::freelist = fl; 2005: } 2006: bivnext = biv->IVnext; 2007: 2008: biv->IVnext = Iv::freelist; 2009: Iv::freelist = biv; 2010: 2011: biv = bivnext; 2012: } 2013: #endif 2014: } 2015: 2016: /**************************** 2017: * Create a new famlist entry. 2018: */ 2019: 2020: STATIC famlist * newfamlist(tym_t ty) 2021: { register famlist *fl; 2022: union eve c; 2023: 2024: memset(&c,0,sizeof(c)); 2025: fl = famlist::mycalloc(); 2026: fl->FLty = ty; 2027: switch (tybasic(ty)) 2028: { case TYfloat: 2029: c.Vfloat = 1; 2030: break; 2031: case TYdouble: 2032: case TYdouble_alias: 2033: c.Vdouble = 1; 2034: break; 2035: case TYldouble: 2036: c.Vldouble = 1; 2037: break; 2038: #if _MSDOS || __OS2__ || _WIN32 // if no byte ordering problems 2039: case TYsptr: 2040: case TYcptr: 2041: #if JHANDLE 2042: case TYjhandle: 2043: #endif 2044: case TYnptr: 2045: case TYfptr: 2046: case TYvptr: 2047: /* Convert pointers to integrals to avoid things like */ 2048: /* multiplying pointers */ 2049: ty = TYptrdiff; 2050: /* FALL-THROUGH */ 2051: default: 2052: c.Vlong = 1; 2053: break; 2054: #if TX86 2055: case TYhptr: 2056: ty = TYlong; 2057: c.Vlong = 1; 2058: break; 2059: #endif 2060: #else 2061: case TYbool: 2062: case TYchar: 2063: case TYschar: 2064: case TYuchar: 2065: c.Vchar = 1; 2066: break; 2067: case TYshort: 2068: case TYushort: 2069: case TYchar16: 2070: case TYwchar_t: // BUG: what about 4 byte wchar_t's? 2071: c.Vshort = 1; 2072: break; 2073: #if TX86 2074: case TYsptr: 2075: case TYcptr: 2076: #if JHANDLE 2077: case TYjhandle: 2078: #endif 2079: case TYnptr: 2080: #endif 2081: case TYnullptr: 2082: case TYfptr: 2083: case TYvptr: 2084: ty = TYint; 2085: if (I64) 2086: ty = TYllong; 2087: /* FALL-THROUGH */ 2088: case TYint: 2089: case TYuint: 2090: c.Vint = 1; 2091: break; 2092: #if TX86 2093: case TYhptr: 2094: ty = TYlong; 2095: #endif 2096: case TYlong: 2097: case TYulong: 2098: case TYdchar: 2099: default: 2100: c.Vlong = 1; 2101: break; 2102: #if 0 2103: default: 2104: printf("ty = x%x\n", tybasic(ty)); 2105: assert(0); 2106: #endif 2107: #endif 2108: } 2109: fl->c1 = el_const(ty,&c); /* c1 = 1 */ 2110: c.Vldouble = 0; 2111: if (typtr(ty)) 2112: { 2113: #if TX86 2114: ty = (tybasic(ty) == TYhptr) ? TYlong : TYint; 2115: if (I64) 2116: ty = TYllong; 2117: #else 2118: ty = TYint; 2119: #endif 2120: } 2121: fl->c2 = el_const(ty,&c); /* c2 = 0 */ 2122: return fl; 2123: } 2124: 2125: /*************************** 2126: * Remove induction variables from loop l. 2127: * Loop invariant removal should have been done just previously. 2128: */ 2129: 2130: STATIC void loopiv(register loop *l) 2131: { 2132: cmes2("loopiv(%p)\n",l); 2133: assert(l->Livlist == NULL && l->Lopeqlist == NULL); 2134: elimspec(l); 2135: if (doflow) 2136: { flowrd(); /* compute reaching defs */ 2137: flowlv(); /* compute live variables */ 2138: flowae(); // compute available expressions 2139: doflow = FALSE; 2140: } 2141: findbasivs(l); /* find basic induction variables */ 2142: findopeqs(l); // find op= variables 2143: findivfams(l); /* find IV families */ 2144: elimfrivivs(l); /* eliminate less useful family IVs */ 2145: intronvars(l); /* introduce new variables */ 2146: elimbasivs(l); /* eliminate basic IVs */ 2147: if (!addblk) // adding a block changes the Binlv 2148: elimopeqs(l); // eliminate op= variables 2149: 2150: freeivlist(l->Livlist); // free up IV list 2151: l->Livlist = NULL; 2152: freeivlist(l->Lopeqlist); // free up list 2153: l->Lopeqlist = NULL; 2154: 2155: /* Do copy propagation and dead assignment elimination */ 2156: /* upon return to optfunc() */ 2157: } 2158: 2159: /************************************* 2160: * Find basic IVs of loop l. 2161: * A basic IV x of loop l is a variable x which has 2162: * exactly one assignment within l of the form: 2163: * x += c or x -= c, where c is either a constant 2164: * or a LI. 2165: * Input: 2166: * defnod[] loaded with all the definition elems of the loop 2167: */ 2168: 2169: STATIC void findbasivs(loop *l) 2170: { vec_t poss,notposs; 2171: elem *n; 2172: unsigned i,j; 2173: bool ambdone; 2174: 2175: assert(l); 2176: ambdone = FALSE; 2177: poss = vec_calloc(globsym.top); 2178: notposs = vec_calloc(globsym.top); /* vector of all variables */ 2179: /* (initially all unmarked) */ 2180: 2181: /* for each def in defnod[] that is within loop l */ 2182: 2183: for (i = 0; i < deftop; i++) 2184: { if (!vec_testbit(defnod[i].DNblock->Bdfoidx,l->Lloop)) 2185: continue; /* def is not in the loop */ 2186: 2187: n = defnod[i].DNelem; 2188: elem_debug(n); 2189: if (OTassign(n->Eoper) && n->E1->Eoper == OPvar) 2190: { symbol *s; /* if unambiguous def */ 2191: 2192: s = n->E1->EV.sp.Vsym; 2193: if (symbol_isintab(s)) 2194: { 2195: SYMIDX v; 2196: 2197: v = n->E1->EV.sp.Vsym->Ssymnum; 2198: if ((n->Eoper == OPaddass || n->Eoper == OPminass || 2199: n->Eoper == OPpostinc || n->Eoper == OPpostdec) && 2200: (cnst(n->E2) || /* if x += c or x -= c */ 2201: n->E2->Eoper == OPvar && isLI(n->E2))) 2202: { if (vec_testbit(v,poss)) 2203: /* We've already seen this def elem, */ 2204: /* therefore there is more than one */ 2205: /* def of v within the loop, therefore */ 2206: /* v is not a basic IV. */ 2207: vec_setbit(v,notposs); 2208: else 2209: vec_setbit(v,poss); 2210: } 2211: else /* else mark as not possible */ 2212: vec_setbit(v,notposs); 2213: } 2214: } 2215: else /* else ambiguous def */ 2216: { /* mark any vars that could be affected by */ 2217: /* this def as not possible */ 2218: 2219: if (!ambdone) /* avoid redundant loops */ 2220: { for (j = 0; j < globsym.top; j++)
warning C4018: '<' : signed/unsigned mismatch
2221: { if (!(globsym.tab[j]->Sflags & SFLunambig)) 2222: vec_setbit(j,notposs); 2223: } 2224: ambdone = TRUE; 2225: } 2226: } 2227: } 2228: #if 0 2229: dbg_printf("poss "); vec_println(poss); 2230: dbg_printf("notposs "); vec_println(notposs); 2231: #endif 2232: vec_subass(poss,notposs); /* poss = poss - notposs */ 2233: 2234: /* create list of IVs */ 2235: foreach (i,globsym.top,poss) /* for each basic IV */
warning C4018: '<' : signed/unsigned mismatch
2236: { register Iv *biv; 2237: symbol *s; 2238: 2239: /* Skip if we don't want it to be a basic IV (see funcprev()) */ 2240: s = globsym.tab[i]; 2241: assert(symbol_isintab(s)); 2242: if (s->Sflags & SFLnotbasiciv) 2243: continue; 2244: 2245: // Do not use aggregates as basic IVs. This is because the other loop 2246: // code doesn't check offsets into symbols, (assuming everything 2247: // is at offset 0). We could perhaps amend this by allowing basic IVs 2248: // if the struct consists of only one data member. 2249: if (tyaggregate(s->ty())) 2250: continue; 2251: 2252: biv = Iv::mycalloc(); 2253: biv->IVnext = l->Livlist; 2254: l->Livlist = biv; // link into list of IVs 2255: 2256: biv->IVbasic = s; // symbol of basic IV 2257: 2258: cmes3("Symbol '%s' (%d) is a basic IV, ",s->Sident 2259: ? (char *)s->Sident : "",i); 2260: 2261: /* We have the sym idx of the basic IV. We need to find */ 2262: /* the parent of the increment elem for it. */ 2263: 2264: /* First find the defnod[] */ 2265: for (j = 0; j < deftop; j++) 2266: { /* If defnod is a def of i and it is in the loop */ 2267: if (defnod[j].DNelem->E1 && /* OPasm are def nodes */ 2268: defnod[j].DNelem->E1->EV.sp.Vsym == s && 2269: vec_testbit(defnod[j].DNblock->Bdfoidx,l->Lloop)) 2270: goto L1; 2271: } 2272: assert(0); /* should have found it */ 2273: /* NOTREACHED */ 2274: 2275: L1: biv->IVincr = el_parent(defnod[j].DNelem,&(defnod[j].DNblock->Belem)); 2276: assert(s == (*biv->IVincr)->E1->EV.sp.Vsym); 2277: #ifdef DEBUG 2278: if (debugc) 2279: { dbg_printf("Increment elem is: "); WReqn(*biv->IVincr); dbg_printf("\n"); } 2280: #endif 2281: } 2282: 2283: vec_free(poss); 2284: vec_free(notposs); 2285: } 2286: 2287: /************************************* 2288: * Find op= elems of loop l. 2289: * Analogous to findbasivs(). 2290: * Used to eliminate useless loop code normally found in benchmark programs. 2291: * Input: 2292: * defnod[] loaded with all the definition elems of the loop 2293: */ 2294: 2295: STATIC void findopeqs(loop *l) 2296: { vec_t poss,notposs; 2297: elem *n; 2298: unsigned i,j; 2299: bool ambdone; 2300: 2301: assert(l); 2302: ambdone = FALSE; 2303: poss = vec_calloc(globsym.top); 2304: notposs = vec_calloc(globsym.top); // vector of all variables 2305: // (initially all unmarked) 2306: 2307: // for each def in defnod[] that is within loop l 2308: 2309: for (i = 0; i < deftop; i++) 2310: { if (!vec_testbit(defnod[i].DNblock->Bdfoidx,l->Lloop)) 2311: continue; // def is not in the loop 2312: 2313: n = defnod[i].DNelem; 2314: elem_debug(n); 2315: if (OTopeq(n->Eoper) && n->E1->Eoper == OPvar) 2316: { symbol *s; // if unambiguous def 2317: 2318: s = n->E1->EV.sp.Vsym; 2319: if (symbol_isintab(s)) 2320: { 2321: SYMIDX v; 2322: 2323: v = n->E1->EV.sp.Vsym->Ssymnum; 2324: { if (vec_testbit(v,poss)) 2325: // We've already seen this def elem, 2326: // therefore there is more than one 2327: // def of v within the loop, therefore 2328: // v is not a basic IV. 2329: vec_setbit(v,notposs); 2330: else 2331: vec_setbit(v,poss); 2332: } 2333: } 2334: } 2335: else // else ambiguous def 2336: { // mark any vars that could be affected by 2337: // this def as not possible 2338: 2339: if (!ambdone) // avoid redundant loops 2340: { for (j = 0; j < globsym.top; j++)
warning C4018: '<' : signed/unsigned mismatch
2341: { if (!(globsym.tab[j]->Sflags & SFLunambig)) 2342: vec_setbit(j,notposs); 2343: } 2344: ambdone = TRUE; 2345: } 2346: } 2347: } 2348: 2349: // Don't use symbols already in Livlist 2350: for (Iv *iv = l->Livlist; iv; iv = iv->IVnext) 2351: { symbol *s; 2352: 2353: s = iv->IVbasic; 2354: vec_setbit(s->Ssymnum,notposs); 2355: } 2356: 2357: 2358: #if 0 2359: dbg_printf("poss "); vec_println(poss); 2360: dbg_printf("notposs "); vec_println(notposs); 2361: #endif 2362: vec_subass(poss,notposs); // poss = poss - notposs 2363: 2364: // create list of IVs 2365: foreach (i,globsym.top,poss) // for each opeq IV
warning C4018: '<' : signed/unsigned mismatch
2366: { register Iv *biv; 2367: symbol *s; 2368: 2369: s = globsym.tab[i]; 2370: assert(symbol_isintab(s)); 2371: 2372: // Do not use aggregates as basic IVs. This is because the other loop 2373: // code doesn't check offsets into symbols, (assuming everything 2374: // is at offset 0). We could perhaps amend this by allowing basic IVs 2375: // if the struct consists of only one data member. 2376: if (tyaggregate(s->ty())) 2377: continue; 2378: 2379: biv = Iv::mycalloc(); 2380: biv->IVnext = l->Lopeqlist; 2381: l->Lopeqlist = biv; // link into list of IVs 2382: 2383: biv->IVbasic = s; // symbol of basic IV 2384: 2385: cmes3("Symbol '%s' (%d) is an opeq IV, ",s->Sident 2386: ? (char *)s->Sident : "",i); 2387: 2388: // We have the sym idx of the basic IV. We need to find 2389: // the parent of the increment elem for it. 2390: 2391: // First find the defnod[] 2392: for (j = 0; j < deftop; j++) 2393: { // If defnod is a def of i and it is in the loop 2394: if (defnod[j].DNelem->E1 && // OPasm are def nodes 2395: defnod[j].DNelem->E1->EV.sp.Vsym == s && 2396: vec_testbit(defnod[j].DNblock->Bdfoidx,l->Lloop)) 2397: goto L1; 2398: } 2399: assert(0); // should have found it 2400: // NOTREACHED 2401: 2402: L1: biv->IVincr = el_parent(defnod[j].DNelem,&(defnod[j].DNblock->Belem)); 2403: assert(s == (*biv->IVincr)->E1->EV.sp.Vsym); 2404: #ifdef DEBUG 2405: if (debugc) 2406: { dbg_printf("Opeq elem is: "); WReqn(*biv->IVincr); dbg_printf("\n"); } 2407: #endif 2408: Lcont:
warning C4102: 'Lcont' : unreferenced label
2409: ; 2410: } 2411: 2412: vec_free(poss); 2413: vec_free(notposs); 2414: } 2415: 2416: /***************************** 2417: * Find families for each basic IV. 2418: * An IV family is a list of elems of the form 2419: * c1*X+c2, where X is a basic induction variable. 2420: * Note that we do not do divides, because of roundoff error problems. 2421: */ 2422: 2423: STATIC void findivfams(register loop *l) 2424: { register Iv *biv; 2425: register unsigned i; 2426: register famlist *fl; 2427: 2428: cmes2("findivfams(%p)\n",l); 2429: for (biv = l->Livlist; biv; biv = biv->IVnext) 2430: { foreach (i,dfotop,l->Lloop) /* for each block in loop */ 2431: if (dfo[i]->Belem) 2432: ivfamelems(biv,&(dfo[i]->Belem)); 2433: /* Fold all the constant expressions in c1 and c2. */ 2434: for (fl = biv->IVfamily; fl; fl = fl->FLnext) 2435: { fl->c1 = doptelem(fl->c1,GOALvalue | GOALagain); 2436: fl->c2 = doptelem(fl->c2,GOALvalue | GOALagain); 2437: } 2438: } 2439: } 2440: 2441: /************************* 2442: * Tree walking support routine for findivfams(). 2443: * biv = basic induction variable pointer 2444: * pn pointer to elem 2445: */ 2446: 2447: STATIC void ivfamelems(register Iv *biv,register elem **pn) 2448: { register unsigned op; 2449: register tym_t ty,c2ty; 2450: register famlist *f; 2451: register elem *n,*n1,*n2; 2452: 2453: assert(pn); 2454: n = *pn; 2455: assert(biv && n); 2456: op = n->Eoper; 2457: if (OTunary(op)) 2458: { ivfamelems(biv,&n->E1); 2459: n1 = n->E1; 2460: n2 = NULL; 2461: } 2462: else if (OTbinary(op)) 2463: { ivfamelems(biv,&n->E1); 2464: ivfamelems(biv,&n->E2); /* LTOR or RTOL order is unimportant */ 2465: n1 = n->E1; 2466: n2 = n->E2; 2467: } 2468: else /* else leaf elem */ 2469: return; /* which can't be in the family */ 2470: 2471: if (op == OPmul || op == OPadd || op == OPmin || 2472: op == OPneg || op == OPshl) 2473: { /* Note that we are wimping out and not considering */ 2474: /* LI variables as part of c1 and c2, but only constants. */ 2475: 2476: ty = n->Ety; 2477: 2478: /* Since trees are canonicalized, basic induction variables */ 2479: /* will only appear on the left. */ 2480: 2481: /* Improvement: */ 2482: /* We wish to pick up the cases (biv + li), (biv - li) and */ 2483: /* (li + biv). OPmul and LS with bivs are out, since if we */ 2484: /* try to eliminate the biv, and the loop test is a >, >=, */ 2485: /* <, <=, we have a problem since we don't know if the li */ 2486: /* is negative. (Would have to call swaprel() on it.) */ 2487: 2488: /* If we have (li + var), swap the leaves. */ 2489: if (op == OPadd && isLI(n1) && n1->Eoper == OPvar && n2->Eoper == OPvar) 2490: { n->E1 = n2; 2491: n2 = n->E2 = n1; 2492: n1 = n->E1; 2493: } 2494: 2495: // Get rid of case where we painted a far pointer to a long 2496: if (op == OPadd || op == OPmin) 2497: { int sz; 2498: 2499: sz = tysize(ty); 2500: if (sz == tysize[TYfptr] && !tyfv(ty) && 2501: (sz != tysize(n1->Ety) || sz != tysize(n2->Ety))) 2502: return; 2503: } 2504: 2505: /* Look for function of basic IV (-biv or biv op const) */ 2506: if (n1->Eoper == OPvar && n1->EV.sp.Vsym == biv->IVbasic) 2507: { if (op == OPneg) 2508: { register famlist *fl; 2509: 2510: cmes2("found (-biv), elem %p\n",n); 2511: fl = newfamlist(ty); 2512: fl->FLivty = n1->Ety; 2513: fl->FLpelem = pn; 2514: fl->FLnext = biv->IVfamily; 2515: biv->IVfamily = fl; 2516: fl->c1 = el_una(op,ty,fl->c1); /* c1 = -1 */ 2517: } 2518: else if (n2->Eoper == OPconst || 2519: isLI(n2) && (op == OPadd || op == OPmin)) 2520: { register famlist *fl; 2521: 2522: #ifdef DEBUG 2523: if (debugc) 2524: { dbg_printf("found (biv op const), elem ("); 2525: WReqn(n); 2526: dbg_printf(");\n"); 2527: dbg_printf("Types: n1="); WRTYxx(n1->Ety); 2528: dbg_printf(" ty="); WRTYxx(ty); 2529: dbg_printf(" n2="); WRTYxx(n2->Ety); 2530: dbg_printf("\n"); 2531: } 2532: #endif 2533: fl = newfamlist(ty); 2534: fl->FLivty = n1->Ety; 2535: fl->FLpelem = pn; 2536: fl->FLnext = biv->IVfamily; 2537: biv->IVfamily = fl; 2538: switch (op) 2539: { case OPadd: /* c2 = right */ 2540: c2ty = n2->Ety; 2541: if (typtr(fl->c2->Ety)) 2542: c2ty = fl->c2->Ety; 2543: goto L1; 2544: case OPmin: /* c2 = -right */ 2545: c2ty = fl->c2->Ety; 2546: /* Check for subtracting two pointers */ 2547: if (typtr(c2ty) && typtr(n2->Ety)) 2548: { 2549: #if TX86 2550: if (tybasic(c2ty) == TYhptr) 2551: c2ty = TYlong; 2552: else 2553: #endif 2554: c2ty = I64 ? TYllong : TYint; 2555: } 2556: L1: 2557: fl->c2 = el_bin(op,c2ty,fl->c2,el_copytree(n2)); 2558: break; 2559: case OPmul: /* c1 = right */ 2560: case OPshl: /* c1 = 1 << right */ 2561: fl->c1 = el_bin(op,ty,fl->c1,el_copytree(n2)); 2562: break; 2563: default: 2564: assert(0); 2565: } 2566: } 2567: } 2568: 2569: /* Look for function of existing IV */ 2570: 2571: for (f = biv->IVfamily; f; f = f->FLnext) 2572: { if (*f->FLpelem != n1) /* not it */ 2573: continue; 2574: 2575: /* Look for (f op constant) */ 2576: if (op == OPneg) 2577: { 2578: cmes2("found (-f), elem %p\n",n); 2579: /* c1 = -c1; c2 = -c2; */ 2580: f->c1 = el_una(OPneg,ty,f->c1); 2581: f->c2 = el_una(OPneg,ty,f->c2); 2582: f->FLty = ty; 2583: f->FLpelem = pn; /* replace with new IV */ 2584: 2585: } 2586: else if (n2->Eoper == OPconst || 2587: isLI(n2) && (op == OPadd || op == OPmin)) 2588: { 2589: #ifdef DEBUG 2590: if (debugc) 2591: { dbg_printf("found (f op const), elem ("); 2592: WReqn(n); 2593: assert(*pn == n); 2594: dbg_printf(");\n"); 2595: elem_print(n); 2596: } 2597: #endif 2598: switch (op) 2599: { case OPmul: 2600: case OPshl: 2601: f->c1 = el_bin(op,ty,f->c1,el_copytree(n2)); 2602: break; 2603: case OPadd: 2604: case OPmin: 2605: break; 2606: default: 2607: assert(0); 2608: } 2609: f->c2 = el_bin(op,ty,f->c2,el_copytree(n2)); 2610: f->FLty = ty; 2611: f->FLpelem = pn; /* replace with new IV */ 2612: } /* else if */ 2613: } /* for */ 2614: } /* if */ 2615: } 2616: 2617: /********************************* 2618: * Eliminate frivolous family ivs, that is, 2619: * if we can't eliminate the BIV, then eliminate family ivs that 2620: * differ from it only by a constant. 2621: */ 2622: 2623: STATIC void elimfrivivs(loop *l) 2624: { Iv *biv; 2625: 2626: for (biv = l->Livlist; biv; biv = biv->IVnext) 2627: { int nfams; 2628: famlist *fl; 2629: int nrefs; 2630: 2631: cmes("elimfrivivs()\n"); 2632: /* Compute number of family ivs for biv */ 2633: nfams = 0; 2634: for (fl = biv->IVfamily; fl; fl = fl->FLnext) 2635: nfams++; 2636: cmes2("nfams = %d\n",nfams); 2637: 2638: /* Compute number of references to biv */ 2639: if (onlyref(biv->IVbasic,l,*biv->IVincr,&nrefs)) 2640: nrefs--; 2641: cmes2("nrefs = %d\n",nrefs); 2642: assert(nrefs + 1 >= nfams); 2643: if (nrefs > nfams || // if we won't eliminate the biv 2644: (!I16 && nrefs == nfams))
warning C6239: (<non-zero constant> && <expression>) always evaluates to the result of <expression>. Did you intend to use the bitwise-and operator?
2645: { /* Eliminate any family ivs that only differ by a constant */ 2646: /* from biv */ 2647: for (fl = biv->IVfamily; fl; fl = fl->FLnext) 2648: { elem *ec1 = fl->c1; 2649: targ_llong c; 2650: 2651: if (elemisone(ec1) 2652: #if TX86 2653: || 2654: // Eliminate fl's that can be represented by 2655: // an addressing mode 2656: (!I16 && ec1->Eoper == OPconst && tyintegral(ec1->Ety) &&
warning C6239: (<non-zero constant> && <expression>) always evaluates to the result of <expression>. Did you intend to use the bitwise-and operator?
2657: ((c = el_tolong(ec1)) == 2 || c == 4 || c == 8) 2658: ) 2659: #endif 2660: ) 2661: { fl->FLtemp = FLELIM; 2662: #ifdef DEBUG 2663: if (debugc) 2664: { dbg_printf("Eliminating frivolous IV "); 2665: WReqn(*fl->FLpelem); 2666: dbg_printf("\n"); 2667: } 2668: #endif 2669: } 2670: } 2671: } 2672: } 2673: } 2674: 2675: #if !TX86 2676: // input: IV tree, original IV symbol 2677: // output: offset into original IV symbol. 2678: // 2679: // the purpose is to keep track of offsets into integer after the following rewite: 2680: // 2681: // int to short|char 2682: // | 2683: // | => 2684: // int short | char 2685: // | | 2686: // i i + 2 | 3 2687: // 2688: // i.e. we do not want the 2 | 3 to be lost int the process of creating a TMP IV 2689: // 2690: // 2691: static IVSymFound; 2692: static int FindIVOffset(elem * e, symbol *s) 2693: { 2694: int offset; 2695: assert(e); 2696: assert(s); 2697: 2698: if (EOP(e)) 2699: { 2700: offset = FindIVOffset(e->E1, s); 2701: if (IVSymFound) 2702: return(offset); 2703: 2704: if (EBIN(e)) 2705: { 2706: e = e->E2; 2707: offset = FindIVOffset(e, s); 2708: if (IVSymFound) 2709: return(offset); 2710: } 2711: 2712: assert(0); // symbol should be found somewhere 2713: return(0); 2714: 2715: } 2716: 2717: if (e->Eoper == OPvar && e->EV.sp.Vsym == s) 2718: { 2719: IVSymFound = TRUE; 2720: return (e->Eoffset); 2721: } 2722: 2723: return(0); 2724: } 2725: #endif 2726: 2727: 2728: /****************************** 2729: * Introduce new variables. 2730: */ 2731: 2732: STATIC void intronvars(loop *l) 2733: { 2734: famlist *fl; 2735: Iv *biv; 2736: elem *T, *ne, *t2, *C2, *cmul; 2737: tym_t ty,tyr; 2738: 2739: cmes2("intronvars(%p)\n",l); 2740: for (biv = l->Livlist; biv; biv = biv->IVnext) // for each basic IV 2741: { register elem *bivinc = *biv->IVincr; /* ptr to increment elem */ 2742: 2743: for (fl = biv->IVfamily; fl; fl = fl->FLnext) 2744: { /* for each IV in family of biv */ 2745: if (fl->FLtemp == FLELIM) /* if already eliminated */ 2746: continue; 2747: 2748: /* If induction variable can be written as a simple function */ 2749: /* of a previous induction variable, skip it. */ 2750: if (funcprev(biv,fl)) 2751: continue; 2752: 2753: ty = fl->FLty; 2754: T = el_alloctmp(ty); /* allocate temporary T */ 2755: fl->FLtemp = T->EV.sp.Vsym; 2756: #if DEBUG 2757: cmes2("intronvars() introduced new variable '%s' of type ",T->EV.sp.Vsym->Sident); 2758: if (debugc) WRTYxx(ty); 2759: cmes("\n"); 2760: #endif 2761: 2762: /* append elem T=biv*C1+C2 to preheader */ 2763: /* ne = biv*C1 */ 2764: tyr = fl->FLivty; /* type of biv */ 2765: ne = el_var(biv->IVbasic); 2766: #if !TX86 2767: { 2768: // see comment for FindIVOffset 2769: IVSymFound = FALSE; 2770: ne->Eoffset = FindIVOffset((*fl->FLpelem), biv->IVbasic); 2771: } 2772: #endif 2773: ne->Ety = tyr; 2774: if (!elemisone(fl->c1)) /* don't multiply ptrs by 1 */ 2775: ne = el_bin(OPmul,tyr,ne,el_copytree(fl->c1)); 2776: if (tyfv(tyr) && tysize(ty) == SHORTSIZE) 2777: ne = el_una(OPlngsht,ty,ne); 2778: C2 = el_copytree(fl->c2); 2779: t2 = el_bin(OPadd,ty,ne,C2); /* t2 = ne + C2 */ 2780: ne = el_bin(OPeq,ty,el_copytree(T),t2); 2781: appendelem(ne, &(l->Lpreheader->Belem)); 2782: 2783: /* prefix T+=C1*C to elem biv+=C */ 2784: /* Must prefix in case the value of the expression (biv+=C) */ 2785: /* is used by somebody up the tree. */ 2786: cmul = el_bin(OPmul,fl->c1->Ety,el_copytree(fl->c1), 2787: el_copytree(bivinc->E2)); 2788: t2 = el_bin(bivinc->Eoper,ty,el_copytree(T),cmul); 2789: t2 = doptelem(t2,GOALvalue | GOALagain); 2790: *biv->IVincr = el_bin(OPcomma,bivinc->Ety,t2,bivinc); 2791: biv->IVincr = &((*biv->IVincr)->E2); 2792: #ifdef DEBUG 2793: if (debugc) 2794: { dbg_printf("Replacing elem ("); 2795: WReqn(*fl->FLpelem); 2796: dbg_printf(") with '%s'\n",T->EV.sp.Vsym->Sident); 2797: dbg_printf("The init elem is ("); 2798: WReqn(ne); 2799: dbg_printf(");\nThe increment elem is ("); 2800: WReqn(t2); 2801: dbg_printf(")\n"); 2802: } 2803: #endif 2804: el_free(*fl->FLpelem); 2805: *fl->FLpelem = T; /* replace elem n with ref to T */ 2806: doflow = TRUE; /* redo flow analysis */ 2807: changes++; 2808: } /* for */ 2809: } /* for */ 2810: } 2811: 2812: /******************************* 2813: * Determine if induction variable can be rewritten as a simple 2814: * function of a previously generated temporary. 2815: * This can frequently 2816: * generate less code than that of an all-new temporary (especially 2817: * if it is the same as a previous temporary!). 2818: * Input: 2819: * biv Basic induction variable 2820: * fl Item in biv's family list we are looking at 2821: * Returns: 2822: * FALSE Caller should create a new induction variable. 2823: * TRUE *FLpelem is replaced with function of a previous 2824: * induction variable. FLtemp is set to FLELIM to 2825: * indicate this. 2826: */ 2827: 2828: STATIC bool funcprev(Iv *biv,famlist *fl) 2829: { tym_t tymin; 2830: int sz; 2831: famlist *fls; 2832: elem *e1,*e2,*flse1; 2833: 2834: #ifdef DEBUG 2835: if (debugc) 2836: dbg_printf("funcprev\n"); 2837: #endif 2838: for (fls = biv->IVfamily; fls != fl; fls = fls->FLnext) 2839: { assert(fls); /* fl must be in list */ 2840: if (fls->FLtemp == FLELIM) /* no iv we can use here */ 2841: continue; 2842: 2843: /* The multipliers must match */ 2844: if (!el_match(fls->c1,fl->c1)) 2845: continue; 2846: 2847: /* If the c2's match also, we got it easy */ 2848: if (el_match(fls->c2,fl->c2)) 2849: { 2850: #if TX86 2851: if (tysize(fl->FLty) > tysize(fls->FLtemp->ty())) 2852: continue; /* can't increase size of var */ 2853: #endif 2854: flse1 = el_var(fls->FLtemp); 2855: flse1->Ety = fl->FLty; 2856: goto L2; 2857: } 2858: 2859: /* The difference is only in the addition. Therefore, replace 2860: *fl->FLpelem with: 2861: case 1: (fl->c2 + (fls->FLtemp - fls->c2)) 2862: case 2: (fls->FLtemp + (fl->c2 - fls->c2)) 2863: */ 2864: e1 = fl->c2; 2865: #if TX86 2866: /* Subtracting relocatables usually generates slow code for */ 2867: /* linkers that can't handle arithmetic on relocatables. */ 2868: if (typtr(fls->c2->Ety)) 2869: { if (fls->c2->Eoper == OPrelconst && 2870: !(fl->c2->Eoper == OPrelconst && 2871: fl->c2->EV.sp.Vsym == fls->c2->EV.sp.Vsym) 2872: ) 2873: continue; 2874: } 2875: #endif 2876: flse1 = el_var(fls->FLtemp); 2877: e2 = flse1; /* assume case 1 */ 2878: tymin = e2->Ety; 2879: if (typtr(fls->c2->Ety)) 2880: { if (!typtr(tymin)) 2881: { if (typtr(e1->Ety)) 2882: { e1 = e2; 2883: e2 = fl->c2; /* case 2 */ 2884: } 2885: else /* can't subtract fptr */ 2886: goto L1; 2887: } 2888: #if TX86 2889: if (tybasic(fls->c2->Ety) == TYhptr) 2890: tymin = TYlong; 2891: else 2892: #endif 2893: tymin = I64 ? TYllong : TYint; /* type of (ptr - ptr) */ 2894: } 2895: 2896: #if TX86 2897: /* If e1 and fls->c2 are fptrs, and are not from the same */ 2898: /* segment, we cannot subtract them. */ 2899: if (tyfv(e1->Ety) && tyfv(fls->c2->Ety)) 2900: { if (e1->Eoper != OPrelconst || fls->c2->Eoper != OPrelconst) 2901: goto L1; /* assume expressions have diff segs */ 2902: if (e1->EV.sp.Vsym->Sclass != fls->c2->EV.sp.Vsym->Sclass) 2903: { L1: 2904: el_free(flse1); 2905: continue; 2906: } 2907: } 2908: #else 2909: L1: 2910: el_free(flse1); 2911: continue; 2912: 2913: #endif 2914: /* Some more type checking... */ 2915: sz = tysize(fl->FLty); 2916: if (sz != tysize(e1->Ety) && 2917: sz != tysize(tymin)) 2918: goto L1; 2919: 2920: /* Do some type checking (can't add pointers and get a pointer!) */ 2921: #if TX86 2922: //if (typtr(fl->FLty) && typtr(e1->Ety) && typtr(tymin)) 2923: //goto L1; 2924: #else 2925: assert(!(typtr(fl->FLty) && typtr(e1->Ety) && typtr(tymin))); 2926: #endif 2927: /* Construct (e1 + (e2 - fls->c2)) */ 2928: flse1 = el_bin(OPadd,fl->FLty, 2929: e1, 2930: el_bin(OPmin,tymin, 2931: e2, 2932: el_copytree(fls->c2))); 2933: #if TX86 2934: if (sz < tysize(tymin) && sz == tysize(e1->Ety)) 2935: flse1->E2 = el_una(OPoffset,fl->FLty,flse1->E2); 2936: #endif 2937: 2938: flse1 = doptelem(flse1,GOALvalue | GOALagain); 2939: fl->c2 = NULL; 2940: L2: 2941: #ifdef DEBUG 2942: if (debugc) 2943: { dbg_printf("Replacing "); 2944: WReqn(*fl->FLpelem); 2945: dbg_printf(" with "); 2946: WReqn(flse1); 2947: dbg_printf("\n"); 2948: } 2949: #endif 2950: el_free(*fl->FLpelem); 2951: *fl->FLpelem = flse1; 2952: 2953: /* Fix the iv so when we do loops again, we won't create */ 2954: /* yet another iv, which is just what funcprev() is supposed */ 2955: /* to prevent. */ 2956: fls->FLtemp->Sflags |= SFLnotbasiciv; 2957: 2958: fl->FLtemp = FLELIM; /* mark iv as being gone */ 2959: changes++; 2960: doflow = TRUE; 2961: return TRUE; /* it was replaced */ 2962: } 2963: return FALSE; /* need to create a new variable */ 2964: } 2965: 2966: /*********************** 2967: * Eliminate basic IVs. 2968: */ 2969: 2970: STATIC void elimbasivs(register loop *l) 2971: { famlist *fl; 2972: register Iv *biv; 2973: register unsigned i; 2974: register tym_t ty; 2975: register elem **pref,*fofe,*C2; 2976: symbol *X; 2977: int refcount; 2978: 2979: cmes2("elimbasivs(%p)\n",l); 2980: for (biv = l->Livlist; biv; biv = biv->IVnext) // for each basic IV 2981: { 2982: 2983: /* Can't eliminate this basic IV if we have a goal for the */ 2984: /* increment elem. */ 2985: // Be careful about Nflags being in a union... 2986: if (!((*biv->IVincr)->Nflags & NFLnogoal)) 2987: continue; 2988: 2989: X = biv->IVbasic; 2990: assert(symbol_isintab(X)); 2991: ty = X->ty(); 2992: pref = onlyref(X,l,*biv->IVincr,&refcount); 2993: 2994: /* if only ref of X is of the form (X) or (X relop e) or (e relop X) */ 2995: if (pref != NULL && refcount <= 1) 2996: { elem *ref; 2997: tym_t flty; 2998: 2999: fl = biv->IVfamily; 3000: if (!fl) // if no elems in family of biv 3001: continue; 3002: 3003: ref = *pref; 3004: 3005: /* Replace (X) with (X != 0) */ 3006: if (ref->Eoper == OPvar) 3007: ref = *pref = el_bin(OPne,TYint,ref,el_long(ref->Ety,0L)); 3008: 3009: fl = simfl(fl,ty); /* find simplest elem in family */ 3010: if (!fl) 3011: continue; 3012: 3013: // Don't do the replacement if we would replace a 3014: // signed comparison with an unsigned one 3015: flty = fl->FLty; 3016: if (tyuns(ref->E1->Ety) | tyuns(ref->E2->Ety)) 3017: flty = touns(flty); 3018: 3019: if (ref->Eoper >= OPle && ref->Eoper <= OPge && 3020: #if 1 3021: !(tyuns(ref->E1->Ety) | tyuns(ref->E2->Ety)) && 3022: tyuns(flty)) 3023: #else 3024: (tyuns(ref->E1->Ety) | tyuns(ref->E2->Ety)) != 3025: tyuns(flty)) 3026: #endif 3027: continue; 3028: 3029: /* if we have (e relop X), replace it with (X relop e) */ 3030: if (ref->E2->Eoper == OPvar && ref->E2->EV.sp.Vsym == X) 3031: { register elem *tmp; 3032: 3033: tmp = ref->E2; 3034: ref->E2 = ref->E1; 3035: ref->E1 = tmp; 3036: ref->Eoper = swaprel(ref->Eoper); 3037: } 3038: 3039: // If e*c1+c2 would result in a sign change or an overflow 3040: // then we can't do it 3041: if (fl->c1->Eoper == OPconst) 3042: { 3043: #if LONGLONG 3044: targ_llong c1; 3045: #else 3046: targ_long c1; 3047: #endif 3048: int sz; 3049: 3050: c1 = el_tolong(fl->c1); 3051: sz = tysize(ty); 3052: if (sz == SHORTSIZE && 3053: ((ref->E2->Eoper == OPconst && 3054: c1 * el_tolong(ref->E2) & ~0x7FFFL) || 3055: c1 & ~0x7FFFL) 3056: ) 3057: continue; 3058: 3059: if (sz == LONGSIZE && 3060: ((ref->E2->Eoper == OPconst && 3061: c1 * el_tolong(ref->E2) & ~0x7FFFFFFFL) || 3062: c1 & ~0x7FFFFFFFL) 3063: ) 3064: continue; 3065: #if LONGLONG && __INTSIZE >= 4 3066: if (sz == LLONGSIZE && 3067: ((ref->E2->Eoper == OPconst && 3068: c1 * el_tolong(ref->E2) & ~0x7FFFFFFFFFFFFFFFLL) || 3069: c1 & ~0x7FFFFFFFFFFFFFFFLL) 3070: ) 3071: continue; 3072: #endif 3073: } 3074: 3075: /* If loop started out with a signed conditional that was 3076: * replaced with an unsigned one, don't do it if c2 3077: * is less than 0. 3078: */ 3079: if (ref->Nflags & NFLtouns && fl->c2->Eoper == OPconst) 3080: { 3081: targ_llong c2 = el_tolong(fl->c2); 3082: if (c2 < 0) 3083: continue; 3084: } 3085: 3086: elem *refE2 = el_copytree(ref->E2); 3087: int refEoper = ref->Eoper; 3088: 3089: /* if c1 < 0 and relop is < <= > >= 3090: then adjust relop as if both sides were multiplied 3091: by -1 3092: */ 3093: if (!tyuns(ty) && 3094: (tyintegral(ty) && el_tolong(fl->c1) < 0 || 3095: tyfloating(ty) && el_toldouble(fl->c1) < 0.0)) 3096: refEoper = swaprel(refEoper); 3097: 3098: /* Replace (X relop e) with (X relop (short)e) 3099: if T is 1 word but e is 2 3100: */ 3101: if (tysize(flty) == SHORTSIZE && 3102: tysize(refE2->Ety) == LONGSIZE) 3103: refE2 = el_una(OPlngsht,flty,refE2); 3104: 3105: /* replace e with e*c1 + c2 */ 3106: C2 = el_copytree(fl->c2); 3107: fofe = el_bin(OPadd,flty, 3108: el_bin(OPmul,refE2->Ety, 3109: refE2, 3110: el_copytree(fl->c1)), 3111: C2); 3112: fofe = doptelem(fofe,GOALvalue | GOALagain); // fold any constants 3113: 3114: if (tyuns(flty) && refEoper == OPge && 3115: fofe->Eoper == OPconst && el_allbits(fofe, 0) && 3116: fl->c2->Eoper == OPconst && !el_allbits(fl->c2, 0)) 3117: { 3118: /* Don't do it if replacement will result in 3119: * an unsigned T>=0 which will be an infinite loop. 3120: */ 3121: el_free(fofe); 3122: continue; 3123: } 3124: 3125: cmes2("Eliminating basic IV '%s'\n",X->Sident); 3126: 3127: #ifdef DEBUG 3128: if (debugc) 3129: { dbg_printf("Comparison replaced: "); 3130: WReqn(ref); 3131: dbg_printf(" with "); 3132: } 3133: #endif 3134: 3135: el_free(ref->E2); 3136: ref->E2 = refE2; 3137: ref->Eoper = refEoper; 3138: 3139: elimass(*biv->IVincr); // dump the increment elem 3140: 3141: // replace X with T 3142: #if TX86 3143: assert(ref->E1->EV.sp.Voffset == 0); 3144: // can be set for 68K when cast to smaller size 3145: #endif 3146: ref->E1->EV.sp.Vsym = fl->FLtemp; 3147: ref->E1->Ety = flty; 3148: ref->E2 = fofe; 3149: 3150: /* If sizes of expression worked out wrong... 3151: Which can happen if we have (int)ptr==e 3152: */ 3153: #if TX86 // DJB: Who's correct here? 3154: if (EBIN(fofe)) /* if didn't optimize it away */ 3155: #else 3156: if (EOP(fofe)) /* if didn't optimize it away */ 3157: #endif 3158: { int sz; 3159: tym_t ty,ty1,ty2;
warning C6246: Local declaration of 'ty' hides declaration of the same name in outer scope. For additional information, see previous declaration at line '2974' of 'c:\projects\extern\d\dmd\src\backend\gloop.c': Lines: 2974
3160: 3161: ty = fofe->Ety; 3162: sz = tysize(ty); 3163: ty1 = fofe->E1->Ety; 3164: ty2 = fofe->E2->Ety; 3165: /* Sizes of + expression must all be the same */ 3166: if (sz != tysize(ty1) && 3167: sz != tysize(ty2) 3168: ) 3169: { 3170: if (tyuns(ty)) /* if unsigned comparison */ 3171: ty1 = touns(ty1); /* to unsigned type */ 3172: fofe->Ety = ty1; 3173: ref->E1->Ety = ty1; 3174: } 3175: } 3176: 3177: #if TX86 3178: /* Fix if leaves of compare are TYfptrs and the compare */ 3179: /* operator is < <= > >=. */ 3180: if (ref->Eoper >= OPle && ref->Eoper <= OPge && 3181: tyfv(ref->E1->Ety)) 3182: { assert(tyfv(ref->E2->Ety)); 3183: ref->E1 = el_una(OPoffset,TYuint,ref->E1); 3184: ref->E2 = el_una(OPoffset,TYuint,fofe); 3185: } 3186: #endif 3187: #ifdef DEBUG 3188: if (debugc) 3189: { WReqn(ref); 3190: dbg_printf("\n"); 3191: } 3192: #endif 3193: 3194: changes++; 3195: doflow = TRUE; /* redo flow analysis */ 3196: 3197: /* if X is live on entry to any successor S outside loop */ 3198: /* prepend elem X=(T-c2)/c1 to S.Belem */ 3199: 3200: foreach (i,dfotop,l->Lexit) /* for each exit block */ 3201: { register elem *ne; 3202: register block *b; 3203: register list_t bl; 3204: 3205: for (bl = dfo[i]->Bsucc; bl; bl = list_next(bl)) 3206: { /* for each successor */ 3207: b = list_block(bl); 3208: if (vec_testbit(b->Bdfoidx,l->Lloop)) 3209: continue; /* inside loop */ 3210: if (!vec_testbit(X->Ssymnum,b->Binlv)) 3211: continue; /* not live */ 3212: 3213: C2 = el_copytree(fl->c2); 3214: ne = el_bin(OPmin,ty, 3215: el_var(fl->FLtemp), 3216: C2); 3217: #if TX86 3218: if (tybasic(ne->E1->Ety) == TYfptr && 3219: tybasic(ne->E2->Ety) == TYfptr) 3220: { ne->Ety = I64 ? TYllong : TYint; 3221: if (tylong(ty) && intsize == 2) 3222: ne = el_una(OPshtlng,ty,ne); 3223: } 3224: #endif 3225: 3226: ne = el_bin(OPeq,X->ty(), 3227: el_var(X), 3228: el_bin(OPdiv,ne->Ety, 3229: ne, 3230: el_copytree(fl->c1))); 3231: #ifdef DEBUG 3232: if (debugc) 3233: { dbg_printf("Adding ("); 3234: WReqn(ne); 3235: dbg_printf(") to exit block B%d\n",b->Bdfoidx); 3236: //elem_print(ne); 3237: } 3238: #endif 3239: /* We have to add a new block if there is */ 3240: /* more than one predecessor to b. */ 3241: if (list_next(b->Bpred)) 3242: { block *bn; 3243: register list_t bl2; 3244: 3245: bn = block_calloc(); 3246: bn->Btry = b->Btry; 3247: numblks++; 3248: assert(numblks <= maxblks); 3249: bn->BC = BCgoto; 3250: bn->Bnext = dfo[i]->Bnext; 3251: dfo[i]->Bnext = bn; 3252: list_append(&(bn->Bsucc),b); 3253: list_append(&(bn->Bpred),dfo[i]); 3254: list_ptr(bl) = (void *)bn; 3255: for (bl2 = b->Bpred; bl2; 3256: bl2 = list_next(bl2)) 3257: if (list_block(bl2) == dfo[i]) 3258: { list_ptr(bl2) = (void *)bn; 3259: goto L2; 3260: } 3261: assert(0); 3262: L2: 3263: b = bn; 3264: addblk = TRUE; 3265: } 3266: 3267: if (b->Belem) 3268: b->Belem = 3269: el_bin(OPcomma,b->Belem->Ety, 3270: ne,b->Belem); 3271: else 3272: b->Belem = ne; 3273: changes++; 3274: doflow = TRUE; /* redo flow analysis */ 3275: } /* for each successor */ 3276: } /* foreach exit block */ 3277: if (addblk) 3278: return; 3279: } 3280: else if (refcount == 0) /* if no uses of IV in loop */ 3281: { /* Eliminate the basic IV if it is not live on any successor */ 3282: foreach (i,dfotop,l->Lexit) /* for each exit block */ 3283: { register block *b; 3284: register list_t bl; 3285: 3286: for (bl = dfo[i]->Bsucc; bl; bl = list_next(bl)) 3287: { /* for each successor */ 3288: b = list_block(bl); 3289: if (vec_testbit(b->Bdfoidx,l->Lloop)) 3290: continue; /* inside loop */ 3291: if (vec_testbit(X->Ssymnum,b->Binlv)) 3292: goto L1; /* live */ 3293: } 3294: } 3295: 3296: cmes3("No uses, eliminating basic IV '%s' (%p)\n",(X->Sident) 3297: ? (char *)X->Sident : "",X); 3298: 3299: /* Dump the increment elem */ 3300: /* (Replace it with an OPconst that only serves as a */ 3301: /* placeholder in the tree) */ 3302: *(biv->IVincr) = el_selecte2(*(biv->IVincr)); 3303: 3304: changes++; 3305: doflow = TRUE; /* redo flow analysis */ 3306: L1: ; 3307: } 3308: } /* for */ 3309: } 3310: 3311: 3312: /*********************** 3313: * Eliminate opeq IVs that are not used outside the loop. 3314: */ 3315: 3316: STATIC void elimopeqs(register loop *l) 3317: { 3318: Iv *biv; 3319: unsigned i; 3320: elem **pref; 3321: symbol *X; 3322: int refcount; 3323: 3324: cmes2("elimopeqs(%p)\n",l); 3325: for (biv = l->Lopeqlist; biv; biv = biv->IVnext) // for each opeq IV 3326: { 3327: 3328: // Can't eliminate this basic IV if we have a goal for the 3329: // increment elem. 3330: // Be careful about Nflags being in a union... 3331: if (!((*biv->IVincr)->Nflags & NFLnogoal)) 3332: continue; 3333: 3334: X = biv->IVbasic; 3335: assert(symbol_isintab(X)); 3336: pref = onlyref(X,l,*biv->IVincr,&refcount); 3337: 3338: // if only ref of X is of the form (X) or (X relop e) or (e relop X) 3339: if (pref != NULL && refcount <= 1) 3340: ; 3341: else if (refcount == 0) // if no uses of IV in loop 3342: { // Eliminate the basic IV if it is not live on any successor 3343: foreach (i,dfotop,l->Lexit) // for each exit block 3344: { block *b; 3345: list_t bl; 3346: 3347: for (bl = dfo[i]->Bsucc; bl; bl = list_next(bl)) 3348: { // for each successor 3349: b = list_block(bl); 3350: if (vec_testbit(b->Bdfoidx,l->Lloop)) 3351: continue; // inside loop 3352: if (vec_testbit(X->Ssymnum,b->Binlv)) 3353: goto L1; // live 3354: } 3355: } 3356: 3357: cmes3("No uses, eliminating opeq IV '%s' (%p)\n",(X->Sident) 3358: ? (char *)X->Sident : "",X); 3359: 3360: // Dump the increment elem 3361: // (Replace it with an OPconst that only serves as a 3362: // placeholder in the tree) 3363: *(biv->IVincr) = el_selecte2(*(biv->IVincr)); 3364: 3365: changes++; 3366: doflow = TRUE; // redo flow analysis 3367: L1: ; 3368: } 3369: } 3370: } 3371: 3372: /************************** 3373: * Find simplest elem in family. 3374: * Input: 3375: * tym type of basic IV 3376: * Return NULL if none found. 3377: */ 3378: 3379: STATIC famlist * simfl(famlist *fl,tym_t tym) 3380: { famlist *sofar; 3381: 3382: assert(fl); 3383: sofar = NULL; 3384: for (; fl; fl = fl->FLnext) 3385: { 3386: if (fl->FLtemp == FLELIM) /* no variable, so skip it */ 3387: continue; 3388: /* If size of replacement is less than size of biv, we could */ 3389: /* be in trouble due to loss of precision. */ 3390: if (size(fl->FLtemp->ty()) < size(tym)) 3391: continue; 3392: sofar = flcmp(sofar,fl); /* pick simplest */ 3393: } 3394: return sofar; 3395: } 3396: 3397: /************************** 3398: * Return simpler of two family elems. 3399: * There is room for improvement, namely if 3400: * f1.c1 = 2, f2.c1 = 27 3401: * then pick f1 because it is a shift. 3402: */ 3403: 3404: STATIC famlist * flcmp(famlist *f1,famlist *f2) 3405: { tym_t ty; 3406: union eve *t1,*t2; 3407: 3408: assert(f2); 3409: if (!f1) 3410: goto Lf2; 3411: t1 = &(f1->c1->EV); 3412: t2 = &(f2->c1->EV); 3413: ty = (*f1->FLpelem)->Ety; /* type of elem */ 3414: #if 0 3415: printf("f1: c1 = %d, c2 = %d\n",t1->Vshort,f1->c2->EV.Vshort); 3416: printf("f2: c1 = %d, c2 = %d\n",t2->Vshort,f2->c2->EV.Vshort); 3417: WRTYxx((*f1->FLpelem)->Ety); 3418: WRTYxx((*f2->FLpelem)->Ety); 3419: #endif 3420: /* Wimp out and just pick f1 if the types don't match */ 3421: if (tysize(ty) == tysize((*f2->FLpelem)->Ety)) 3422: { 3423: switch (tybasic(ty)) 3424: { case TYbool: 3425: case TYchar: 3426: case TYschar: 3427: case TYuchar: 3428: if (t2->Vuchar == 1 || 3429: t1->Vuchar != 1 && f2->c2->EV.Vuchar == 0) 3430: goto Lf2; 3431: break; 3432: case TYshort: 3433: case TYushort: 3434: case TYchar16: 3435: case TYwchar_t: // BUG: what about 4 byte wchar_t's? 3436: case_short: 3437: if (t2->Vshort == 1 || 3438: t1->Vshort != 1 && f2->c2->EV.Vshort == 0) 3439: goto Lf2; 3440: break; 3441: 3442: #if TX86 3443: #if JHANDLE 3444: case TYjhandle: 3445: #endif 3446: case TYnullptr: 3447: case TYnptr: // BUG: 64 bit pointers? 3448: case TYsptr: 3449: case TYcptr: 3450: #endif 3451: case TYint: 3452: case TYuint: 3453: if (intsize == SHORTSIZE) 3454: goto case_short; 3455: else 3456: goto case_long; 3457: case TYlong: 3458: case TYulong: 3459: case TYdchar: 3460: case TYfptr: 3461: case TYvptr: 3462: #if TX86 3463: case TYhptr: 3464: #endif 3465: case_long: 3466: if (t2->Vlong == 1 || 3467: t1->Vlong != 1 && f2->c2->EV.Vlong == 0) 3468: goto Lf2; 3469: break; 3470: case TYfloat: 3471: if (t2->Vfloat == 1 || 3472: t1->Vfloat != 1 && f2->c2->EV.Vfloat == 0) 3473: goto Lf2; 3474: break; 3475: case TYdouble: 3476: case TYdouble_alias: 3477: if (t2->Vdouble == 1.0 || 3478: t1->Vdouble != 1.0 && f2->c2->EV.Vdouble == 0) 3479: goto Lf2; 3480: break; 3481: case TYldouble: 3482: if (t2->Vldouble == 1.0 || 3483: t1->Vldouble != 1.0 && f2->c2->EV.Vldouble == 0) 3484: goto Lf2; 3485: break; 3486: case TYllong: 3487: case TYullong: 3488: if (t2->Vllong == 1 || 3489: t1->Vllong != 1 && f2->c2->EV.Vllong == 0) 3490: goto Lf2; 3491: break; 3492: default: 3493: assert(0); 3494: } 3495: } 3496: //printf("picking f1\n"); 3497: return f1; 3498: 3499: Lf2: 3500: //printf("picking f2\n"); 3501: return f2; 3502: } 3503: 3504: /************************************ 3505: * Input: 3506: * x basic IV symbol 3507: * incn increment elem for basic IV X. 3508: * Output: 3509: * *prefcount # of references to X other than the increment elem 3510: * Returns: 3511: * If ref of X in loop l is of the form (X relop e) or (e relop X) 3512: * Return the relop elem 3513: * Else 3514: * Return NULL 3515: */ 3516: 3517: static int count; 3518: static elem **nd,*sincn; 3519: static symbol *X; 3520: 3521: STATIC elem ** onlyref(symbol *x,loop *l,elem *incn,int *prefcount) 3522: { register unsigned i; 3523: 3524: //printf("onlyref('%s')\n", x->Sident); 3525: X = x; /* save some parameter passing */ 3526: assert(symbol_isintab(x)); 3527: sincn = incn; 3528: #ifdef DEBUG 3529: if (!(X->Ssymnum < globsym.top && l && incn)) 3530: dbg_printf("X = %d, globsym.top = %d, l = %p, incn = %p\n",X->Ssymnum,globsym.top,l,incn); 3531: #endif 3532: assert(X->Ssymnum < globsym.top && l && incn); 3533: count = 0; 3534: nd = NULL; 3535: foreach (i,dfotop,l->Lloop) /* for each block in loop */ 3536: { block *b; 3537: 3538: b = dfo[i]; 3539: if (b->Belem) 3540: { 3541: countrefs(&b->Belem,b->BC == BCiftrue); 3542: } 3543: } 3544: #if 0 3545: dbg_printf("count = %d, nd = ("); 3546: if (nd) WReqn(*nd); 3547: dbg_printf(")\n"); 3548: #endif 3549: *prefcount = count; 3550: return nd; 3551: } 3552: 3553: /****************************** 3554: * Count elems of the form (X relop e) or (e relop X). 3555: * Do not count the node if it is the increment node (sincn). 3556: * Input: 3557: * flag: TRUE if block wants to test the elem 3558: */ 3559: 3560: STATIC void countrefs(register elem **pn,bool flag) 3561: { elem *n = *pn; 3562: 3563: assert(n); 3564: if (n == sincn) /* if it is the increment elem */ 3565: { 3566: if (OTbinary(n->Eoper)) 3567: countrefs(&n->E2, FALSE); 3568: return; // don't count lvalue 3569: } 3570: if (OTunary(n->Eoper)) 3571: countrefs(&n->E1,FALSE); 3572: else if (OTbinary(n->Eoper)) 3573: { 3574: if (OTrel(n->Eoper)) 3575: { elem *e1 = n->E1; 3576: 3577: assert(e1->Eoper != OPcomma); 3578: if (e1 == sincn && 3579: (e1->Eoper == OPeq || OTopeq(e1->Eoper))) 3580: goto L1; 3581: 3582: /* Check both subtrees to see if n is the comparison node, 3583: * that is, if X is a leaf of the comparison. 3584: */ 3585: if (e1->Eoper == OPvar && e1->EV.sp.Vsym == X && !countrefs2(n->E2) || 3586: #if TX86 3587: n->E2->Eoper == OPvar && n->E2->EV.sp.Vsym == X && !countrefs2(e1)) 3588: #else 3589: n->E2->Eoper == OPvar && n->E2->EV.sp.Vsym == X) 3590: #endif 3591: nd = pn; /* found the relop node */ 3592: } 3593: L1: 3594: countrefs(&n->E1,FALSE); 3595: countrefs(&n->E2,(flag && n->Eoper == OPcomma)); 3596: } 3597: else if ((n->Eoper == OPvar || n->Eoper == OPrelconst) && n->EV.sp.Vsym == X) 3598: { if (flag) 3599: nd = pn; /* comparing it with 0 */ 3600: count++; /* found another reference */ 3601: } 3602: } 3603: 3604: /******************************* 3605: * Count number of times symbol X appears in elem tree e. 3606: */ 3607: 3608: STATIC int countrefs2(elem *e) 3609: { 3610: elem_debug(e); 3611: while (OTunary(e->Eoper)) 3612: e = e->E1; 3613: if (OTbinary(e->Eoper)) 3614: return countrefs2(e->E1) + countrefs2(e->E2); 3615: return ((e->Eoper == OPvar || e->Eoper == OPrelconst) && 3616: e->EV.sp.Vsym == X); 3617: } 3618: 3619: /**************************** 3620: * Eliminate some special cases. 3621: */ 3622: 3623: STATIC void elimspec(loop *l) 3624: { register unsigned i; 3625: 3626: foreach (i,dfotop,l->Lloop) /* for each block in loop */ 3627: { block *b; 3628: 3629: b = dfo[i]; 3630: if (b->Belem) 3631: elimspecwalk(&b->Belem); 3632: } 3633: } 3634: 3635: /****************************** 3636: */ 3637: 3638: STATIC void elimspecwalk(elem **pn) 3639: { elem *n; 3640: 3641: n = *pn; 3642: assert(n); 3643: if (OTunary(n->Eoper)) 3644: elimspecwalk(&n->E1); 3645: else if (OTbinary(n->Eoper)) 3646: { 3647: elimspecwalk(&n->E1); 3648: elimspecwalk(&n->E2); 3649: if (OTrel(n->Eoper)) 3650: { elem *e1 = n->E1; 3651: 3652: /* Replace ((e1,e2) rel e3) with (e1,(e2 rel e3). 3653: * This will reduce the number of cases for elimbasivs(). 3654: * Don't do equivalent with (e1 rel (e2,e3)) because 3655: * of potential side effects in e1. 3656: */ 3657: if (e1->Eoper == OPcomma) 3658: { elem *e; 3659: 3660: #ifdef DEBUG 3661: if (debugc) 3662: { dbg_printf("3rewriting ("); WReqn(n); dbg_printf(")\n"); } 3663: #endif 3664: e = n->E2; 3665: n->E2 = e1; 3666: n->E1 = n->E2->E1; 3667: n->E2->E1 = n->E2->E2; 3668: n->E2->E2 = e; 3669: n->E2->Eoper = n->Eoper; 3670: n->E2->Ety = n->Ety; 3671: n->Eoper = OPcomma; 3672: 3673: changes++; 3674: doflow = TRUE; 3675: 3676: elimspecwalk(&n->E1); 3677: elimspecwalk(&n->E2); 3678: } 3679: 3680: /* Rewrite ((X op= e2) rel e3) into ((X op= e2),(X rel e3)) 3681: * Rewrite ((X ++ e2) rel e3) into ((X += e2),(X-e2 rel e3)) 3682: * so that the op= will not have a goal, so elimbasivs() 3683: * will work on it. 3684: */ 3685: if ((OTopeq(e1->Eoper) 3686: || OTpost(e1->Eoper) 3687: ) && 3688: !el_sideeffect(e1->E1)) 3689: { elem *e; 3690: int op; 3691: #ifdef DEBUG 3692: if (debugc) 3693: { dbg_printf("4rewriting ("); WReqn(n); dbg_printf(")\n"); } 3694: #endif 3695: e = el_calloc(); 3696: el_copy(e,n); 3697: e->E1 = el_copytree(e1->E1); 3698: e->E1->Ety = n->E1->Ety; 3699: n->E2 = e; 3700: switch (e1->Eoper) 3701: { case OPpostinc: 3702: e1->Eoper = OPaddass; 3703: op = OPmin; 3704: goto L3; 3705: case OPpostdec: 3706: e1->Eoper = OPminass; 3707: op = OPadd; 3708: L3: e->E1 = el_bin(op,e->E1->Ety,e->E1,el_copytree(e1->E2)); 3709: break; 3710: 3711: } 3712: /* increment node is now guaranteed to have no goal */ 3713: e1->Nflags |= NFLnogoal; 3714: n->Eoper = OPcomma; 3715: //changes++; 3716: doflow = TRUE; 3717: 3718: elimspecwalk(&n->E1); 3719: elimspecwalk(&n->E2); 3720: } 3721: } 3722: } 3723: } 3724: 3725: #endif 3726: