


#include "Include.h"


int FindAbsReg(PN,R)  
   int PN,R;

/* converts a relative register number to an absolute one */

/* changed by NM, February 19, 1999 */

{  if (R <= 9) return (R);
   else if (R <= 15) return (R - 10 + 26 + 16*CPU[PN].CWP);
   else if (R <= 25) return (R - 16 + 16 + 16*CPU[PN].CWP);
   else return (R - 26 + 10 + 16*CPU[PN].CWP);
}


unsigned RegValue(PN,R)
   int PN,R;

/* returns the value stored in (relative) register number R of the current
   window */

{  int AR;

   AR = FindAbsReg(PN,R);
   return (CPU[PN].Reg[AR]);
}


float FloatRegValue(PN,R)
   int PN,R;

/* returns the value stored in (relative) register number R of the current
   window, in float form */

{  int AR;

   AR = FindAbsReg(PN,R);
   return (FloatValue(CPU[PN].Reg[AR]));
}


unsigned NamedGet(Symbol,Index)
   char Symbol[];  unsigned Index;

/* returns the value Index words past Symbol in Mem, where Symbol
   is one of the named data items in the .mas file */

{  unsigned Offset;

   Offset = SymLookup(Symbol,NDataSyms);
   return(Mem[Offset+Index]);
}


NamedPut(Val,Symbol,Index)
   unsigned Val; char Symbol[];  unsigned Index;

/* puts Val at Index words past Symbol in Mem, where Symbol
   is one of the named data items in the .mas file */

{  unsigned Offset;

   Offset = SymLookup(Symbol,NDataSyms);
   Mem[Offset+Index] = Val;
}


float NamedFloatGet(Symbol,Index)
   char Symbol[];  unsigned Index;

/* returns the float value Index words past Symbol in Mem, where Symbol
   is one of the named data items in the .mas file */

{  unsigned Offset;

   Offset = SymLookup(Symbol,NDataSyms);
   return(FloatValue(Mem[Offset+Index]));
}


NamedFloatPut(Val,Symbol,Index)
   double Val; char Symbol[];  unsigned Index;

/* puts float Val at Index words past Symbol in Mem, where Symbol
   is one of the named data items in the .mas file */

{  unsigned Offset;

   Offset = SymLookup(Symbol,NDataSyms);
   Mem[Offset+Index] = Value(Val);
}


SetNZ(PN,Tmp)
   int PN,Tmp;

{  if (Tmp < 0) CPU[PN].CondCodes[1] = 1;
   else CPU[PN].CondCodes[1] = 0;
   if (!Tmp) CPU[PN].CondCodes[0] = 1;
   else CPU[PN].CondCodes[0] = 0;
}


Add(PN)
   int PN;

{  int Tmp;

   Tmp = CPU[PN].Reg[CPU[PN].AbsRS1] + CPU[PN].Op2;  
   SetNZ(PN,Tmp);
   if (CPU[PN].AbsRD) CPU[PN].Reg[CPU[PN].AbsRD] = Tmp; 
}


Sub(PN)
   int PN;

{  int Tmp;  

   Tmp = CPU[PN].Reg[CPU[PN].AbsRS1] - CPU[PN].Op2;  
   SetNZ(PN,Tmp);
   if (CPU[PN].AbsRD) CPU[PN].Reg[CPU[PN].AbsRD] = Tmp; 
}


Mul(PN)
   int PN;

{  int Tmp;

   Tmp = CPU[PN].Reg[CPU[PN].AbsRS1] * CPU[PN].Op2;  
   SetNZ(PN,Tmp);
   if (CPU[PN].AbsRD) CPU[PN].Reg[CPU[PN].AbsRD] = Tmp; 
}


Div(PN)
   int PN;

{  int Tmp;  

   Tmp = CPU[PN].Reg[CPU[PN].AbsRS1] / CPU[PN].Op2;  
   SetNZ(PN,Tmp);
   if (CPU[PN].AbsRD) CPU[PN].Reg[CPU[PN].AbsRD] = Tmp; 
}

Mod(PN)
   int PN;

{  int Tmp;  

   Tmp = CPU[PN].Reg[CPU[PN].AbsRS1] % CPU[PN].Op2;
   SetNZ(PN,Tmp);
   if (CPU[PN].AbsRD) CPU[PN].Reg[CPU[PN].AbsRD] = Tmp; 
}

Bcomp(PN)
   int PN;

{  int Tmp;  

   Tmp = ~(CPU[PN].Reg[CPU[PN].AbsRS1]);  
   SetNZ(PN,Tmp);
   if (CPU[PN].AbsRD) CPU[PN].Reg[CPU[PN].AbsRD] = Tmp; 
}

FAdd(PN)
   int PN;

{  float Tmp;

   Tmp = FloatValue(CPU[PN].Reg[CPU[PN].AbsRS1]) + FloatValue(CPU[PN].Op2);  
   SetNZ(PN,Tmp);
   if (CPU[PN].AbsRD) CPU[PN].Reg[CPU[PN].AbsRD] = Value(Tmp); 
}


FSub(PN)
   int PN;

{  float Tmp;  

   Tmp = FloatValue(CPU[PN].Reg[CPU[PN].AbsRS1]) - FloatValue(CPU[PN].Op2);  
   SetNZ(PN,Tmp);
   if (CPU[PN].AbsRD) CPU[PN].Reg[CPU[PN].AbsRD] = Value(Tmp); 
}


FMul(PN)
   int PN;

{  float Tmp;  

   Tmp = FloatValue(CPU[PN].Reg[CPU[PN].AbsRS1]) * FloatValue(CPU[PN].Op2);  
   SetNZ(PN,Tmp);
   if (CPU[PN].AbsRD) CPU[PN].Reg[CPU[PN].AbsRD] = Value(Tmp); 
}


FDiv(PN)
   int PN;

{  float Tmp;  

   Tmp = FloatValue(CPU[PN].Reg[CPU[PN].AbsRS1]) / FloatValue(CPU[PN].Op2);  
   SetNZ(PN,Tmp);
   if (CPU[PN].AbsRD) CPU[PN].Reg[CPU[PN].AbsRD] = Value(Tmp); 
}

				/* my new commands */

				/* this determines if the condition codes 
				   will be set on logical R-to-R operations */
#define LogicalCC 0

Or(PN)
   int PN;

{  int Tmp;

   Tmp = (CPU[PN].Reg[CPU[PN].AbsRS1] | CPU[PN].Op2);
#if LogicalCC
   SetNZ(PN,Tmp);
#endif
   if (CPU[PN].AbsRD) CPU[PN].Reg[CPU[PN].AbsRD] = Tmp; 
}

Xor(PN)
   int PN;

{  int Tmp;

   Tmp = (CPU[PN].Reg[CPU[PN].AbsRS1] ^ CPU[PN].Op2);  
#if LogicalCC
   SetNZ(PN,Tmp);
#endif
   if (CPU[PN].AbsRD) CPU[PN].Reg[CPU[PN].AbsRD] = Tmp; 
}

Orn(PN)
   int PN;

{  int Tmp;

   Tmp = ~(CPU[PN].Reg[CPU[PN].AbsRS1] | CPU[PN].Op2);  
#if LogicalCC
   SetNZ(PN,Tmp);
#endif
   if (CPU[PN].AbsRD) CPU[PN].Reg[CPU[PN].AbsRD] = Tmp; 
}

Xorn(PN)
   int PN;

{  int Tmp;

   Tmp = ~(CPU[PN].Reg[CPU[PN].AbsRS1] ^ CPU[PN].Op2);  
#if LogicalCC
   SetNZ(PN,Tmp);
#endif
   if (CPU[PN].AbsRD) CPU[PN].Reg[CPU[PN].AbsRD] = Tmp; 
}

And(PN)
   int PN;

{  int Tmp;

   Tmp = (CPU[PN].Reg[CPU[PN].AbsRS1] & CPU[PN].Op2);  
#if LogicalCC
   SetNZ(PN,Tmp);
#endif
   if (CPU[PN].AbsRD) CPU[PN].Reg[CPU[PN].AbsRD] = Tmp; 
}

Andn(PN)
   int PN;

{  int Tmp;

   Tmp = ~(CPU[PN].Reg[CPU[PN].AbsRS1] & CPU[PN].Op2);  
#if LogicalCC
   SetNZ(PN,Tmp);
#endif
   if (CPU[PN].AbsRD) CPU[PN].Reg[CPU[PN].AbsRD] = Tmp; 
}

Sll(PN)
   int PN;

{  int Tmp;

   Tmp = (CPU[PN].Reg[CPU[PN].AbsRS1] << CPU[PN].Op2);  
#if LogicalCC
   SetNZ(PN,Tmp);
#endif
   if (CPU[PN].AbsRD) CPU[PN].Reg[CPU[PN].AbsRD] = Tmp; 
}

Srl(PN)
   int PN;

{  int Tmp;
				/* using the unsigned for portability
				   see Darnell & Margolis (p. 143-5) .kdr*/
   Tmp = ((unsigned) CPU[PN].Reg[CPU[PN].AbsRS1]) >> CPU[PN].Op2;  
#if LogicalCC
   SetNZ(PN,Tmp);
#endif
   if (CPU[PN].AbsRD) CPU[PN].Reg[CPU[PN].AbsRD] = Tmp; 
}

Sra(PN)
   int PN;

{  int Tmp, src, shift_factor;
   unsigned msb, sign_ext = 0xffffffff;

   src = CPU[PN].Reg[CPU[PN].AbsRS1];	/* for readability .kdr*/
   shift_factor = CPU[PN].Op2;	        /* for readability .kdr*/
   msb = (src & 0x7fffffff) >> 31;	/* get value of most sig. bit .kdr*/
					/* (unsigned) ensures that initial
					   shift is logical, !(arith) .kdr*/
   Tmp = (unsigned) src >> shift_factor; 
					/* perform sign extension if 
					   necessary  .kdr*/
   if (msb) Tmp |= (sign_ext << shift_factor);
#if LogicalCC
   SetNZ(PN,Tmp);
#endif
   if (CPU[PN].AbsRD) CPU[PN].Reg[CPU[PN].AbsRD] = Tmp; 
}
				/* end of my new functions */


Ld(PN)  /* initiate load; later, LdDone() will be called later */
   int PN;
  
{  CPU[PN].EffAddrs = CPU[PN].Reg[CPU[PN].AbsRS1] + ISPtr->Base;
   CPU[PN].MemAccPending = 1;
   CPU[PN].MemAccType = READ;
   if (InterconType == XBAR || InterconType == OMEGA || InterconType == USER)
      SetUpNewMemRequest(PN);
}


LdDone(PN)
   int PN;

{  unsigned Tmp; 

   Tmp = CPU[PN].MemValue;
   SetNZ(PN,Tmp);
   if (CPU[PN].AbsRD) CPU[PN].Reg[CPU[PN].AbsRD] = Tmp; 
   CPU[PN].MemAccPending = 0;
}


St(PN)  /* initiate store; StDone() will be called later */
   int PN;

{  CPU[PN].EffAddrs = CPU[PN].Reg[CPU[PN].AbsRD] + ISPtr->Base;
   CPU[PN].MemAccPending = 1;
   CPU[PN].MemAccType = WRITE;
   CPU[PN].MemValue = CPU[PN].Reg[CPU[PN].AbsRS1];
   if (InterconType == XBAR || InterconType == OMEGA || InterconType == USER)
      SetUpNewMemRequest(PN);
}


StDone(PN)
   int PN;

{  CPU[PN].MemAccPending = 0;  }


AInc(PN)  /* initiate ainc; AIncDone() will be called later */
   int PN;

{  CPU[PN].EffAddrs = CPU[PN].Reg[CPU[PN].AbsRS1] + ISPtr->Base;
   CPU[PN].MemAccPending = 1;
   CPU[PN].MemAccType = AINC;
   if (InterconType == XBAR || InterconType == OMEGA || InterconType == USER)
      SetUpNewMemRequest(PN);
}


AIncDone(PN)
   int PN;

{  unsigned Tmp; 

   Tmp = CPU[PN].MemValue;
   SetNZ(PN,Tmp);
   if (CPU[PN].AbsRD) CPU[PN].Reg[CPU[PN].AbsRD] = Tmp; 
   CPU[PN].MemAccPending = 0;
}


Tas(PN)  /* initiate Tas; TasDone() will be called later */
   int PN;

{  CPU[PN].EffAddrs = CPU[PN].Reg[CPU[PN].AbsRS1] + ISPtr->Base;
   CPU[PN].MemAccPending = 1;
   CPU[PN].MemAccType = TAS;
   if (InterconType == XBAR || InterconType == OMEGA || InterconType == USER)
      SetUpNewMemRequest(PN);
}


TasDone(PN)
   int PN;

{  int Tmp;

   Tmp = CPU[PN].MemValue;
   SetNZ(PN,Tmp);
   if (CPU[PN].AbsRD) CPU[PN].Reg[CPU[PN].AbsRD] = Tmp; 
   CPU[PN].MemAccPending = 0;
}


Jmp(PN)
   int PN;

{  int CondSatis;  

   switch (ISPtr->Condition)  {
      case LT: CondSatis = (CPU[PN].CondCodes[1]); break;
      case LE: CondSatis = (CPU[PN].CondCodes[0] ||
                            CPU[PN].CondCodes[1]); break;
      case EQ: CondSatis = (CPU[PN].CondCodes[0]); break;
      case GE: CondSatis = (!CPU[PN].CondCodes[1]); break;
      case GT: CondSatis = (!CPU[PN].CondCodes[0] &&
                !CPU[PN].CondCodes[1]); break;
      case NE: CondSatis = (!CPU[PN].CondCodes[0]); break;
      case NONE: CondSatis = 1; break;
   }  
   if (CondSatis) CPU[PN].PC = ISPtr->JumpTarget;  
}


Call(PN)
   int PN;

{  if (!CPU[PN].AbsRS1)  {
      printf("absolute register 0 used for saving return address\n");
      printf("processor number %d, PC value %d\n",PN,CPU[PN].PC);
      exit();
   }
   if (CPU[PN].CWP == NWindows-1)  {
      printf("register window allocation exceeded, at PC %x\n",
         CPU[PN].PC);
      exit();
   }
   CPU[PN].Reg[CPU[PN].AbsRS1] = CPU[PN].PC;
   CPU[PN].CWP++;
   CPU[PN].PC = ISPtr->JumpTarget;  
}

/* the "save" instruction is primarily used for frame-pointer
   management; for a given window, r30 is the frame pointer, and
   we must set up the frame pointer for a function called from
   this window, if any, so we need to subtract the frame space 
   used by this window from r30 and put the result in r14, since
   r14 will be the "r30" for the child window

   if that space is, say, 100, then we could simply do

      add r30,-100,r14

   but for the sake of clarity, we use the "save" instruction,

      save r14,-100,r14

   which has the interpretation "take the value of r14 in our parent
   window, and put it minus 100 in r14 of our window" */

Save(PN)
   int PN;

{
   if (CPU[PN].AbsRD)  /* changed by NM, February 19, 1999 */
     CPU[PN].Reg[CPU[PN].AbsRD] = CPU[PN].Reg[CPU[PN].AbsRS1] + CPU[PN].Op2;  
}   

Ret(PN)
   int PN;
 
{  CPU[PN].PC = CPU[PN].Reg[CPU[PN].AbsRS1]; 
   if (CPU[PN].CWP == 0)  {
      printf("register window underflow, at PC %x\n",
         CPU[PN].PC);
      exit();
   }
   CPU[PN].CWP--;
}


CPUNum(PN)
   int PN;

{  CPU[PN].Reg[CPU[PN].AbsRD] = PN;  }


SystSize(PN)
   int PN;

{  CPU[PN].Reg[CPU[PN].AbsRD] = NCPUs;  }


Halt(PN)
   int PN;

{  CPU[PN].Halted = 1;
   CPU[PN].PC--;
   if (++NHalted == NCPUs) printf("all CPUs halted\n");
}


Execute(PN)
   int PN;

{  int IG;  char Mnem[32];

   ISPtr = &Instr[CPU[PN].OldPC];  
   IG = ISPtr->InstrGrp;
   strcpy(Mnem,ISPtr->Mnemonic);
				/* compute the absolute register values: kdr */
				/* the strcmp for "jmp" returns true if it is
				   not a jump: kdr */

				/* handle my cases first */
   if (IG == CONST_TO_REG) {
      CPU[PN].Op2 = ISPtr->Const;
      CPU[PN].AbsRD = FindAbsReg(PN,ISPtr->RD);
   }
   else {
      if (IG == REG_TO_REG || 
	  IG == LOAD_STORE ||
	  (IG == CONTROL_XFER && strcmp(Mnem,"jmp")))
	CPU[PN].AbsRS1 = FindAbsReg(PN,ISPtr->RS1);
      if (IG == REG_TO_REG)  {
	 if (ISPtr->SecondOpType == OP2_IS_REG)  {
	    CPU[PN].AbsRS2 = FindAbsReg(PN,ISPtr->RS2);
	    CPU[PN].Op2 = CPU[PN].Reg[CPU[PN].AbsRS2];
	 }
	 else CPU[PN].Op2 = ISPtr->Const;
      }
      if (IG != CONTROL_XFER && 
	  IG != ZERO_OPERAND && 
	  IG != ONE_CONST &&
	  strcmp(Mnem,"userhook"))
	CPU[PN].AbsRD = FindAbsReg(PN,ISPtr->RD);
   }
				/* need to adjust save and restore abs 
				   register identifiers so that they use
				   the register in the previous window .kdr*/
   if (!strcmp(Mnem,"save"))  {
      CPU[PN].AbsRS1 -= 16;
      if (ISPtr->SecondOpType == OP2_IS_REG)  {
	 CPU[PN].AbsRS2 -= 16;
	 CPU[PN].Op2 = CPU[PN].Reg[CPU[PN].AbsRS2];
      }
      else CPU[PN].Op2 = ISPtr->Const;
      Save(PN);
      return;
   }

   if (!strcmp(Mnem,"add"))  {Add(PN); return;}
   if (!strcmp(Mnem,"sub"))  {Sub(PN); return;}
   if (!strcmp(Mnem,"mul"))  {Mul(PN); return;}
   if (!strcmp(Mnem,"div"))  {Div(PN); return;}
   if (!strcmp(Mnem,"mod"))  {Mod(PN); return;}
   if (!strcmp(Mnem,"bcomp"))  {Bcomp(PN); return;}
   if (!strcmp(Mnem,"fadd"))  {FAdd(PN); return;}
   if (!strcmp(Mnem,"fsub"))  {FSub(PN); return;}
   if (!strcmp(Mnem,"fmul"))  {FMul(PN); return;}
   if (!strcmp(Mnem,"fdiv"))  {FDiv(PN); return;}

   if (!strcmp(Mnem,"sll"))  {Sll(PN); return;}
   if (!strcmp(Mnem,"srl"))  {Srl(PN); return;}
   if (!strcmp(Mnem,"sra"))  {Sra(PN); return;}
   if (!strcmp(Mnem,"or"))  {Or(PN); return;}
   if (!strcmp(Mnem,"xor"))  {Xor(PN); return;}
   if (!strcmp(Mnem,"orn"))  {Orn(PN); return;}
   if (!strcmp(Mnem,"xorn"))  {Xorn(PN); return;}
   if (!strcmp(Mnem,"and"))  {And(PN); return;}
   if (!strcmp(Mnem,"andn"))  {Andn(PN); return;}

   if (!strcmp(Mnem,"jmp"))  {Jmp(PN); return;}
   if (!strcmp(Mnem,"call"))  {Call(PN); return;}
   if (!strcmp(Mnem,"ret"))  {Ret(PN); return;}
   if (!strcmp(Mnem,"ld"))  {Ld(PN); return;}
   if (!strcmp(Mnem,"st"))  {St(PN); return;}
   if (!strcmp(Mnem,"ainc"))  {AInc(PN); return;}
   if (!strcmp(Mnem,"tas"))  {Tas(PN); return;}
   if (!strcmp(Mnem,"cpunum"))  {CPUNum(PN); return;}
   if (!strcmp(Mnem,"systsize"))  {SystSize(PN); return;}
   if (!strcmp(Mnem,"userhook"))  
      {UserHook(PN,ISPtr->Const); return;}
   if (!strcmp(Mnem,"nop"))  {return;}
   if (!strcmp(Mnem,"halt"))  {Halt(PN); return;}
   printf("illegal opcode in processor %d, PC %x:  %s\n",
      PN,CPU[PN].PC,Mnem); 
   printf("previous PC was %x\n",CPU[PN].OldPC);
   printf("likely cause:  register numbers in call and ret "); 
   printf("instructions don't correspond to each other \n");
   exit();
}


DoFullMemAction(PNum)
   int PNum;

/* does the memory access, and notifies the CPU */

{  unsigned Tmp;

   switch (CPU[PNum].MemAccType)  {
      case READ: CPU[PNum].MemValue = Mem[CPU[PNum].EffAddrs];
                 LdDone(PNum);
                 break;
      case WRITE: Mem[CPU[PNum].EffAddrs] = CPU[PNum].MemValue;
                  StDone(PNum);
	          break;
      case AINC: Tmp = Mem[CPU[PNum].EffAddrs];
                 CPU[PNum].MemValue = Tmp++;
                 Mem[CPU[PNum].EffAddrs] = Tmp;
                 AIncDone(PNum);
                 break;
      case TAS: Tmp = Mem[CPU[PNum].EffAddrs];
                if (!Tmp) Mem[CPU[PNum].EffAddrs] = 1;
                CPU[PNum].MemValue = Tmp;
                TasDone(PNum);
                break;
      }
}


Pram()

{  int PNum;

   for (PNum = 0; PNum < NCPUs; PNum++)
      if (CPU[PNum].MemAccPending)  DoFullMemAction(PNum);
}


Bus()

{  int I,PNum;

   /* for fairness and to avoid starvation, choose a random CPU
      among those which are ready, if any */
   PNum = rand() % NCPUs;
   for (I = 0; I < NCPUs; I++)  {
      if (CPU[PNum].MemAccPending)
         break;
      PNum++;  if (PNum == NCPUs) PNum = 0;
   }

   /* if such a CPU is found, process that CPU's memory request */
   if (!CPU[PNum].MemAccPending) return;
   DoFullMemAction(PNum);
}


SetUpNewMemRequest(PN)
   int PN;

{  MemAccPtr TmpPtr;

   TmpPtr = (MemAccPtr) malloc(sizeof(struct MemAccStruct));
   if (!TmpPtr)  {
      printf("malloc error in attempt to set up new memory request\n");
      exit();
   }
   TmpPtr->PNum = PN;
   TmpPtr->Next = 0;
   Append(TmpPtr,&NewMemRequestsHead,&NewMemRequestsTail);
}


BackToFree(LPtr)
   MemAccPtr LPtr;

/* calls the free() function on all items in the list pointed to by LPtr */

{  MemAccPtr Tmp;

   while (LPtr)  {
      Tmp = LPtr->Next;
      free(LPtr);
      LPtr = Tmp;
    }
}


Append(APtr,HPtrPtr,TPtrPtr)
   MemAccPtr APtr,*HPtrPtr,*TPtrPtr;

/* appends APtr to the list whose head and tail are *HPtrPtr, *TPtrPtr */

{  /* treat the case in which the list is empty as special */
   if (!*HPtrPtr) *HPtrPtr = *TPtrPtr = APtr;
   else  {
      (*TPtrPtr)->Next = APtr;
      *TPtrPtr = APtr;
   }
}


MemAccPtr DeleteHead(HPtrPtr,TPtrPtr)
   MemAccPtr *HPtrPtr,*TPtrPtr;

/* deletes the head of the list whose head and tail are *HPtrPtr, *TPtrPtr,
   and returns the deleted head */

{  MemAccPtr TmpPtr;

   TmpPtr = *HPtrPtr;
   /* treat the case in which the list has only one element as special */
   if (!(*HPtrPtr)->Next) *HPtrPtr = *TPtrPtr = 0;
   else  *HPtrPtr = (*HPtrPtr)->Next;
   TmpPtr->Next = 0;
   return TmpPtr;
}


DoFirstMemAction(PNum)
   int PNum;

/* does the memory access only */

{  unsigned Tmp;

   switch (CPU[PNum].MemAccType)  {
      case READ: CPU[PNum].MemValue = Mem[CPU[PNum].EffAddrs];
                 break;
      case WRITE: Mem[CPU[PNum].EffAddrs] = CPU[PNum].MemValue;
	          break;
      case AINC: Tmp = Mem[CPU[PNum].EffAddrs];
                 CPU[PNum].MemValue = Tmp++;
                 Mem[CPU[PNum].EffAddrs] = Tmp;
                 break;
      case TAS: Tmp = Mem[CPU[PNum].EffAddrs];
                if (!Tmp) Mem[CPU[PNum].EffAddrs] = 1;
                CPU[PNum].MemValue = Tmp;
                break;
      }
}


DoSecondMemAction(PNum)
   int PNum;

/* does the memory access, and notifies the CPU */

{  switch (CPU[PNum].MemAccType)  {
      case READ: LdDone(PNum);
                 break;
      case WRITE: StDone(PNum);
	          break;
      case AINC: AIncDone(PNum);
                 break;
      case TAS: TasDone(PNum);
                break;
      }
}


Crossbar()

/* nxn crossbar, where n is the number of NCPUs, which is assumed to
   be the same as the number of memory modules; low-order interleaving
   is assumed

   the crossbar is assumed to consist of n rows of n 2x2 switches

   picture: row 0 is at the bottom, with memory module 0 at its far left,
            and n nodes, the jth of which is above CPU j; then above
	    row 0 is row 1, etc.; column 0 is the leftmost one; CPU 0
	    is at the left, above which are n nodes, the ith of which
	    is in row i

   it is assumed that it takes one CPU clock to move from one node to
      a neighboring (upward or leftward) node; it thus takes (i+1)+(j+1)
      cycles for a request from CPU j to get to memory module i, and
      the same time to return; for simplicity, we assume that the time
      spent at the memory module itself is small compared to the total
      network latency */

{  int i, j;  MemAccPtr TmpPtr;


   /* get the new requests which arose in this cycle onto the
      network */
   while (NewMemRequestsHead)  {
      TmpPtr = DeleteHead(&NewMemRequestsHead,&NewMemRequestsTail);
      j = TmpPtr->Dest = CPU[TmpPtr->PNum].EffAddrs % NCPUs;
      if(TmpPtr->Dest == TmpPtr->PNum){
       DoFirstMemAction(TmpPtr->PNum);
       DoSecondMemAction(TmpPtr->PNum);
       free(TmpPtr);
      }
      else Append(TmpPtr,&(PE[j]->ToutHead),&(PE[j]->ToutTail));
   }

   /* process request on network*/

   for(i=NCPUs-1;i>=0;i--){
     if(PE[i]->LoutHead){
       TmpPtr = DeleteHead(&(PE[i]->LoutHead),&(PE[i]->LoutTail));
       DoFirstMemAction(TmpPtr->PNum);
       Append(TmpPtr,&(PE[i]->RoutHead),&(PE[i]->RoutTail));
     }
     if(Routers[i][0]->LoutHead){
       TmpPtr = DeleteHead(&(Routers[i][0]->LoutHead),&(Routers[i][0]->LoutTail));
       Append(TmpPtr,&(PE[i]->LoutHead),&(PE[i]->LoutTail));
     }
     for(j=0;j<NCPUs;j++){
       if((j<NCPUs-1)&&(Routers[i][j+1]->LoutHead)){
         TmpPtr = DeleteHead(&(Routers[i][j+1]->LoutHead),&(Routers[i][j+1]->LoutTail));
         Append(TmpPtr,&(Routers[i][j]->LoutHead),&(Routers[i][j]->LoutTail));
       }
       else if((i>0)&&(Routers[i-1][j]->ToutHead)){
         TmpPtr = DeleteHead(&(Routers[i-1][j]->ToutHead),&(Routers[i-1][j]->ToutTail));
         if((TmpPtr->Dest)==i)Append(TmpPtr,&(Routers[i][j]->LoutHead),&(Routers[i][j]->LoutTail));
         else Append(TmpPtr,&(Routers[i][j]->ToutHead),&(Routers[i][j]->ToutTail));
       }
       else if((i==0)&&(PE[j]->ToutHead)){
         TmpPtr = DeleteHead(&(PE[j]->ToutHead),&(PE[j]->ToutTail));
         if((TmpPtr->Dest)==i)Append(TmpPtr,&(Routers[i][j]->LoutHead),&(Routers[i][j]->LoutTail));
         else Append(TmpPtr,&(Routers[i][j]->ToutHead),&(Routers[i][j]->ToutTail));
       }
     }
   }
   for(j=NCPUs-1;j>=0;j--){
     if(Routers[0][j]->BoutHead){
       TmpPtr = DeleteHead(&(Routers[0][j]->BoutHead),&(Routers[0][j]->BoutTail));
       DoSecondMemAction(TmpPtr->PNum);
       free(TmpPtr);
     }
     for(i=0;i<NCPUs;i++){
       if((i<NCPUs-1)&&(Routers[i+1][j]->BoutHead)){
         TmpPtr = DeleteHead(&(Routers[i+1][j]->BoutHead),&(Routers[i+1][j]->BoutTail));
         Append(TmpPtr,&(Routers[i][j]->BoutHead),&(Routers[i][j]->BoutTail));
       }
       else if((j>0)&&(Routers[i][j-1]->RoutHead)){
         TmpPtr = DeleteHead(&(Routers[i][j-1]->RoutHead),&(Routers[i][j-1]->RoutHead));
         if((TmpPtr->PNum)==j)Append(TmpPtr,&(Routers[i][j]->BoutHead),&(Routers[i][j]->BoutTail));
         else Append(TmpPtr,&(Routers[i][j]->RoutHead),&(Routers[i][j]->RoutTail));
       }
       else if((j==0)&&(PE[i]->RoutHead)){
         TmpPtr = DeleteHead(&(PE[i]->RoutHead),&(PE[i]->RoutTail));
         if((TmpPtr->PNum)==j)Append(TmpPtr,&(Routers[i][j]->BoutHead),&(Routers[i][j]->BoutTail));
         else Append(TmpPtr,&(Routers[i][j]->RoutHead),&(Routers[i][j]->RoutTail));
       }
     }
   }
}

Omega()
{  int i, j, left, top, bottom, right;  MemAccPtr TmpPtr;

   /* get the new requests which arose in this cycle onto the
      network */
   while (NewMemRequestsHead)  {
      TmpPtr = DeleteHead(&NewMemRequestsHead,&NewMemRequestsTail);
      j = TmpPtr->Dest = CPU[TmpPtr->PNum].EffAddrs % NCPUs;
      if(TmpPtr->Dest == TmpPtr->PNum){
       DoFirstMemAction(TmpPtr->PNum);
       DoSecondMemAction(TmpPtr->PNum);
       free(TmpPtr);
      }
      else Append(TmpPtr,&(PE[j]->ToutHead),&(PE[j]->ToutTail));
   }
   /* process request on network*/
   for(i=Stages-1;i>=0;i--){
     for(j=0;j<NodesPerStage;j++){
       if(i==Stages-1){
         if(PE[(j*2)+0]->LoutHead){
           TmpPtr = DeleteHead(&(PE[(j*2)+0]->LoutHead),&(PE[(j*2)+0]->LoutTail));
           DoFirstMemAction(TmpPtr->PNum);
           TmpPtr->Dest=TmpPtr->PNum;
           Append(TmpPtr,&(PE[(j*2)+0]->RoutHead),&(PE[(j*2)+0]->RoutTail));
         }
         if(PE[(j*2)+1]->LoutHead){
           TmpPtr = DeleteHead(&(PE[(j*2)+1]->LoutHead),&(PE[(j*2)+1]->LoutTail));
           DoFirstMemAction(TmpPtr->PNum);
           TmpPtr->Dest=TmpPtr->PNum;
           Append(TmpPtr,&(PE[(j*2)+1]->RoutHead),&(PE[(j*2)+1]->RoutTail));
         }
         if(Routers[i][j]->LoutHead){
           TmpPtr = DeleteHead(&(Routers[i][j]->LoutHead),&(Routers[i][j]->LoutTail));
           Append(TmpPtr,&(PE[(j*2)+0]->LoutHead),&(PE[(j*2)+0]->LoutTail));
         }
         if(Routers[i][j]->ToutHead){
           TmpPtr = DeleteHead(&(Routers[i][j]->ToutHead),&(Routers[i][j]->ToutTail));
           Append(TmpPtr,&(PE[(j*2)+1]->LoutHead),&(PE[(j*2)+1]->LoutTail));
         }
       }
       if(i>0){
         if(j%2){
           bottom=(j-1)%(NodesPerStage/2)+(j>=(NodesPerStage/2));
           right=bottom+(NodesPerStage/2);
           if(NodesPerStage==2){bottom=0;right=1;}
           if(Routers[i-1][bottom]->ToutHead){
             TmpPtr = DeleteHead(&(Routers[i-1][bottom]->ToutHead),&(Routers[i-1][bottom]->ToutTail));
             if((TmpPtr->Dest)<NodesPerStage)Append(TmpPtr,&(Routers[i][j]->LoutHead),&(Routers[i][j]->LoutTail));
             else Append(TmpPtr,&(Routers[i][j]->ToutHead),&(Routers[i][j]->ToutTail));
             TmpPtr->Dest=((TmpPtr->Dest)%NodesPerStage)*2;
           }
           else if(Routers[i-1][right]->ToutHead){
             TmpPtr = DeleteHead(&(Routers[i-1][right]->ToutHead),&(Routers[i-1][right]->ToutHead));
             if((TmpPtr->Dest)<NodesPerStage)Append(TmpPtr,&(Routers[i][j]->LoutHead),&(Routers[i][j]->LoutTail));
             else Append(TmpPtr,&(Routers[i][j]->ToutHead),&(Routers[i][j]->ToutTail));
             TmpPtr->Dest=((TmpPtr->Dest)%NodesPerStage)*2;
           }
         }
         else{
           bottom=j%(NodesPerStage/2)+(j>=(NodesPerStage/2));
           right=bottom+(NodesPerStage/2);
           if(NodesPerStage==2){bottom=0;right=1;}
           if(Routers[i-1][bottom]->LoutHead){
             TmpPtr = DeleteHead(&(Routers[i-1][bottom]->LoutHead),&(Routers[i-1][bottom]->LoutTail));
             if((TmpPtr->Dest)<NodesPerStage)Append(TmpPtr,&(Routers[i][j]->LoutHead),&(Routers[i][j]->LoutTail));
             else Append(TmpPtr,&(Routers[i][j]->ToutHead),&(Routers[i][j]->ToutTail));
             TmpPtr->Dest=((TmpPtr->Dest)%NodesPerStage)*2;
           }
           else if(Routers[i-1][right]->LoutHead){
             TmpPtr = DeleteHead(&(Routers[i-1][right]->LoutHead),&(Routers[i-1][right]->LoutHead));
             if((TmpPtr->Dest)<NodesPerStage)Append(TmpPtr,&(Routers[i][j]->LoutHead),&(Routers[i][j]->LoutTail));
             else Append(TmpPtr,&(Routers[i][j]->ToutHead),&(Routers[i][j]->ToutTail));
             TmpPtr->Dest=((TmpPtr->Dest)%NodesPerStage)*2;
           }
         }
       }
       else if(i==0){
         if(PE[(j*2)+0]->ToutHead){
           TmpPtr = DeleteHead(&(PE[(j*2)+0]->ToutHead),&(PE[(j*2)+0]->ToutTail));
           if((TmpPtr->Dest)<NodesPerStage)Append(TmpPtr,&(Routers[i][j]->LoutHead),&(Routers[i][j]->LoutTail));
           else Append(TmpPtr,&(Routers[i][j]->ToutHead),&(Routers[i][j]->ToutTail));
           TmpPtr->Dest=((TmpPtr->Dest)%NodesPerStage)*2;
         }
         else if(PE[(j*2)+1]->ToutHead){
           TmpPtr = DeleteHead(&(PE[(j*2)+1]->ToutHead),&(PE[(j*2)+1]->ToutTail));
           if((TmpPtr->Dest)<NodesPerStage)Append(TmpPtr,&(Routers[i][j]->LoutHead),&(Routers[i][j]->LoutTail));
           else Append(TmpPtr,&(Routers[i][j]->ToutHead),&(Routers[i][j]->ToutTail));
           TmpPtr->Dest=((TmpPtr->Dest)%NodesPerStage)*2;
         }
       }
     }
   }
   for(i=0;i<Stages;i++){
     for(j=NodesPerStage-1;j>=0;j--){
       if(i==0){
         if(Routers[0][j]->BoutHead){
           TmpPtr = DeleteHead(&(Routers[0][j]->BoutHead),&(Routers[0][j]->BoutTail));
           DoSecondMemAction(TmpPtr->PNum);
           free(TmpPtr);
         }
         if(Routers[0][j]->RoutHead){
           TmpPtr = DeleteHead(&(Routers[0][j]->RoutHead),&(Routers[0][j]->RoutTail));
           DoSecondMemAction(TmpPtr->PNum);
           free(TmpPtr);
         }
       }
       if(i<Stages-1){
         left=(j%(NodesPerStage/2))+((j%2)*(NodesPerStage/2-1));
         top=left+1;
         if(NodesPerStage==2){left=0;top=1;}
         if(j<(NodesPerStage/2)){
           if(Routers[i+1][left]->BoutHead){
             TmpPtr = DeleteHead(&(Routers[i+1][left]->BoutHead),&(Routers[i+1][left]->BoutTail));
             if((TmpPtr->Dest)>=NodesPerStage)Append(TmpPtr,&(Routers[i][j]->RoutHead),&(Routers[i][j]->RoutTail));
             else Append(TmpPtr,&(Routers[i][j]->BoutHead),&(Routers[i][j]->BoutTail));
             TmpPtr->Dest=((TmpPtr->Dest)%NodesPerStage)*2;
           }
           else if(Routers[i+1][top]->BoutHead){
             TmpPtr = DeleteHead(&(Routers[i+1][top]->BoutHead),&(Routers[i+1][top]->BoutTail));
             if((TmpPtr->Dest)>=NodesPerStage)Append(TmpPtr,&(Routers[i][j]->RoutHead),&(Routers[i][j]->RoutTail));
             else Append(TmpPtr,&(Routers[i][j]->BoutHead),&(Routers[i][j]->BoutTail));
             TmpPtr->Dest=((TmpPtr->Dest)%NodesPerStage)*2;
           }
         }
         else{
           if(Routers[i+1][left]->RoutHead){
             TmpPtr = DeleteHead(&(Routers[i+1][left]->RoutHead),&(Routers[i+1][left]->RoutTail));
             if((TmpPtr->Dest)>=NodesPerStage)Append(TmpPtr,&(Routers[i][j]->RoutHead),&(Routers[i][j]->RoutTail));
             else Append(TmpPtr,&(Routers[i][j]->BoutHead),&(Routers[i][j]->BoutTail));
             TmpPtr->Dest=((TmpPtr->Dest)%NodesPerStage)*2;
           }
           else if(Routers[i+1][top]->RoutHead){
             TmpPtr = DeleteHead(&(Routers[i+1][top]->RoutHead),&(Routers[i+1][top]->RoutTail));
             if((TmpPtr->Dest)>=NodesPerStage)Append(TmpPtr,&(Routers[i][j]->RoutHead),&(Routers[i][j]->RoutTail));
             else Append(TmpPtr,&(Routers[i][j]->BoutHead),&(Routers[i][j]->BoutTail));
             TmpPtr->Dest=((TmpPtr->Dest)%NodesPerStage)*2;
           }
         }
       }
       if(i==Stages-1){
         if(PE[j*2]->RoutHead){
           TmpPtr = DeleteHead(&(PE[j*2]->RoutHead), &(PE[j*2]->RoutTail));
           if((TmpPtr->Dest)>=NodesPerStage)Append(TmpPtr,&(Routers[i][j]->RoutHead),&(Routers[i][j]->RoutTail));
           else Append(TmpPtr,&(Routers[i][j]->BoutHead),&(Routers[i][j]->BoutTail));
           TmpPtr->Dest=((TmpPtr->Dest)%NodesPerStage)*2;
         }
         else if(PE[j*2+1]->RoutHead){
           TmpPtr = DeleteHead(&(PE[j*2+1]->RoutHead), &(PE[j*2+1]->RoutTail));
           if((TmpPtr->Dest)>=NodesPerStage)Append(TmpPtr,&(Routers[i][j]->RoutHead),&(Routers[i][j]->RoutTail));
           else Append(TmpPtr,&(Routers[i][j]->BoutHead),&(Routers[i][j]->BoutTail));
           TmpPtr->Dest=((TmpPtr->Dest)%NodesPerStage)*2;
         }
       }
     }
   }

}


DoMemActionU(P)
   int P;

{  unsigned Tmp,R;

   switch (CPU[P].MemAccType)  {
      case READ: CPU[P].MemValue = Mem[CPU[P].EffAddrs];
                 LdDone(P);
                 return;
      case WRITE: if (!NoWritesYet) return;
                  Mem[CPU[P].EffAddrs] = CPU[P].MemValue;
                  StDone(P);
		  NoWritesYet = 0;
	          return;
      case AINC: if (!NoWritesYet) return;
                 Tmp = Mem[CPU[P].EffAddrs];
                 CPU[P].MemValue = Tmp++;
                 Mem[CPU[P].EffAddrs] = Tmp;
                 AIncDone(P);
	         NoWritesYet = 0;
                 return;
      case TAS: Tmp = Mem[CPU[P].EffAddrs];
                /* if the tas is successful, i.e. 0 is found,
		   then a write is needed; if some other CPU
		   has already done a write in this cycle,
		   we will cancel this CPU's tas instruction
		   here */
                if (!Tmp && !NoWritesYet)  {
		   CPU[P].PC--;
		   CPU[P].MemAccPending = 0;
		   return;
		}
		if (!Tmp) Mem[CPU[P].EffAddrs] = 1;
                CPU[P].MemValue = Tmp;
                TasDone(P);
		if (!Tmp) NoWritesYet = 0;
                return;
      }
}


SnoopyUpdate()

/* snoopy update cache-coherency protocol

   for simpler implementation, assume infinite caches

   when a processor does a write, it writes that item to the bus; this
   is the only time a bus transaction occurs */

{  unsigned PNum,I;

   /* all CPUs will be examined; those which are doing read operations
      will be processed, but only one write will be allowed

   /* for fairness and to avoid starvation, start at a random CPU */

   NoWritesYet = 1;
   PNum = rand() % NCPUs;
   for (I = 0; I < NCPUs; I++)  {
      if (CPU[PNum].MemAccPending)
         DoMemActionU(PNum);
      PNum++;  if (PNum == NCPUs) PNum = 0;
   }
}


char State(P,B)
   int P,B;

{  return(*(StatePtr+P*NBlocks+B));  }


WriteState(P,B,C)
   int P,B;  char C;

{  *(StatePtr+P*NBlocks+B) = C;  }


DoMemActionI(P,B)
   int P,B;

/* completes the memory transaction involving block B at processor P */

{  unsigned Tmp,R;


   /* cache miss? */
   if (State(P,B) == 'i' && P != InvalPNum)  {
      if (!NoBusUseFoundYet)  {
 	 CPU[P].PC--;
	 CPU[P].MemAccPending = 0;
         return;
      }
      BlockXferTimeLeft = BlockSize;
      XferBNum = B;
      InvalPNum = P;
      NoBusUseFoundYet = 0;
      return;
   }
   switch (CPU[P].MemAccType)  {
      case READ: CPU[P].MemValue = Mem[CPU[P].EffAddrs];
                 LdDone(P);
		 if (ExclPNum[B] != -1)  {
		    WriteState(ExclPNum[B],B,'s');
		    ExclPNum[B] = -1;
		 }
		 if (State(P,B) != 'e') WriteState(P,B,'s');
                 return;
      case WRITE: if (!NoBusUseFoundYet && P != InvalPNum)  {
		     CPU[P].PC--;
		     CPU[P].MemAccPending = 0;
		     return;
		 }
                 Mem[CPU[P].EffAddrs] = CPU[P].MemValue;
                  StDone(P);
		  for (R = 0; R < NCPUs; R++)
		     if (R != P) WriteState(R,B,'i');
                  WriteState(P,B,'e');
		  ExclPNum[B] = P;
		  NoBusUseFoundYet = 0;
	          return;
      case AINC: if (!NoBusUseFoundYet && P != InvalPNum)  {
		    CPU[P].PC--;
		    CPU[P].MemAccPending = 0;
		    return;
		 }
                 Tmp = Mem[CPU[P].EffAddrs];
                 CPU[P].MemValue = Tmp++;
                 Mem[CPU[P].EffAddrs] = Tmp;
                 AIncDone(P);
		 for (R = 0; R < NCPUs; R++)
		    if (R != P) WriteState(R,B,'i');
                 WriteState(P,B,'e');
		 ExclPNum[B] = P;
	         NoBusUseFoundYet = 0;
                 return;
      case TAS: Tmp = Mem[CPU[P].EffAddrs];
                if (!Tmp && !NoBusUseFoundYet && P != InvalPNum)  {
		   CPU[P].PC--;
		   CPU[P].MemAccPending = 0;
		   return;
		}
                if (!Tmp) Mem[CPU[P].EffAddrs] = 1;
                CPU[P].MemValue = Tmp;
                TasDone(P);
		if (!Tmp)  {
		   for (R = 0; R < NCPUs; R++)
		      if (R != P) WriteState(R,B,'i');
                   WriteState(P,B,'e');
		   ExclPNum[B] = P;
	           NoBusUseFoundYet = 0;
		}
		else  {
		   if (ExclPNum[B] != -1)  {
		      WriteState(ExclPNum[B],B,'s');
		      ExclPNum[B] = -1;
		   }
		   if (State(P,B) != 'e') WriteState(P,B,'s');
		}
                return;
      }
}


SnoopyInvalidate()

/* snoopy invalidate cache-coherency protocol

   for simpler implementation, assume infinite caches (with all cache
   lines starting out as Shared), so that for example, the type of cache
   mapping, e.g. direct or set-associative, is not an issue

   State[P][B] will indicate the state of block B at processor P, one of
   the following:

      Shared ('s')  --  at least one other processor has a valid copy
      Exclusive ('e')  --  this is the only valid copy
      Invalid ('i')  --  this copy is invalid

   say processor P wishes to access block B; then the protocol is

   switch (access type)  {
        case read:
             if (State[P][B] == Invalid)  {
                  if (State[Q][B] == Exclusive at some processor Q)  {
                       update memory from Q;
	               State[Q][B] = Shared
	          }
                  copy B from memory to P;
	          State[P][B] = Shared
	     }
	     break;
        case write:
	     if (State[P][B] != Exclusive)  {
                  if (State[P][B] == Invalid)  {
                       if (State[Q][B] == Exclusive at some processor Q)
                            update memory from Q;
                       copy B from memory to P
	          }
	          State[R][B] = Invalid for all R != P;
		  State[P][B] = Exclusive
	     }
   }
*/

{  static int Startup = 1;  /* indicates first-time invocation of this
                               function, in which case some initializations
			       need to be done */
   unsigned PNum,BNum,I;

   if (Startup)  {
      if (!BlockSize)  {
         printf("block size not stated\n");
         exit();
       }
      NBlocks = MemSize / BlockSize;
      if (MemSize % BlockSize) NBlocks++;
      StatePtr = (char *) malloc(NCPUs*NBlocks);
      /* all cache lines start out as valid */
      MemSet(StatePtr,'s',NCPUs*NBlocks);
      ExclPNum = (unsigned *) malloc(sizeof(int)*NBlocks);
      for (I = 0; I < NBlocks; I++) ExclPNum[I] = -1;
      Startup = 0;
   }

   NoBusUseFoundYet = 1;

   /* block transfer in progress? */
   if (BlockXferTimeLeft > 0)  {
      NoBusUseFoundYet = 0;
      BlockXferTimeLeft--;
      /* block transfer now done? */
      if (!BlockXferTimeLeft)  {
         DoMemActionI(InvalPNum,XferBNum);
         InvalPNum = -1;
      }
   }

   /* all CPUs will be examined; those which do read operations
      from valid blocks will be processed; if a CPU needs the
      bus but it has already been used in this cycle, its
      current instruction will be canceled and restarted in
      the next cycle

      for fairness and to avoid starvation, assing the bus on
      a Round Robin basis */

   if (++StartPNum == NCPUs) StartPNum = 0;
   PNum = StartPNum;
   for (I = 0; I < NCPUs; I++)  {
      if (CPU[PNum].MemAccPending && PNum != InvalPNum)  {
         BNum = CPU[PNum].EffAddrs / BlockSize;
         DoMemActionI(PNum,BNum);
      }
      PNum++;  if (PNum == NCPUs) PNum = 0;
   }
}


StepOneCycle()

{  int PNum;

   for (PNum = 0; PNum < NCPUs; PNum++)  {
      if (!CPU[PNum].MemAccPending && !CPU[PNum].Halted)  {
         CPU[PNum].OldPC = CPU[PNum].PC++;
         Execute(PNum);
      }
   }
   switch (InterconType)  {
      case USER: UserMem(); break;
      case PRAM: Pram(); break;
      case BUS: Bus(); break;
      case XBAR: Crossbar(); break;
      case OMEGA: Omega(); break;
      case SNOOPY_INVALIDATE: SnoopyInvalidate(); break; 
      case SNOOPY_UPDATE: SnoopyUpdate(); break; 
   }
   SimTime++;
}





