
#include "Include.h"
#include <ctype.h>

char Token[MaxNInstrs][5][32];  /* the i-th one is the (at most 5)
                                   tokens for the i-th instruction
                                   line of the .mas file; filled in
                                   by ParseCommand(), called from
                                   SetTokSym() */

int TokenCount[MaxNInstrs];  /* the i-th one is the count of tokens
                                for the i-th instruction line of the
                                .mas file */

struct TableEntry SymTable[MaxSyms];  /* symbol table built by SetTokSym():
                                         the data symbols come first, then
                                         one dummy "******" record, then
                                         the instruction labels */

struct InstrStruct TmpInstr;  /* not referenced by any function visible
                                 in this file */

char SourceLine[MaxNInstrs][240];  /* the ith one is the ith instruction,
                                     NOT counting data, comments and
                                     labels; SetTokSym() cuts each line
                                     to at most 70 characters before
                                     storing it here */

int OutFD,  /* file descriptor for the .out file */
    SymFD,  /* file descriptor for the .sym file */
    NInstrs,  /* length of the program, i.e. number of instructions */
    NDataNames,  /* number of SymTable entries that belong to the .data
                    section, INCLUDING the dummy end record; the first
                    instruction label is therefore SymTable[NDataNames] */
    NSyms;  /* number of entries in the symbol table */ 

struct InstrStruct TmpIStruct;  /* the instruction currently being
                                   assembled; cleared and filled in by
                                   Assemble(), written to the .out file
                                   by main() */

char Mnem[32];  /* operation mnemonic of the instruction currently being
                   assembled (copied from Token[I][0] by Assemble()) */

FILE *MasFilePtr,*LstFilePtr;  /* streams for the .mas (source) file and
                                  the .lst (listing) file; both are opened
                                  by OpenFiles() */

extern int CurrLineNum;  /* defined elsewhere; SetTokSym() adds 1 for
                            every line it reads from the .mas file */


/* does the first pass at the source file, separating out data, comments,
   labels and instructions

   on return:
      SymTable[0..NDataNames-2]  data symbols (also written to the .sym file)
      SymTable[NDataNames-1]     dummy "******" record holding the total
                                 number of data words (also written to .sym)
      SymTable[NDataNames..NSyms-1]  instruction labels (NOT written to .sym)
      Token[], TokenCount[], SourceLine[], NInstrs  describe the instructions
      the .lst file has received one line per comment, symbol, label and
      instruction

   Prefix is not used; it is kept so that existing callers still compile

   every error is reported with printf() and PERR() and ends the program
   with exit status 1; otherwise the function returns 0 once the ".end"
   line has been read

   NOTE(review):  FGetS() and ParseCommand() are defined elsewhere; the
   end-of-file test below assumes that FGetS() leaves the buffer untouched
   when nothing more can be read from a stdio stream -- confirm */

/* returns a pointer to the first non-whitespace character of Line; this
   is the terminating NUL when Line is empty or holds only whitespace */
static char *MasSkipBlanks(char *Line)
{  while (isspace((unsigned char) *Line)) Line++;
   return Line;
}

/* appends SymTable[Index] to the .sym file; returns nonzero on success */
static int MasWriteSym(int Index)
{  return write(SymFD,&SymTable[Index],sizeof(struct TableEntry)) ==
             (int) sizeof(struct TableEntry);
}

int SetTokSym(char Prefix[])
{  char InLine[240],TmpArgv[5][32],Token0[32];
   char SpareTok[5][32];     /* parse target once Token[] is full */
   char (*Tok)[32];          /* where the current line's tokens go */
   int SpareCount,*Count;    /* likewise for the token count */
   unsigned LocCounter,TmpArgc;
   int DotTextLineFound = 0,T0Length;
   unsigned long TmpArgv1;

   (void) Prefix;

   /* skip everything up to and including the ".data" line */
   do  {
      InLine[0] = 0;
      FGetS(InLine,MasFilePtr);
      CurrLineNum++;
      if (InLine[0] == 0 && feof(MasFilePtr))  {
         printf("no .data line\n");
         PERR(CurrLineNum,InLine);
         exit(1);
      }
   } while (strncmp(".data",InLine,5));

   /* get data items, skipping comments and blank lines */
   NSyms = 0;
   LocCounter = 0;
   while (1)  {
      InLine[0] = 0;
      FGetS(InLine,MasFilePtr);
      CurrLineNum++;
                                    /* end of file?  reported below as
                                       a missing .text line */
      if (InLine[0] == 0 && feof(MasFilePtr)) break;
                                    /* blank line? (spaces, tabs and the
                                       trailing newline all count) */
      if (*MasSkipBlanks(InLine) == 0) continue;
                                    /* comment? */
      if (InLine[0] == ';')  {
         fprintf(LstFilePtr,"%s",InLine);
         continue;
      }
                                    /* reached text section yet? */
      if (!strncmp(".text",InLine,5))  {
         DotTextLineFound = 1;
         break;
      }
      ParseCommand(InLine,&TmpArgc,TmpArgv,32);
                                    /* SymTable has slots 0..MaxSyms-1 */
      if (NSyms >= MaxSyms)  {
         printf("maximum number of symbols (%d) exceeded (prog has %d symbols)\n",MaxSyms,NSyms);
         PERR(CurrLineNum,InLine);
         exit(1);
      }
                                    /* need a symbol and a word count */
      if (TmpArgc < 2 || sscanf(TmpArgv[1],"%lu",&TmpArgv1) != 1)  {
         printf("data declaration needs a symbol and a number of words\n");
         PERR(CurrLineNum,InLine);
         exit(1);
      }
                                    /* exact integer test; LocCounter is
                                       an unsigned, so the difference on
                                       the right cannot wrap */
      if (TmpArgv1 > (unsigned long) ((unsigned) -1) - LocCounter)  {
         printf("%s:  ",TmpArgv[0]);
         printf("next symbol would have location too big for unsigned int\n");
         PERR(CurrLineNum,InLine);
         exit(1);
      }
      strcpy(SymTable[NSyms].Symbol,TmpArgv[0]);
      SymTable[NSyms].Location = LocCounter;
      fprintf(LstFilePtr,"%5x   %s\n",LocCounter,TmpArgv[0]);
      if (!MasWriteSym(NSyms))  {
         printf("error writing .sym file\n");
         PERR(CurrLineNum,InLine);
         exit(1);
      }
      NSyms++;
      LocCounter += TmpArgv1;
   }

   /* put in dummy record at the end of the .sym file, consisting
      of a phony symbol and the current location counter (used by
      MulSim.c to determine memory requirements) */
   if (NSyms >= MaxSyms)  {
      printf("maximum number of symbols (%d) exceeded (prog has %d symbols)\n",MaxSyms,NSyms);
      PERR(CurrLineNum,InLine);
      exit(1);
   }
   strcpy(SymTable[NSyms].Symbol,"******");
   SymTable[NSyms].Location = LocCounter;
   if (!MasWriteSym(NSyms))  {
      printf("error writing .sym file\n");
      PERR(CurrLineNum,InLine);
      exit(1);
   }
   NDataNames = ++NSyms;

   /* now the instructions */
   if (!DotTextLineFound)  {
      printf("no .text line\n");
      PERR(CurrLineNum,InLine);
      exit(1);
   }
   NInstrs = 0;
   while (1)  {
      InLine[0] = 0;
      FGetS(InLine,MasFilePtr);
      CurrLineNum++;
      if (InLine[0] == 0 && feof(MasFilePtr))  {
         printf("no .end line\n");
         PERR(CurrLineNum,InLine);
         exit(1);
      }
      if (!strncmp(InLine,".end",4)) return 0;
                                    /* blank line? */
      if (*MasSkipBlanks(InLine) == 0) continue;
                                    /* comment?  recognised before the
                                       line is tokenised, so that a long
                                       comment is never split into
                                       tokens */
      if (*MasSkipBlanks(InLine) == ';')  {
         fprintf(LstFilePtr,"%s",InLine);
         continue;
      }
                                    /* tokenise straight into the slot of
                                       the next instruction while there
                                       is one; labels and comments after
                                       the last possible instruction go
                                       to a spare buffer instead of past
                                       the end of Token[] */
      if (NInstrs < MaxNInstrs)  {
         Tok = Token[NInstrs];
         Count = &TokenCount[NInstrs];
      }
      else  {
         Tok = SpareTok;
         Count = &SpareCount;
      }
      *Count = 0;
      ParseCommand(InLine,Count,Tok,32);
      if (*Count < 1) continue;     /* nothing to assemble on this line */
      strcpy(Token0,Tok[0]);
                                    /* comment, as seen by the tokeniser? */
      if (Token0[0] == ';')  {
         fprintf(LstFilePtr,"%s",InLine);
         continue;
      }
                                    /* label? */
      T0Length = strlen(Token0);
      if (T0Length > 0 && Token0[T0Length-1] == ':')  {
         if (NSyms >= MaxSyms)  {
            printf("maximum number of symbols exceeded\n");
            PERR(CurrLineNum,InLine);
            exit(1);
         }
                                    /* copies the name without the ':';
                                       strncpy() adds no NUL here, but
                                       this SymTable slot has never been
                                       written and, being static storage,
                                       is still all zero */
         strncpy(SymTable[NSyms].Symbol,Token0,T0Length-1);
         SymTable[NSyms].Location = NInstrs;
         fprintf(LstFilePtr,"%s\n",Token0);
         NSyms++;
      }
      else  {
         if (NInstrs >= MaxNInstrs)  {
            printf("maximum number of instructions exceeded:\n");
            printf("%s",InLine);
            PERR(CurrLineNum,InLine);
            exit(1);
         }
         InLine[70] = 0;            /* listing and SourceLine[] keep at
                                       most 70 characters of the line */
         fprintf(LstFilePtr,"%5x   %s",NInstrs,InLine);
         strcpy(SourceLine[NInstrs++],InLine);
      }
   }
}


				/* all instructions followed by (KDR) were
				   added by Kevin Rich while working on the
				   MulSim compiler with the VPCC system */

/* returns nonzero if the global Mnem is equal to one of the strings in
   List, which must end with a null pointer */
static int MnemIsOneOf(const char *const List[])
{  int K;

   for (K = 0; List[K] != 0; K++)
      if (!strcmp(Mnem,List[K])) return 1;
   return 0;
}

/* returns the instruction group (REG_TO_REG, LOAD_STORE, CONTROL_XFER,
   ONE_REG, ONE_CONST or ZERO_OPERAND) of the mnemonic currently held in
   the global Mnem

   I is not used; it is kept so that existing callers still compile

   an unknown mnemonic is reported with printf() and PERR2() and ends the
   program with exit status 1

   note that no mnemonic maps to CONST_TO_REG, although Assemble() has a
   case for that group */
int FindInstrGrp(int I)
{  static const char *const RegToReg[] = {
      "add", "sub", "mul", "div",
      "mod", "bcomp", "save", "restore",       /* new lcc .kdr */
      "fadd", "fsub", "fmul", "fdiv",
      "cftoi", "citof",                        /* new lcc .kdr */
      "sll",                      /* shift left logical (KDR) */
      "srl",                      /* shift right logical (KDR) */
      "sra",                      /* shift right arithmetic (KDR) */
      "or", "xor", "orn", "xorn", "andn", "and",        /* (KDR) */
      0
   };
   static const char *const LoadStore[] = { "ld", "st", "ainc", "tas", 0 };
   static const char *const ControlXfer[] = { "jmp", "call", "ret", 0 };
   static const char *const OneReg[] = { "cpunum", "systsize", 0 };
   static const char *const OneConst[] = { "userhook", 0 };
   static const char *const ZeroOperand[] = { "halt", "nop", 0 };

   (void) I;

   if (MnemIsOneOf(RegToReg)) return (REG_TO_REG);
   if (MnemIsOneOf(LoadStore)) return (LOAD_STORE);
   if (MnemIsOneOf(ControlXfer)) return (CONTROL_XFER);
   if (MnemIsOneOf(OneReg)) return (ONE_REG);
   if (MnemIsOneOf(OneConst)) return (ONE_CONST);
   if (MnemIsOneOf(ZeroOperand)) return (ZERO_OPERAND);

   printf("illegal opcode:  %s\n",Mnem); 
   PERR2(CurrLineNum,Mnem);
   exit(1);
   return -1;  /* not reached; keeps every path of an int function
                  returning a value */
}


/* opens the four files the assembler works with; all names are built
   from Prefix by appending a suffix:

      Prefix.mas  --  source, opened for reading as MasFilePtr
      Prefix.lst  --  listing, opened for writing as LstFilePtr
      Prefix.out  --  instruction records, opened for writing as OutFD
      Prefix.sym  --  symbol records, opened for writing as SymFD

   the two descriptor files are truncated when they already exist, so
   that no records of an earlier, longer program are left behind, and
   both are opened with O_BINARY since whole structs are written to them

   a missing or too long Prefix, or a file that cannot be opened, is
   reported with printf() and ends the program with exit status 1;
   otherwise the function returns 0 */
int OpenFiles(char *Prefix)
{  char MasFileName[256],  /* name of .mas file */
        LstFileName[256],  /* name of .lst file */
        OutFileName[256],  /* name of .out file */
        SymFileName[256];  /* name of .sym file */

   if (Prefix == 0 || Prefix[0] == 0)  {
      printf("missing file name prefix\n");
      exit(1);
   }
   /* every suffix is 4 characters long, plus 1 for the final NUL, and
      all four buffers have the same size, so one test covers them all */
   if (strlen(Prefix) + 5 > sizeof(MasFileName))  {
      printf("file name prefix too long:  %s\n",Prefix);
      exit(1);
   }

   strcpy(MasFileName,Prefix);
   strcat(MasFileName,".mas"); 
   MasFilePtr = fopen(MasFileName,"r");
   if (!MasFilePtr)  {
      printf("cannot open %s\n",MasFileName);
      exit(1);
   }

   strcpy(LstFileName,Prefix);
   strcat(LstFileName,".lst");
   LstFilePtr = fopen(LstFileName,"w");
   if (!LstFilePtr)  {
      printf("cannot open %s\n",LstFileName);
      exit(1);
   }

   strcpy(OutFileName,Prefix);
   strcat(OutFileName,".out");
   OutFD = open(OutFileName,O_CREAT|O_WRONLY|O_TRUNC|O_BINARY,
                S_IREAD|S_IWRITE);
   if (OutFD < 0)  {
      printf("cannot open %s\n",OutFileName);
      exit(1);
   }

   strcpy(SymFileName,Prefix);
   strcat(SymFileName,".sym");
   SymFD = open(SymFileName,O_CREAT|O_WRONLY|O_TRUNC|O_BINARY,
                S_IREAD|S_IWRITE);
   if (SymFD < 0)  {
      printf("cannot open %s\n",SymFileName);
      exit(1);
   }
   return 0;
}


				/* KDR: handles the set instruction, and 
				   any others that may be added that put an 
				   immediate operand into a register */

/* fills in TmpIStruct for the I-th instruction, which must have exactly
   3 tokens:  the mnemonic, a decimal constant (stored in Const) and the
   destination register (its number is stored in RD)

   a wrong token count is reported with printf() and PERR2() and ends the
   program with exit status 1; otherwise the function returns 0 */
int AssembConstToReg(int I)
{  
  if (TokenCount[I] != 3)  {
    printf("missing operand:\n");
    PERR2(I,SourceLine[I]);
    exit(1);
  }
  TmpIStruct.Const = (unsigned long) atoi(Token[I][1]);
				/* the +1 is so we lose the 'r' in "r31"  */
  TmpIStruct.RD = atoi(Token[I][2]+1);
  return 0;
}

/* fills in TmpIStruct for the I-th instruction when it belongs to the
   ONE_REG group:  the token after the mnemonic is the destination
   register, and its number is stored in RD

   a missing operand is reported with printf() and PERR2() and ends the
   program with exit status 1; otherwise the function returns 0 */
int AssembOneReg(int I)
{  
  if (TokenCount[I] < 2)  {
    printf("missing operand:\n");
    PERR2(I,SourceLine[I]);
    exit(1);
  }
				/* the +1 skips the leading 'r' of the
				   register name */
  TmpIStruct.RD = atoi(Token[I][1]+1);
  return 0;
}

/* fills in TmpIStruct for the I-th instruction when it belongs to the
   ONE_CONST group:  the token after the mnemonic is read as a decimal
   constant and stored in Const

   a missing operand is reported with printf() and PERR2() and ends the
   program with exit status 1; otherwise the function returns 0 */
int AssembOneConst(int I)
{  if (TokenCount[I] < 2)  {
      printf("missing operand:\n");
      PERR2(I,SourceLine[I]);
      exit(1);
   }
   TmpIStruct.Const = (unsigned long) atoi(Token[I][1]);
   return 0;
}

/* fills in TmpIStruct for the I-th instruction when it belongs to the
   REG_TO_REG group, i.e. has the form  "op rs1,op2,rd":

      token 1  first source register; its number goes to RS1
      token 2  second operand:
                  starts with 'r'  --  a register; SecondOpType is set to
                                       OP2_IS_REG and its number goes to RS2
                  starts with '&'  --  the rest is looked up with
                                       SymLookup() among the first
                                       NDataNames symbols and the result
                                       goes to Const (OP2_IS_CONST)
                  anything else    --  read as a decimal constant, which
                                       goes to Const (OP2_IS_CONST)
      token 3  destination register; its number goes to RD

   fewer than 4 tokens is reported with printf() and PERR2() and ends the
   program with exit status 1; otherwise the function returns 0 */
int AssembRegToReg(int I)
{  if (TokenCount[I] < 4)  {
      printf("fewer than 3 operands in a reg-to-reg instruction:\n");
      PERR2(I,SourceLine[I]);
      exit(1);
   }
   /* the +1 in the atoi() calls skips the leading 'r' of a register name */
   TmpIStruct.RS1 = atoi(Token[I][1]+1);
   if (Token[I][2][0] == 'r')  {
      TmpIStruct.SecondOpType = OP2_IS_REG;
      TmpIStruct.RS2 = atoi(Token[I][2]+1);
   }
   else  {
      TmpIStruct.SecondOpType = OP2_IS_CONST;
      if (Token[I][2][0] == '&')
         TmpIStruct.Const =
            (unsigned long) SymLookup(&Token[I][2][1],NDataNames);
      else
         TmpIStruct.Const = (unsigned long) atoi(Token[I][2]);
   }
   TmpIStruct.RD = atoi(Token[I][3]+1);
   return 0;
}


/* handles the expressions like "r29+1" generated by the compiler for
   code involving pointers to structs (added by NM):  every '+' found in
   the register token Tok is replaced by a blank and the decimal number
   following it is added to TmpIStruct.Base

   the first and the last character of Tok are not examined, exactly as
   in the original loops, so a '+' can only be found where at least one
   more character follows it

   I is the instruction number, used for the error report only; a '+'
   that is not followed by a number ends the program with exit status 1 */
static void MasAddOffsets(char *Tok, int I)
{  int J,Inc;
   int End = (int) strlen(Tok+1);  /* replacing '+' by ' ' never changes
                                      the length, so compute it once */

   for (J = 1; J < End; J++)  {
      if (Tok[J] != '+') continue;
      Tok[J] = ' ';
      if (sscanf(Tok+J+1,"%d",&Inc) != 1)  {
         printf("bad offset after '+' in a load-store instruction:\n");
         PERR2(I,SourceLine[I]);
         exit(1);
      }
      TmpIStruct.Base += Inc;
   }
}

/* fills in TmpIStruct for the I-th instruction when it belongs to the
   LOAD_STORE group, i.e. has the form  "op rs1,base,rd":

      token 1  register; its number goes to RS1
      token 2  base:  a decimal constant if it starts with a digit,
               otherwise a symbol looked up with SymLookup() among the
               first NDataNames symbols; the result goes to Base
      token 3  register; its number goes to RD

   "+n" suffixes on either register token are added to Base

   fewer than 4 tokens is reported with printf() and PERR2() and ends the
   program with exit status 1; otherwise the function returns 0 */
int AssembLoadStore(int I)
{  if (TokenCount[I] < 4)  {
      printf("fewer than 3 operands in a load-store instruction:\n");
      PERR2(I,SourceLine[I]);
      exit(1);
   }
   /* the +1 skips the leading 'r'; atoi() stops at a '+', so the
      register number is read before the suffixes are handled */
   TmpIStruct.RS1 = atoi(Token[I][1]+1);
   if (isdigit(Token[I][2][0])) TmpIStruct.Base = atoi(Token[I][2]);
   else TmpIStruct.Base = SymLookup(Token[I][2],NDataNames);
   TmpIStruct.RD = atoi(Token[I][3]+1);

   MasAddOffsets(Token[I][1],I);
   MasAddOffsets(Token[I][3],I);
   return 0;
}


/* fills in TmpIStruct for the I-th instruction when it belongs to the
   CONTROL_XFER group; the global Mnem selects the form:

      jmp cond,label   JumpTarget is label, looked up with SymLookup()
                       among all NSyms symbols; Condition is set from
                       cond, one of lt,le,eq,ge,gt,ne,none
      call rs1,label   JumpTarget as for jmp; the number of rs1 goes
                       to RS1
      ret rs1          the number of rs1 goes to RS1

   a missing operand or an unknown condition is reported with printf()
   and PERR2() and ends the program with exit status 1; otherwise the
   function returns 0

   the condition is compared in place; a token can be up to 32 bytes
   long, so it must not be copied into a smaller buffer */
int AssembControlXfer(int I)
{  const char *Cond;
   int IsJmp = !strcmp(Mnem,"jmp"),
       IsCall = !strcmp(Mnem,"call"),
       IsRet = !strcmp(Mnem,"ret");

   if (IsJmp || IsCall)  {
      if (TokenCount[I] < 3)  {
         printf("fewer than 2 operands in a jmp-call instruction:\n");
         PERR2(I,SourceLine[I]);
         exit(1);
      }
      TmpIStruct.JumpTarget = SymLookup(Token[I][2],NSyms);
   }
   if (IsRet && TokenCount[I] < 2)  {
      printf("missing return-address register:\n");
      PERR2(I,SourceLine[I]);
      exit(1);
   }
   /* the +1 skips the leading 'r' of the register name */
   if (IsCall || IsRet) TmpIStruct.RS1 = atoi(Token[I][1]+1);
   if (!IsJmp) return 0;

   Cond = Token[I][1];
   if (!strcmp(Cond,"lt"))  {TmpIStruct.Condition = LT; return 0;}
   if (!strcmp(Cond,"le"))  {TmpIStruct.Condition = LE; return 0;}
   if (!strcmp(Cond,"eq"))  {TmpIStruct.Condition = EQ; return 0;}
   if (!strcmp(Cond,"ge"))  {TmpIStruct.Condition = GE; return 0;}
   if (!strcmp(Cond,"gt"))  {TmpIStruct.Condition = GT; return 0;}
   if (!strcmp(Cond,"ne"))  {TmpIStruct.Condition = NE; return 0;}
   if (!strcmp(Cond,"none"))  {TmpIStruct.Condition = NONE; return 0;}
   printf("illegal condition \"%s\", at PC %d\n",Cond,I); 
   PERR2(I,SourceLine[I]); 
   exit(1);
   return 0;  /* not reached */
}

/* assembles the ith instruction (NOT counting data, comments and labels)

   TmpIStruct is cleared and then filled in:  AsmblSrcLine gets the source
   line without its trailing newline, Mnemonic and the global Mnem get
   the first token, InstrGrp gets the result of FindInstrGrp(), and the
   function for that group sets the operand fields; a ZERO_OPERAND
   instruction has no operand fields to set

   only a trailing newline is removed from the source line:  a line that
   SetTokSym() had to cut at 70 characters has none, and its last
   character must be kept

   always returns 0; errors end the program inside the functions called */

int Assemble(int I)
{  int IG,Len;

   MemSet(&TmpIStruct,0,sizeof(struct InstrStruct));
   strcpy(TmpIStruct.AsmblSrcLine,SourceLine[I]);
   Len = strlen(TmpIStruct.AsmblSrcLine);
   if (Len > 0 && TmpIStruct.AsmblSrcLine[Len-1] == '\n')
      TmpIStruct.AsmblSrcLine[Len-1] = 0;
   strcpy(Mnem,Token[I][0]);
   strcpy(TmpIStruct.Mnemonic,Mnem);
   IG = TmpIStruct.InstrGrp = FindInstrGrp(I);
   switch (IG)  {
      case REG_TO_REG: AssembRegToReg(I); break;
      case LOAD_STORE: AssembLoadStore(I); break;
      case CONTROL_XFER: AssembControlXfer(I); break;
      case ONE_REG: AssembOneReg(I); break;
      case ONE_CONST: AssembOneConst(I); break;
      case CONST_TO_REG: AssembConstToReg(I); break; /* KDR */
      default: break;  /* ZERO_OPERAND:  nothing more to fill in */
   }
   return 0;
}


main(argc,argv)
   int argc;  char *argv[];
 
{  int I,LabelNum;  

   OpenFiles(argv[1]);
   SetTokSym(argv[1]);  /* read .mas file, and set Token, SymTable */
   LabelNum = NDataNames;
   for (I = 0; I < NInstrs; I++)  {
      Assemble(I);
      if (I == SymTable[LabelNum].Location)  
         strcpy(TmpIStruct.Label,SymTable[LabelNum++].Symbol);
      write(OutFD,&TmpIStruct,sizeof(struct InstrStruct));
   }
   return 0;
}


/*  assembler for MulSim

    this is an assembler for the basic processor simulated by
    MulSim; the processor is of RISC type (i.e. a load/store
    machine, with all arithmetic operations being done on a
    register-to-register mode only), with register windows
    being used for the runtime stack

    the source file name must have the suffix .mas; from this file,
    the assembler produces several output files, with the same
    prefix; say for example that the source file is x.mas; then
    the following files are produced by the assembler:

       x.out  --  "machine-language" file (actually not a strict
                  "binary" file, though; it is a file of InstrStruct;
		  see Include.h)
       x.lst  --  shows which addresses the source lines have been
                  assigned to; handy when setting breakpoints with
		  the 'b' command in MulSim
       x.sym  --  shows which addresses the variables declared in
                  x.mas have been assigned to (used internally by
		  MulSim; typically not directly accessed by the user)
    
    the data declarations must come first, preceded by a line ".data",
    and in the format of a symbol and a number of words to be reserved
    for that symbol
    
    then come the instructions, preceded by a line ".text"; labels are
    required to be on a SEPARATE line, ending with `:'; they cannot
    begin with a digit, `r' or `&'; the main program must come first
    (MulSim begins execution at instruction 0); the last source line 
    must be ".end"
    
    memory addresses are word numbers for data, instruction numbers for 
    instructions
    
    register file (similar to those used in the SPARC and the Berkeley
       RISC I and II):
   
       absolute register numbers are 0,1,2,..

       each window has 32 registers, with relative (i.e. virtual) numbers 
       0-31
      
       relative numbers 0-9 are global registers; they are also absolute 
       numbers 0-9, shared by all windows
       
       relative (and absolute) number 0 has the value 0 hard-wired into it;
       it can be used as the destination register RD, but its value will
       not change
      
       relative numbers 10-15 of a parent window are relative numbers 26-31 
       of the child window, i.e. the window of the procedure called from the 
       parent window; the relations are p =  c - 16 and c = p + 16, where
       c is the number of the register in the child window, and p is the 
       number of that same register in the parent window
      
       the simulator assumes no overflow, i.e. an infinite supply, but of 
       course there are only a finite number of registers, NRegs of them
       (NWindows' worth, plus 0-9); the simulator will detect overflow
       and halt the program in that case

       the window at the beginning of the simulated program's run is called 
       Window 0, and then the window arising from the first call is Window 
       1, etc.; the last window is Window NWindows-1; CWP indicates the
       current window number; ordinary users will typically not access
       this quantity directly

    assembly-language syntax and instruction set definition:


    REG_TO_REG
       add rs1,op2,rd
       sub rs1,op2,rd
       mul rs1,op2,rd
       div rs1,op2,rd
       mod rs1,op2,rd
       fadd rs1,op2,rd
       fsub rs1,op2,rd
       fmul rs1,op2,rd
       fdiv rs1,op2,rd
       and rs1,op2,rd
       andn rs1,op2,rd  (nand)
       or rs1,op2,rd
       xor rs1,op2,rd  
       orn rs1,op2,rd (nor)
       xorn rs1,op2,rd
       sll rs1,op2,rd (shift left logical)
       srl rs1,op2,rd (shift right logical)
       sra rs1,op2,rd (shift right arithmetic)
       (op2 is either rs2 or const)

       bcomp rs1, 0, rd  (bit complement, 0 is for simplicity)

    CONTROL_XFER

       jmp cond,label  (cond is one of lt,le,eq,ge,gt,ne,none, the latter
                        for an unconditional jump)
       call rs1,label  (rs1 is used to save the return address)
       ret rs1  (restore PC from rs1; the rs1 here should be 16 more
                 than the rs1 in the call)

    LOAD_STORE
       ld rs1,base,rd  (mem[rs1+base] --> rd)
       st rs1,base,rd  (rs1 --> mem[rd+base])
       ainc rs1,base,rd  (atomic implementation of mem[rs1+base]++;
                          returns old value of mem[rs1+base] to rd)
       tas rs1,base,rd  (test-and-set, i.e. atomic implementation of 
                             tmp = mem[rs1+base];
			     rd = tmp;
			     if (tmp == 0) tmp = 1;
			     mem[rs1+base] = tmp;
                         0 means unlocked, 1 means locked; note that rs1 
			 operand can be used to set up an array of locks
       (base is one of const,label; note that direct, indirect and
       indexed addressing are all supported) 

    ONE_REG
       cpunum rd  (CPU number is put in rd)       
       systsize rd  (total number of CPUs is put in rd)


    ONE_CONST
       userhook const  (the user-defined function UserHook() is called; see
                        description in the file MulSim.c)
       (cpunum, systsize and userhook are included for convenience and are
       thus not "real" instructions, so they should be used sparingly, so 
       as to produce minimal perturbation of the results)

    ZERO_OPERAND
       nop
       halt  (PC stops incrementing)


       constants (described as `const' above) are taken to be decimal;
       if a constant is pre-pended with `&', the rest of the token
       is assumed to be a data label, and the constant generated is the
       address of that label
 
       comments must be on a SEPARATE line and have `;' as the first
       character in the line

    memory-access unit:  for now, smallest accessible unit is a word
    (note:  most items are declared as type long, to facilitate
    Unix/DOS compatibility; there will be no effect from this on
    32-bit Unix systems)

    synthesized instructions and addressing modes:

       mov  --  do add with one of the first two operands being 0
       indirect addressing  --  load/store with base set to 0
       indexed addressing  --  load/store with base set to index base,
                               rs1/rd to index value
       direct addressing  --  load/store with base set to the target,
                              and rs1/rd set to 0

    when adding new instructions, be sure to check:

       Mas.c:
          the entry for this instruction in the comments above
          the switch statement in Assemble(), and the function it calls
             for this instruction
          the function FindInstrGrp()
       
       OneCycle.c:
          the if statements involving IG in Execute()
	  the if statements checking Mnem in Execute()
          add a function for this instruction, like Add() etc.
*/








