#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/time.h>
#include <UCTInclude.h>

// Problem size. Set once in main() from argv[5]; FAInextIndex(), multRow()
// and Worker() use it as the row/column length and as the index bound.
int N;

// Program entry point.
//
// Reads the problem size N from argv[5], validates it, and then passes the
// unmodified argument vector to UCTInit().
//
// Returns EXIT_FAILURE (without calling UCTInit) when argv[5] is missing, is
// not a plain decimal integer, or lies outside 0..100000.  Otherwise returns
// 0 once UCTInit() returns.
int main(int argc, char * argv[]){
   // Worker() and multRow() keep their vectors in int[100000] arrays, so a
   // larger N would make them run past the end of those buffers.
   enum { MAX_N = 100000 };
   char *end;
   long value;

   if (argc < 6) {
      fprintf(stderr, "error: the problem size N must be given as argument 5\n");
      return EXIT_FAILURE;
   }

   // strtol (unlike atoi) lets us tell "not a number" apart from 0.  An
   // out-of-range input saturates to LONG_MIN/LONG_MAX, which the range
   // test below rejects as well.
   value = strtol(argv[5], &end, 10);
   if (end == argv[5] || *end != '\0' || value < 0 || value > MAX_N) {
      fprintf(stderr, "error: N must be an integer in 0..%d, got \"%s\"\n",
              MAX_N, argv[5]);
      return EXIT_FAILURE;
   }
   N = (int)value;

   UCTInit(argc,argv);
   return 0;
}

// Reusable barrier built on two counter tuples, ("Barrier", 0, count) and
// ("Barrier", 1, count); Worker() seeds both with a count of 0.
//
// Each call withdraws the counter for the current phase, bumps it, puts it
// back, and then waits (rd) for that counter to read UCTNWorkNodes.
// Successive calls alternate between the two tuples.  A counter that is
// found at UCTNWorkNodes -- left over from the previous use of that phase --
// starts again at 1.
void Barrier() {
  static int phase = 0;   // which of the two barrier tuples this call uses
  int arrivals;           // hit count carried by that tuple

  // Take the counter out of tuple space so it can be updated.
  in ("rsip", "Barrier", phase, &arrivals);

  // Count this caller; wrap around when the tuple is being reused.
  ++arrivals;
  if (arrivals == UCTNWorkNodes+1) {
    arrivals = 1;
  }

  // Publish the updated counter.
  out("rsii", "Barrier", phase, arrivals);

  // Wait for the counter tuple to show UCTNWorkNodes hits.
  if (arrivals <= UCTNWorkNodes) {
    rd("rsii", "Barrier", phase, UCTNWorkNodes);
  }

  // The next call uses the other tuple.
  phase = !phase;
}

// Fetch-and-increment on the "nextIndex" tuple: hand out the index of the
// next cell to calculate.
//
// Withdraws ("nextIndex", i, j), reports that (i, j) pair to the caller and
// puts back the position that follows it: (i, j+1) while j+1 < N, otherwise
// (i+1, 0).
//
//   inArgs, inLen - not used
//   outArgs       - receives i followed by j as two raw ints
//   outLen        - set to 2*sizeof(int)
void FAInextIndex(char *inArgs, int inLen, char *outArgs, int* outLen){
   int cur[2];   // cur[0] = i, cur[1] = j

   // Take the index tuple out of tuple space.
   in("lspp", "nextIndex", &cur[0], &cur[1]);

   // Return the current pair through the output arguments.
   *outLen = 2*sizeof(int);
   memcpy(outArgs, cur, sizeof cur);

   // Step to the next column, moving on to the start of the next row once
   // the column index reaches N.
   cur[1]++;
   if (cur[1] >= N){
      cur[1] = 0;
      cur[0]++;
   }

   // Put the advanced index back for the next caller.
   out("lsii", "nextIndex", cur[0], cur[1]);
}

// Multiply the given row and column: compute the dot product of row i and
// column j and publish it as the tuple ("prod", i, j, prod).
//
//   inArgs  - two raw ints: i (row number) followed by j (column number)
//   inLen   - not used
//   outArgs - not used; nothing is returned through it
//   outLen  - set to 0
void multRow(char* inArgs, int inLen, char* outArgs, int* outLen){
   enum { MAX_LEN = 100000 };   // capacity of the two vector buffers
   int i,j,n,prod, row[MAX_LEN], col[MAX_LEN];
   unsigned int acc = 0;
   int count;

   //retrieve arguments
   memcpy(&i, inArgs, sizeof(int));
   memcpy(&j, inArgs+sizeof(int), sizeof(int));
   *outLen =0;

   //read row i and column j (n receives the length field of each tuple)
   rd("lsipp", "row", i, &n, row);
   rd("lsipp", "col", j, &n, col);

   //never index past the end of the buffers, whatever N is
   count = (N < MAX_LEN) ? N : MAX_LEN;

   //calculate product.  The sum is accumulated in unsigned arithmetic: with
   //int, both the individual products and the running total overflow for
   //larger N, and signed overflow is undefined behaviour.  Unsigned
   //arithmetic wraps modulo 2^width instead.
   for(int k=0;k<count;k++){
      acc += (unsigned int)row[k] * (unsigned int)col[k];
   }
   //Converting back to int is implementation-defined for values above
   //INT_MAX; mainstream compilers wrap (two's complement).
   prod = (int)acc;

   //insert the answer into tuple space
   out("lsiii", "prod",i,j,prod);
}

// Per-node worker routine.
//
// Node 1 first seeds tuple space with the two barrier counters, N "row" and
// N "col" tuples (each holding the values 0..N-1) and the ("nextIndex", 0, 0)
// work counter.  After Barrier(), every node repeatedly claims the next
// (i, j) cell through tmexec(FAInextIndex, ...) and has multRow compute it,
// until the claimed row index reaches N.  Each node finally emits the
// ("UCTEnd") tuple.
//
// Returns 0.
int Worker(void){
   int Me = UCTNodeNum;
   // NOTE: N must not exceed 100000, otherwise the fill loop below runs
   // past the end of these arrays.
   int row[100000], col[100000];
   int i,j,n; //counters and stuff

   //let node 1 set up matrices in tuple space
   if (Me == 1){
      out("rsii", "Barrier", 0, 0);  //Setup the Barrier
      out("rsii", "Barrier", 1, 0);
      for (i=0;i<N;i++){
         for(j=0;j<N;j++){
            row[j] = j;
            col[j] = j;
         }
         out("rsiiI", "row", i, N, row);
         out("rsiiI", "col", i, N, col);
      }
      out("rsii", "nextIndex", 0,0);
   }

   Barrier();

   while(1){
      // Fetch-and-increment the shared index.  FAInextIndex stores i and j
      // as two consecutive ints at the start of its output buffer; tmexec
      // is assumed to deliver those bytes into `row`, i.e. into row[0] and
      // row[1].
      tmexec(FAInextIndex, NULL, 0, (char*) row, &n);

      // Index the ints directly.  `row + sizeof(int)` must not be used
      // here: on an int* it advances by sizeof(int) ELEMENTS, not bytes,
      // and would address row[4] instead of the second int.
      i = row[0];
      j = row[1];

      if (i>=N) break;

      // multRow expects i followed by j at the start of its input buffer.
      row[0] = i;
      row[1] = j;
      tmexec(multRow, (char*) row,2*sizeof(int), (char*) row, &n);
   }

   if(Me==1){
      // The results stay in tuple space as ("prod", i, j, value) tuples;
      // nothing is read back or printed here.
      printf("Done\n");
   }
   out("rs", "UCTEnd");
   return(0);
}

