#include <iostream.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <UCTInclude.h>		// include UCTTuplets include file


#define NUM_VERTS	150
#define MAX_INT		10000000   
int NNodes;			// stores the number of processors
int NewDone;			// stores new vertex to consider
int Me;				// stores our node number 

/************ FindMinDistHelper() user function ***********/
void FindMinDistHelper(char* inargs, int argsize, char* outargs, int* outsize) {
  float min = 1000000.0;
  float dist;
  int done, min_index;
  int start, end;

  memcpy(&start, inargs, sizeof(int));
  memcpy(&end, inargs + sizeof(int), sizeof(int));

  for (int i=start; i <= end; i++) {
    rd("lsip", "Done", i, &done);	// grab the "Done" value for this vertex
    if (done) continue;		// if the vertex is already done, skip to next iteration

    rd("lsip", "Dist", i, &dist);	// grab Dist value
    if (dist < min) {			// update minimum value found thus far
      min_index = i;
      min = dist;
    }
  }

  memcpy(outargs, (char*) &min, sizeof(float));
  memcpy(outargs + sizeof(float), (char*) &min_index, sizeof(int));
  *outsize = sizeof(float) + sizeof(int);
}
/*********************************************/

/************ UpdateListHelper() user function ***********/
void UpdateListHelper(char* inargs, int argsize, char* outargs, int* outsize) {
  int i, min_index, done;
  float  dist1, dist2;
  float a[NUM_VERTS];		// array for temp storage of Weight array
  int start, end;

  memcpy(&start, inargs, sizeof(int));
  memcpy(&end, inargs + sizeof(int), sizeof(int));

  for (i=start; i <= end; i++) {
    rd ("lsip", "Done", i, &done);	// find the Done value for this vertex
    if (done) continue;		// if the vertex is already done, skip to next iteration

    in ("lsip", "Dist", i, &dist1);	// grab Dist value for this vertex
    rd ("lspp", "Min", &min_index, &dist2);
    rd ("lsiip", "Weights", i, NUM_VERTS, a);	//grab a row of the Weight matrix
  
    if (dist1 > dist2 + a[min_index])	// Here is the Dijkstra comparision
        dist1 = dist2 + a[min_index];

    out("lsif", "Dist", i, dist1);	// Update Dist value for this node
  }

  *outsize = 0;
}


/************ Start original code ****************/
   
/* InitG() is used to set up our weight matrix.  For G[i][j], i is the row and j is the column
   for our matrix.  This matrix is then output to the tuple by rows.  So, when a vertex x needs to
   determine its weight with respect to vertex y, it will read row x from the tuple space and access
   element y within that row (array). This could be done in a separate generated file that is #included.
   The user must keep in mind that this matrix is symmetrical, so G[i][j] == G[j][i]. */

void InitG() {
        int i, j;
        float G[NUM_VERTS][NUM_VERTS];

        srand(27);
  
        for (i=0; i < NUM_VERTS; i++) {
          for (j=i+1; j < NUM_VERTS; j++){
             if ( i==j )
                G[i][j] = 0.0;
             else
                G[i][j] = (((rand() % 5001) + 1)/1.0);
          }
        }
  
        for (i=0; i < NUM_VERTS; i++)
           out("rsiiF", "Weights", i, NUM_VERTS, G[i]);
}



/* FindMinDist() is used to find the min dist value in the Dist array.  The Dist
   array is split up into chunks designated by start & end.  So each node has a
   piece of the array to search for the min. */

void FindMinDist(int start, int end) {
  float min = MAX_INT, cur_min;
//  float dist;		/********* Removed since not needed with new user function *******/
//  int done;			/********* Removed since not needed with new user function *******/
  int min_index;
  int dummy;
  char * inbuffer = new char[2*sizeof(int)];
  char * outbuffer = new char[sizeof(float) + sizeof(int)];

  if (Me == 1)			/* Output the tuple to use for finding the min */
    out("rsif", "Min", 0, min);

/*************  Use new user function to calculate min **************/
  memcpy(inbuffer, &start, sizeof(int));
  memcpy(inbuffer+sizeof(int), &end, sizeof(int));
  tmexec(FindMinDistHelper, inbuffer, 2*sizeof(int), outbuffer, &dummy);
  memcpy(&min, outbuffer, sizeof(float));
  memcpy(&min_index, outbuffer + sizeof(int), sizeof(int));
/**************************************************************************/

/************** Original min routine, replaced by user function ************
  for (int i=start; i <= end; i++) {
    rd("rsip", "Done", i, &done);	// grab the "Done" value for this vertex
    if (done) continue;			// if the vertex is already done, skip to next iteration

    rd("rsip", "Dist", i, &dist);	// grab Dist value
    if (dist < min) {			// update minimum value found thus far
      min_index = i;
      min = dist;
    }
  }
**************************************************************************/

  in("rspp", "Min", &dummy, &cur_min);	// Min for our chunk is found, so update the Min tuple

  if (min < cur_min) 
    out("rsif", "Min", min_index, min);
  else
    out("rsif", "Min", dummy, cur_min);

  delete [] inbuffer;
  delete [] outbuffer;
}


/************** This version of the UpdateList() function has been replaced by user fxn **********/
/* UpdateList() is used to update our Dist values.  This is parallelized by, once again,
   splitting the Dist array up into chunks designated by start & end, and giving each
   chunk to separate nodes for independent calculation. */

void UpdateList(int start, int end) {
  int i, min_index, done;
  float  dist1, dist2;
  float a [NUM_VERTS];		// array for temp storage of Weight array

  for (i=start; i <= end; i++) {
    rd ("rsip", "Done", i, &done);	// find the Done value for this vertex
    if (done) continue;			// if the vertex is already done, skip to next iteration

    in ("rsip", "Dist", i, &dist1);	// grab Dist value for this vertex
    rd ("rspp", "Min", &min_index, &dist2);
    rd ("rsiip", "Weights", i, NUM_VERTS, a);	//grab a row of the Weight matrix
  
    if (dist1 > dist2 + a[min_index])	// Here is the Dijkstra comparision
        dist1 = dist2 + a[min_index];

    out("rsif", "Dist", i, dist1);	// Update Dist value for this node
  }

  delete [] a;
}





/* Barrier() implements a reusable barrier using the Tuple space */
void Barrier() {
  int NHit;				// # of hits to this barrier thus far
  static int EvenOdd = 0;		// records current barrier to index

  in ("rsip", "Barrier", EvenOdd, &NHit);	//grab barrier value
  NHit++;					//increment number of hits
  if (NHit == UCTNWorkNodes+1) NHit = 1;	//reset if necessary
  out("rsii", "Barrier", EvenOdd, NHit);	//output new barrier value
  if (NHit <= UCTNWorkNodes)
    rd("rsii", "Barrier", EvenOdd, UCTNWorkNodes);	// block until barrier is released

  EvenOdd = 1 - EvenOdd;
}





/* Worker() is the where all the action occurs.  Here we setup our Dijkstra loop and run it */

void Worker() {
  int i, min_index, dummy;
  float min, dist;
  int start, end; 		// stores the start & end of this nodes array chunk
  float Weights[NUM_VERTS];

  NNodes = UCTNWorkNodes;	// set number of processors
  Me = UCTNodeNum;		// set self ID

  if (Me == 1) {
    out("rsii", "Barrier", 0, 0);  //Setup the Barrier
    out("rsii", "Barrier", 1, 0);

    InitG();			//Init the weight matrix

    /* Grab the first row of the weight matrix.  Use this to initialize the Dist data
       with the weight from vertex 0 to every other node. */
    rd("rsiip", "Weights", 0, NUM_VERTS, Weights);

    for (i=1; i < NUM_VERTS; i++) {
      out("rsii", "Done", i, 0);		//Init the Done array to all 0's
      out("rsif", "Dist", i, Weights[i]);
    }
    out("rsii", "Done", 0, 1);		//Vertex 0 is already done, record this.
  }


  /* set our start and end values for splitting arrays into chunks */
  start = (Me-1) * (NUM_VERTS/NNodes);
  end = Me * (NUM_VERTS/NNodes) - 1;
  if ( Me == NNodes )
    end = NUM_VERTS -1;

  Barrier();		//Synchronize everyone for the main Dijkstra loop

  /* main Dijkstra loop */
  for (i=1; i < NUM_VERTS; i++) {
    /******** Internals of FindMinDist now use user function to find min *********/
    FindMinDist(start, end); 	// find min Dist for this iteration
    Barrier();			// Synchronize for Min to be found

    if (Me == 1) {
      rd ("rspp", "Min", &min_index, &min);  //grab the index for the vertex with min Dist
      in ("rsii", "Done", min_index, 0);
      out ("rsii", "Done", min_index, 1);    //Set this vertex to done
    }

    Barrier();			// Synchronize to update Done array

/******** Old UpdateList() yanked.  Now use user function UpdateListHelper() *******/
//    UpdateList(start, end); 	// Min found, update Dist array in parallel
    char outbuffer[1];
    char* inbuffer = new char[sizeof(int)*2];
    memcpy(inbuffer, &start, sizeof(int));
    memcpy(inbuffer + sizeof(int), &end, sizeof(int));
    tmexec(UpdateListHelper, inbuffer, 2*sizeof(int), outbuffer, &dummy);
    delete [] inbuffer;
    
    Barrier();			// Synchronize to update Dist array

    if ( Me == 1 )
       in ("rspp", "Min", &min_index, &min);    // Clear out this cycles min 
  }


  /* print if I am node 1 */
  if (Me == 1) {
//    printf("\n\n");
    for (i=1; i < NUM_VERTS; i++) {
      rd("rsip", "Dist", i, &dist);
//      printf("Node 0 to Node %d : %f\n", i, dist);
    }
//    printf("\n");
  }
//  fflush(stdout);

  out ("rs", "UCTEnd");  // terminate worker
}

int main(int argc, char ** argv) {
  UCTInit(argc, argv);  // Init the UCTuplets environment
}


