/****(c) 2020. A. Gerbessiotis. See ../KAagcopy.txt ****/
/*******************************************************/
#include <memory.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include "avgaii.h"
#include "aimisc.h"
#include "prdx.h"
#include "psrtai.h"

/* Number of leading and trailing keys shown by printseq()/printuint(). */
#define ELMNTS 8
/* Not referenced by the code visible in this file. */
#define MODULO 100000
/* Mask applied to random() by dataseq() for input type 1 (keeps the low 31 bits). */
#define MODULO_UINT ((unsigned int) 0x7FFFFFFF)
/*
 * WRITE1 ... WRITE2 bracket a timed region: WRITE1 opens a loop over `runs`
 * that restores ptr[] from bptr[] and calls avg_tstart(); WRITE2 calls
 * avg_tend(), accumulates avg_treturn() into `elapsed` and closes the loop.
 * They expect i, j, n, runs, elapsed, ptr and bptr to be in scope.
 */
#define WRITE1  elapsed=0.0; for(j=0;j<runs;j++){ for(i=0;i<n;i++) ptr[i] = bptr[i]; avg_tstart();
#define WRITE2  avg_tend(); elapsed += (avg_treturn()); }
/* Prints the parameter prefix of a result line. */
#define WRITELINE3 printf("(%2d,%2d,%9d,%1d,%1d,%1d):",THRESI16,TMERGEI,n,type,SPLITTER,runs);
/*
 * Compares ptr[] with obptr[] over indices 0..n-2 and reports the first
 * mismatch.  The original definition lacked the closing ')' of the if
 * condition and could not be expanded without a syntax error.
 */
#define CHECKLINE  for(i=0;i<n-1;i++) if (ptr[i]!=obptr[i]) {printf("i=%d ERR",i); break;}


/* Forward declarations for the routines called in this file. */

/* Debug printing helpers; both are defined later in this file. */
void printseq(idata *inp,int n,char *s,int debug)           ;
void printuint(idata *inp,int n,char *s,int debug)          ;
/* datainit is declared here but neither defined nor called in the code visible in this file. */
void datainit (idata *d, int n, int type)                   ;
/* Input generator; defined later in this file. */
void dataseq  (idata *d, int n, int type)                   ;
/* Sorting routines called from mmain(); their definitions are not in this file. */
void i32rdx4  (idata *, int )                               ;
void rdx4 (idata *, int )                                   ;
void rdx2 (idata *, int )                                   ;
void pprdx (idata *, int )                                  ;
/* Result checkers; both are defined later in this file. */
void datacheck (idata *res, idata *ori , int n)             ;
void seqdatacheck (idata *ori , int n)                      ;
/* Further routines called from mmain(); their definitions are not in this file. */
int bspbtndat (idata *,idata *,int)                         ;
int bspoets  (idata *,int)                                 ;
void qqsort (void *akeys,int n,int size,int (*compare)(const void*,const void*) );


int compare(const void *x,const void *y)
{
        return(int)(* ((idata*)x)-*((idata*)y));
}



/* Value handed to AIBEGIN() by mmain(); assigned in main(). */
int avg_p;

/* Run parameters: written by main(), read by mmain() and avg_display(). */
struct avg_prms {
  int n;      /* per-processor key count (mmain sorts n*nprocs keys in total) */
  int type;   /* input pattern selector passed to dataseq()                   */
  int runs;   /* number of timed repetitions of each benchmark                */
  int debug;  /* 1 enables the diagnostic printing                            */
};

struct avg_prms prms;

/*
 * Prints the run configuration held in the global `prms`, together with
 * the processor id and processor count, on stdout.
 * Only processor 0 prints; every other processor returns immediately.
 */
void avg_display(void)
{
 if (AIPID()!=0)
  return;

 fprintf(stdout,
         "pid=%d of %d n=%d type=%d runs =%d debug=%d\n",
         AIPID(), AINPROCS(),
         prms.n, prms.type, prms.runs, prms.debug);
 fflush(stdout);
}

/*
 * mmain: the benchmark body, bracketed by AIBEGIN(avg_p) and AIEND().
 *
 * What the code does, in order:
 *   1. Copies the run parameters out of the global `prms`.
 *   2. Allocates two n*nprocs-key buffers (sptr, sbptr), three n-key
 *      buffers (ptr, pptr, bptr) and two nprocs-int arrays (rrsize, prsize).
 *   3. dataseq() fills sptr on processor 0; bspscatter() then moves
 *      n keys per processor from sptr into bptr (presumably one slice per
 *      processor - confirm against the bspscatter implementation).
 *   4. Processor 0 sorts sptr with i32rdx4(); the sorted sptr serves as the
 *      reference that the later benchmarks are compared against.
 *   5. Eight benchmarks follow (rdx4, pprdx, rdx2, i32rdx4+bspbtndat,
 *      bspoets, gsd, gvr, ger).  Each one restores ptr from bptr, times the
 *      sort `runs` times, combines the per-processor time with
 *      bspfold()/foldmax into t3, prints t3/runs on processor 0, collects
 *      the result into sbptr and calls datacheck() against sptr.
 *
 * NOTE(review): every datacheck() call passes n, so only the first n keys
 * of the n*nprocs gathered keys are compared - confirm this is intended.
 * NOTE(review): the malloc() results are not checked.
 *
 * Change from the original: rrsize and prsize were allocated but never
 * released; they are now freed together with the other buffers, and every
 * buffer pointer starts out as NULL.
 */
void mmain() {
  register int nprocs,pid,r;
  int rsize=0;
  int *rrsize, *prsize;
  register int n,type,runs,debug;
  double t1,t2,t3;
  idata  *ptr, *bptr, *sptr, *sbptr, *pptr;

   AIBEGIN(avg_p);       /* BEGIN */

   ptr=bptr=sptr= sbptr= pptr= NULL;
   rrsize=prsize= NULL;

   nprocs = AINPROCS();
   pid    = AIPID();
   n      = prms.n;
   type   = prms.type;
   debug  = prms.debug;
   runs   = prms.runs;
   if (1==debug) avg_display();
   AIBARRIER();

   /* n is the per processor key size */
   /* that is N = n * nprocs          */
   /* The two full-size buffers are only filled on processor 0, but every */
   /* processor allocates them.                                           */
    sptr  = (idata *) malloc(n*nprocs*sizeof(idata));
    sbptr = (idata *) malloc(n*nprocs*sizeof(idata));

   /* Per-processor size arrays used by the uneven gathers of benchmarks 6-8. */
    rrsize= (int   *) malloc(nprocs*sizeof(int  ));
    prsize= (int   *) malloc(nprocs*sizeof(int  ));

   dataseq(sptr,n*nprocs,type); /* only on processor 0 */
   printseq(sptr,n*nprocs,"InQ ",debug);
   AIBARRIER();
   ptr  = (idata *) malloc((n)*sizeof(idata));   /* working copy sorted in each run */
   pptr = (idata *) malloc((n)*sizeof(idata));   /* output buffer of bspbtndat()    */
   bptr = (idata *) malloc((n)*sizeof(idata));   /* pristine local input            */
   AIBARRIER();
   bspscatter(0,(char*) sptr,(char*) bptr,n*sizeof(idata));
   AIBARRIER();
   printuint(bptr,n,"In ",debug);

      /* Sequential reference: processor 0 sorts the whole input in place. */
      t1=0.0;
      AIBARRIER();
      t2=AITIME();
      if (pid == 0) {
         i32rdx4(sptr,n*nprocs);
/*
         qsort(sptr,n*nprocs,sizeof(idata),compare);
*/
      }
      t3=AITIME();
      AIBARRIER();
      t1=(t3-t2);
      bspfold((void (*)(void *,void *,void *)) foldmax,(char *)&t1,(char *)&t3,sizeof(double));
      AIBARRIER();
      printseq(sptr,n*nprocs,"OutQ",debug);
      if (pid==0) {
      fprintf(stdout,"(%2d,%2d,%1d):",n*nprocs,type,runs);
      /* Processor 0's own elapsed time (t1) is reported for this step. */
      fprintf(stdout,"    %18s        : Elapsed time is: %10.8f","i32r4",t1);
/*
      fprintf(stdout,"    %18s generic: Elapsed time is: %10.8f","qsort",t3);
*/
      fflush(stdout);
      }
      seqdatacheck(sptr,n*nprocs);
      AIBARRIER();

   /* START BENCHMARKING : BENCHMARK 1 */
   t1 = 0.0;
   for(r=0;r<runs;r++){
   memcpy(ptr,bptr,n*sizeof(idata));   /* restore the unsorted local input */
   AIBARRIER();
   t2=AITIME();
     rdx4(ptr,n);
   t3=AITIME();
   AIBARRIER();
   t1+=(t3-t2);
   }
   bspfold((void (*)(void *,void *,void *)) foldmax,(char *)&t1,(char *)&t3,sizeof(double));
   AIBARRIER();
   if (0 == pid) {
      fprintf(stdout,"(%2d,%2d,%1d):",n*nprocs,type,runs);
      fprintf(stdout,"    %18s        : Elapsed time is: %10.8f","rdx4",t3/runs);
      fflush(stdout);
   }
   bspgather(0,(char*)ptr,(char*)sbptr,n*sizeof(idata));
   datacheck(sbptr,sptr,n);
   printuint(ptr,n,"Out",debug);
   AIBARRIER();
   printseq(sbptr,n*nprocs,"OutR",debug);
   /* END BENCHMARKING */

   /* START BENCHMARKING : BENCHMARK 2 */
   t1 = 0.0;
   for(r=0;r<runs;r++){
   memcpy(ptr,bptr,n*sizeof(idata));
   AIBARRIER();
   t2=AITIME();
     pprdx(ptr,n);
   t3=AITIME();
   AIBARRIER();
   t1+=(t3-t2);
   }
   bspfold((void (*)(void *,void *,void *)) foldmax,(char *)&t1,(char *)&t3,sizeof(double));
   AIBARRIER();
   if (0 == pid) {
      fprintf(stdout,"(%2d,%2d,%1d):",n*nprocs,type,runs);
      fprintf(stdout,"    %18s        : Elapsed time is: %10.8f","pprd",t3/runs);
      fflush(stdout);
   }
   bspgather(0,(char*)ptr,(char*)sbptr,n*sizeof(idata));
   datacheck(sbptr,sptr,n);
   printuint(ptr,n,"Out",debug);
   AIBARRIER();
   printseq(sbptr,n*nprocs,"OutR",debug);
   /* END BENCHMARKING */

   /* START BENCHMARKING : BENCHMARK 3 */
   t1 = 0.0;
   for(r=0;r<runs;r++){
   memcpy(ptr,bptr,n*sizeof(idata));
   AIBARRIER();
   t2=AITIME();
      rdx2(ptr,n);
   t3=AITIME();
   AIBARRIER();
   t1+=(t3-t2);
   }
   bspfold((void (*)(void *,void *,void *)) foldmax,(char *)&t1,(char *)&t3,sizeof(double));
   AIBARRIER();
   if (0 == pid) {
      fprintf(stdout,"(%2d,%2d,%1d):",n*nprocs,type,runs);
      fprintf(stdout,"    %18s        : Elapsed time is: %10.8f","rdx2",t3/runs);
      fflush(stdout);
   }
   bspgather(0,(char*)ptr,(char*)sbptr,n*sizeof(idata));
   datacheck(sbptr,sptr,n);
   printuint(ptr,n,"Out",debug);
   AIBARRIER();
   printseq(sbptr,n*nprocs,"OutR",debug);
   /* END BENCHMARKING */

   /* START BENCHMARKING : BENCHMARK 4 */
   /* Local i32rdx4() followed by bspbtndat(); the result is taken from pptr. */
   t1 = 0.0;
   for(r=0;r<runs;r++){
   memcpy(ptr,bptr,n*sizeof(idata));
   AIBARRIER();
   t2=AITIME();
      i32rdx4(ptr,n);
      bspbtndat(ptr,pptr,n);
   t3=AITIME();
   AIBARRIER();
   t1+=(t3-t2);
   }
   bspfold((void (*)(void *,void *,void *)) foldmax,(char *)&t1,(char *)&t3,sizeof(double));
   AIBARRIER();
   if (0 == pid) {
      fprintf(stdout,"(%2d,%2d,%1d):",n*nprocs,type,runs);
      fprintf(stdout,"    %18s        : Elapsed time is: %10.8f","btns",t3/runs);
      fflush(stdout);
   }
   bspgather(0,(char*)pptr,(char*)sbptr,n*sizeof(idata));
   datacheck(sbptr,sptr,n);
   printuint(pptr,n,"Out",debug);
   AIBARRIER();
   printseq(sbptr,n*nprocs,"OutR",debug);
   /* END BENCHMARKING */

   /* START BENCHMARKING : BENCHMARK 5 */
   t1 = 0.0;
   for(r=0;r<runs;r++){
   memcpy(ptr,bptr,n*sizeof(idata));
   AIBARRIER();
   t2=AITIME();
     bspoets(ptr,n);
   t3=AITIME();
   AIBARRIER();
   t1+=(t3-t2);
   }
   bspfold((void (*)(void *,void *,void *)) foldmax,(char *)&t1,(char *)&t3,sizeof(double));
   AIBARRIER();
   if (0 == pid) {
      fprintf(stdout,"(%2d,%2d,%1d):",n*nprocs,type,runs);
      fprintf(stdout,"    %18s        : Elapsed time is: %10.8f","oets",t3/runs);
      fflush(stdout);
   }
   bspgather(0,(char*)ptr,(char*)sbptr,n*sizeof(idata));
   datacheck(sbptr,sptr,n);
   printuint(ptr,n,"Out",debug);
   AIBARRIER();
   printseq(sbptr,n*nprocs,"OutR",debug);
   /* END BENCHMARKING */

   /* START BENCHMARKING : BENCHMARK 6 */
   /* In benchmarks 6-8 the sort receives &pptr and returns the local result */
   /* size in rsize; the previous pptr buffer is released before each call   */
   /* (presumably the sort stores a freshly allocated buffer in pptr -       */
   /* confirm against gsd/gvr/ger).                                          */
   t1 = 0.0;
   for(r=0;r<runs;r++){
   memcpy(ptr,bptr,n*sizeof(idata));
   free((void*) pptr); /* used as output in Bitonic Sorting! discard it here */
   AIBARRIER();
   t2=AITIME();
      rsize = gsd(ptr,n,n*nprocs,&pptr,sizeof(idata),i32rdx4,qqsort,100);
   t3=AITIME();
   AIBARRIER();
   t1+=(t3-t2);
   }
   bspfold((void (*)(void *,void *,void *)) foldmax,(char *)&t1,(char *)&t3,sizeof(double));
   AIBARRIER();
   if (0 == pid) {
      fprintf(stdout,"(%2d,%2d,%1d):",n*nprocs,type,runs);
      fprintf(stdout,"    %18s        : Elapsed time is: %10.8f","gsd ",t3/runs);
      fflush(stdout);
   }
   AIBARRIER();
   aippf(&rsize, rrsize,prsize);
   bspunevengather(0,(char*)pptr,(char*)sbptr,sizeof(idata),rrsize,prsize);
   AIBARRIER();
   datacheck(sbptr,sptr,n);
   printuint(pptr,rsize,"Out",debug);
   AIBARRIER();
   printseq(sbptr,n*nprocs,"OutR",debug);
   /* END BENCHMARKING */

   /* START BENCHMARKING : BENCHMARK 7 */
   t1 = 0.0;
   for(r=0;r<runs;r++){
   memcpy(ptr,bptr,n*sizeof(idata));
   free((void*) pptr); /* release the result buffer of the previous run */
   AIBARRIER();
   t2=AITIME();
      rsize = gvr(ptr,n,n*nprocs,&pptr,sizeof(idata),i32rdx4,qqsort,100);
   t3=AITIME();
   AIBARRIER();
   t1+=(t3-t2);
   }
   bspfold((void (*)(void *,void *,void *)) foldmax,(char *)&t1,(char *)&t3,sizeof(double));
   AIBARRIER();
   if (0 == pid) {
      fprintf(stdout,"(%2d,%2d,%1d):",n*nprocs,type,runs);
      fprintf(stdout,"    %18s        : Elapsed time is: %10.8f","gvr ",t3/runs);
      fflush(stdout);
   }
   AIBARRIER();
   aippf(&rsize, rrsize,prsize);
   bspunevengather(0,(char*)pptr,(char*)sbptr,sizeof(idata),rrsize,prsize);
   AIBARRIER();
   datacheck(sbptr,sptr,n);
   printuint(pptr,rsize,"Out",debug);
   AIBARRIER();
   printseq(sbptr,n*nprocs,"OutR",debug);
   /* END BENCHMARKING */

   /* START BENCHMARKING : BENCHMARK 8 */
   t1 = 0.0;
   for(r=0;r<runs;r++){
   memcpy(ptr,bptr,n*sizeof(idata));
   free((void*) pptr); /* release the result buffer of the previous run */
   AIBARRIER();
   t2=AITIME();
      rsize = ger(ptr,n,n*nprocs,&pptr,sizeof(idata),i32rdx4,qqsort,100);
   t3=AITIME();
   AIBARRIER();
   t1+=(t3-t2);
   }
   bspfold((void (*)(void *,void *,void *)) foldmax,(char *)&t1,(char *)&t3,sizeof(double));
   AIBARRIER();
   if (0 == pid) {
      fprintf(stdout,"(%2d,%2d,%1d):",n*nprocs,type,runs);
      fprintf(stdout,"    %18s        : Elapsed time is: %10.8f","ger ",t3/runs);
      fflush(stdout);
   }
   AIBARRIER();
   aippf(&rsize, rrsize,prsize);
   bspunevengather(0,(char*)pptr,(char*)sbptr,sizeof(idata),rrsize,prsize);
   AIBARRIER();
   datacheck(sbptr,sptr,n);
   printuint(pptr,rsize,"Out",debug);
   AIBARRIER();
   printseq(sbptr,n*nprocs,"OutR",debug);
   /* END BENCHMARKING */


   free((void *)  bptr);
   free((void *)  pptr);
   free((void *)   ptr);
   free((void *) sbptr);
   free((void *)  sptr);
   /* The size arrays were leaked by the original code. */
   free((void *) rrsize);
   free((void *) prsize);

   AIEND();             /* END  */

}


/*
 * Program entry point.
 *
 * Expected command line:  prog procs n type runs debug
 *   argv[1] -> avg_p, argv[2] -> prms.n, argv[3] -> prms.type,
 *   argv[4] -> prms.runs, argv[5] -> prms.debug   (all parsed with atoi()).
 * With any other argument count a usage line is printed and the built-in
 * defaults (4 processors, 1000000 keys per processor, type 1, 1 run,
 * debug off) are used instead.
 *
 * bsp_init() is called first and registers mmain(); mmain() is then
 * invoked once the parameters are in place.
 *
 * Returns EXIT_SUCCESS.  The original returned 1, which shells and job
 * launchers interpret as a failed run even though nothing went wrong.
 */
int main(int argc, char *argv[]) {

   bsp_init(mmain,argc,argv);
   if (argc == 6) {
    prms.n    = atoi(argv[2]);
    prms.type = atoi(argv[3]);
    prms.runs = atoi(argv[4]);
    prms.debug= atoi(argv[5]);
    avg_p = atoi(argv[1]);
   }
   else {
    fprintf(stdout,"%s procs n type runs debug (type option can be 1 or 2)\n",argv[0]);
    prms.n    = 1000000;
    prms.type = 1;
    prms.runs = 1;
    prms.debug= 0;
    avg_p     = 4;
   }
/* */
   mmain();
   return(EXIT_SUCCESS);
}



/*
 * Debug dump of a key sequence held on processor 0.
 *
 * inp   : the keys
 * n     : number of keys in inp
 * s     : short label printed in front of the keys
 * debug : printing happens only when this is 1
 *
 * When debug is 1, processor 0 prints up to ELMNTS leading keys and up to
 * ELMNTS trailing keys of inp.  Every processor then reaches the
 * AIBARRIER() at the end, whether or not it printed.
 *
 * Both ranges are clamped to [0,n): the original always read ELMNTS keys
 * from each end and therefore read outside inp whenever n < ELMNTS.
 * Output for n >= ELMNTS is unchanged.
 */
void printseq(idata *inp,int n,char *s,int debug)
{
 register int i,pid;
 int head,tail;
 pid=AIPID();
 if ((1==debug) && (pid ==0)) {
  head = (n < ELMNTS) ? n : ELMNTS;       /* leading keys to show        */
  tail = (n > ELMNTS) ? (n-ELMNTS) : 0;   /* index of first trailing key */
  fprintf(stdout,"[%2d]:%4s ",pid,s);
  fflush(stdout);
  for(i=0;i<head;i++){
    fprintf(stdout,"%10d,", (idata) inp[i]);
  }
  fprintf(stdout,"...\n          ");
  for(i=tail;i<n;i++){
    fprintf(stdout,"%10d,", (idata) inp[i]);
  }
  fprintf(stdout,"\n");
  fflush(stdout);
 }
 AIBARRIER();
}


/*
 * Debug dump of each processor's local keys, one processor at a time.
 *
 * inp   : the local keys of the calling processor
 * n     : number of keys in inp
 * s     : short label printed in front of the keys
 * debug : printing happens only when this is 1
 *
 * The loop runs once per processor id j; the processor whose id equals j
 * prints (when debug is 1), and every iteration is bracketed by two
 * AIBARRIER() calls that all processors execute.
 *
 * Both printed ranges are clamped to [0,n): the original always read
 * ELMNTS keys from each end and therefore read outside inp whenever
 * n < ELMNTS.  Output for n >= ELMNTS is unchanged.
 */
void printuint(idata *inp,int n,char *s,int debug)
{
 register int i,j,pid,nprocs;
 int head,tail;
 pid=AIPID();
 nprocs=AINPROCS();

 head = (n < ELMNTS) ? n : ELMNTS;       /* leading keys to show        */
 tail = (n > ELMNTS) ? (n-ELMNTS) : 0;   /* index of first trailing key */

  for(j=0;j<nprocs;j++) {
    AIBARRIER();
    if ((pid == j) &&(1==debug)) {
       fprintf(stdout,"[%2d]:%4s ",pid,s);
       for(i=0;i<head;i++){
         fprintf(stdout,"%10d,", (idata) inp[i]);
       }
       fprintf(stdout,"...\n          ");
       for(i=tail;i<n;i++){
          fprintf(stdout,"%10d,", (idata) inp[i]);
       }
       fprintf(stdout,"\n");
       fflush(stdout);
    }
    AIBARRIER();
  }
}

/*
 * Fills d[0..n-1] with benchmark input on processor 0.
 *
 * d    : destination array (written on processor 0 only)
 * n    : number of keys to generate
 * type : input pattern
 *          1      random() masked with MODULO_UINT
 *          2      n * (random() / (double)INT_MAX)
 *          3      increasing sequence 1, 2, ..., n
 *          4      the constant 17
 *          other  decreasing sequence n, n-1, ..., 1
 *
 * The generator is seeded with a fixed constant before the keys are
 * produced.  All processors reach the AIBARRIER() at the end.
 */
void dataseq  (idata *d, int n, int type)
{
 register int k;

 if (AIPID() == 0) {
    srandom(21+1001*11);
    switch (type) {
    case 1:
        for (k=0;k<n;k++)
           d[k]= (idata ) (random() & MODULO_UINT);
        break;
    case 2:
        for (k=0;k<n;k++)
           d[k]= (idata )  ((n)*(random()/((double)INT_MAX)));
        break;
    case 3:
        for (k=0;k<n;k++)
           d[k]= (idata )  (k+1);
        break;
    case 4:
        for (k=0;k<n;k++)
           d[k]= (idata )  (17);
        break;
    default:
        for (k=0;k<n;k++)
           d[k]= (idata )  (n-k);
        break;
    }
 }
 AIBARRIER();
}

/*
 * Verifies on processor 0 that ori[0..n-1] is in non-decreasing order.
 *
 * ori : the sequence to check
 * n   : number of keys in ori
 *
 * The first out-of-order pair is reported on stdout and the scan stops
 * there; " OK" is printed only when no such pair exists.  The original
 * printed " OK" unconditionally, including right after an error report,
 * and did not flush stdout on the success path.
 * Processors other than 0 do nothing.
 */
void seqdatacheck (idata *ori , int n)
{
  register int i,pid;
  int sorted = 1;
  pid=AIPID();
  if (pid == 0) {
   for(i=0;i<n-1;i++) {
     if (ori[i] > ori[i+1]) {
      fprintf(stdout,"\nError at %d : %d noteq %d\n",i,ori[i],ori[i+1]);
      sorted = 0;
      break;
     }
   }
   if (sorted) {
    fprintf(stdout," OK\n");
   }
   fflush(stdout);
  }
}

/*
 * Compares res[0..n-1] with ori[0..n-1] on processor 0.
 *
 * res : the result to verify
 * ori : the reference sequence
 * n   : number of leading keys to compare
 *
 * The first differing position is reported on stdout and the scan stops
 * there; " OK" is printed only when all n keys match.  The original
 * printed " OK" unconditionally, including right after an error report.
 * Processors other than 0 do nothing.
 */
void datacheck (idata *res, idata *ori , int n)
{
  register int i,pid;
  int same = 1;
  pid=AIPID();
  if (pid == 0) {
   for(i=0;i<n;i++) {
     if (res[i] != ori[i]) {
      fprintf(stdout,"\nProc %3d Error at %d : %d noteq %d\n",pid,i,res[i],ori[i]);
      same = 0;
      break;
     }
   }
   if (same) {
    fprintf(stdout," OK\n");
   }
   fflush(stdout);
  }
}
  
