// Matteo Lulli, Massimo Bernaschi, Giorgio Parisi 2013
// Physics Dept. 'Sapienza', University of Rome

#ifndef MYRNG_H
#define MYRNG_H

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <cuda.h>

// Word type used for every generator state / seed value in this header.
typedef unsigned RNGT;

// Evaluates a CUDA runtime call once; if it did not return cudaSuccess,
// prints the error string together with the source line and file to stderr
// and executes `return 1;` in the ENCLOSING function. It can therefore only
// be used inside functions whose return type accepts the value 1.
#define MYCUDA_ERROR(functionCall)\
  {\
    cudaError_t cudaStatus = functionCall;\
    if(cudaSuccess != cudaStatus){\
      fprintf(stderr, "\nError %s at line %d of %s\n",\
              cudaGetErrorString(cudaStatus), __LINE__, __FILE__);\
      return 1;\
    }\
  }

// 2^31 - 1 (all of the low 31 bits set).
#define TWO31M1 0x7FFFFFFF
// 2^32 - 1 (all 32 bits set).
#define TWO32M1 0xFFFFFFFF

// In-place step seed <- 16807*seed mod (2^31 - 1), i.e. the multiplier and
// modulus of the Park-Miller "minimal standard" generator, done entirely in
// 32-bit arithmetic: the seed is split into 16-bit halves, the partial
// products are recombined into a 32-bit sum, and one copy of 2^31 - 1 is
// subtracted when bit 31 of that sum is set. The final selection is
// branch-free (multiplication by a 0/1 flag).
// `seed` must be a modifiable RNGT lvalue; it is evaluated several times.
#define RANDTWO31M1(seed)\
  {\
    RNGT sum31 = 16807*((seed)&0xffff);\
    const RNGT upper = 16807*((seed)>>16);\
    sum31 += (upper&0x7fff)<<16;\
    sum31 += upper>>15;\
    {\
      const RNGT wrapped = sum31>>31;\
      (seed) = wrapped*(sum31-0x7fffffff) + (wrapped^1)*sum31;\
    }\
  }

// Same recurrence as RANDTWO31M1 (seed <- 16807*seed mod (2^31 - 1)) with a
// different final reduction: bit 31 of the 32-bit partial sum is turned into
// an all-ones / all-zeros mask, and (mask & 0x7fffffff) is subtracted, so
// 2^31 - 1 is removed only when the sum overflowed 31 bits.
// `seed` must be a modifiable RNGT lvalue; it is evaluated several times.
#define RANDTWO31M1IMP(seed)\
  {\
    const RNGT upper = 16807*((seed)>>16);\
    RNGT sum31 = 16807*((seed)&0xffff);\
    sum31 += (upper&0x7fff)<<16;\
    sum31 += upper>>15;\
    (seed) = sum31 - ((0u-(sum31>>31))&0x7fffffff);\
  }


// 64-bit variant of the step seed <- 16807*seed mod (2^31 - 1): the full
// product is formed in an unsigned long long, its low 31 bits and the
// remaining high bits are added, and if that sum reaches bit 31 it is folded
// once more ((sum & 0x7fffffff) + 1 equals sum - (2^31 - 1)).
// The body is wrapped in its own brace block, like the sibling macros, so the
// temporary does not leak into the caller's scope: the macro can be used more
// than once per scope and the trailing `if` cannot capture a caller's `else`.
// `seed` must be a modifiable lvalue; it is evaluated several times.
#define RANDTWO31M1BIS(seed)\
  {unsigned long long int prod64_ = 16807ULL*(seed);\
  (seed) = (prod64_&0x7fffffff) + (prod64_>>31);\
  if((seed)&0x80000000) (seed) = ((seed)&0x7fffffff) + 1;\
  }

// In-place linear congruential step seed <- 1664525*seed + 1013904223.
// With a 32-bit unsigned seed (RNGT) the result wraps modulo 2^32. The
// constants are unsigned so the arithmetic is done in unsigned (wrapping)
// arithmetic even if a plain `int` seed is passed.
// The expansion already ends in ';' (kept for existing call sites), so it is
// a complete statement on its own.
#define RANDTWO32(seed)\
  (seed) = 1664525u*(seed) + 1013904223u;

// Wheel length used with the PR32 generator, and the matching bit mask
// (PRWHEEL is a power of two, so the mask is PRWHEEL - 1 = 63).
#define PRWHEEL 64
#define PRWHEELMASK (PRWHEEL - 1)

// One step of the "PR" shift-register recurrence (presumably Parisi-Rapuano,
// cf. testParisiRapuanoKernel): stores ira[ip1] + ira[ip2] into ira[ip0] and
// yields that new value XORed with ira[ip3].
// All arguments are parenthesized so that `ira` may be any pointer expression
// (e.g. `base + offset`).
// ip0 must differ from ip3: otherwise the store to ira[ip0] and the read of
// ira[ip3] hit the same element with no sequencing between them.
#define PR32(ira, ip0, ip1, ip2, ip3) \
  (((ira)[(ip0)] = (ira)[(ip1)] + (ira)[(ip2)]) ^ (ira)[(ip3)])

// Bundle of two RNGT buffers plus an element count, used by the CRNGArray*
// functions declared in this header.
// NOTE(review): the h_/d_ prefixes presumably mean host and device copies of
// the same seeds (cf. H2DCRNGArray / D2HCRNGArray) -- confirm in the
// implementation.
typedef struct{
  RNGT *h_rngSeeds;
  RNGT *d_rngSeeds;
  int N;  // presumably the number of seeds held -- TODO confirm
}CRNGArray_t;

// Bundle of two RNGT buffers plus an element count, used by the PR_RNGArray*
// and PR32* functions declared in this header. Same layout as CRNGArray_t.
// NOTE(review): the h_/d_ prefixes presumably mean host and device copies
// (cf. H2DPR_RNGArray / D2HPR_RNGArray) -- confirm in the implementation.
typedef struct{
  RNGT *h_rngSeeds;
  RNGT *d_rngSeeds;
  int N;  // presumably the number of state words held -- TODO confirm
}PR_RNGArray_t;

// ---- CRNGArray_t host API (declarations only; defined elsewhere) ----------
// NOTE(review): the grouping below is inferred from the function names;
// confirm the exact semantics and return conventions in the implementation.

// Construction.
CRNGArray_t *CRNGArrayInit(int N);
CRNGArray_t *CRNGArrayInitPR(int N);

// Lifetime and seeding.
int CRNGArrayAlloc(CRNGArray_t *rngArray);
int CRNGArrayReset(CRNGArray_t *rngArray);
int CRNGArrayFree(CRNGArray_t *rngArray);
int CRNGArraySetURandom(CRNGArray_t *rngArray);

// Transfers between the two buffers of the array.
int H2DCRNGArray(CRNGArray_t *rngArray);
int D2HCRNGArray(CRNGArray_t *rngArray);

// Dump / read back of the generator state.
int DumpCRNGArrayDir(CRNGArray_t *rngArray, int V);
int DumpCRNGArray(CRNGArray_t *rngArray, unsigned long long t,
                  int nprocs, int mpiid);
int DumpCRNGArrayMpiRecover(CRNGArray_t *rngArray, unsigned long long t,
                            int sysN, int recoverMPIprocs);
int ReadCRNGArray(CRNGArray_t *rngArray, unsigned long long t);
int ReadCRNGArrayMpiRecover(CRNGArray_t *rngArray, unsigned long long t,
                            int sysN, int recoverMPIprocs);
int DumpCRNGArrayOff(CRNGArray_t *rngArray, int offset, int size);
int ReadCRNGArrayOff(CRNGArray_t *rngArray, int offset, int size);

// ---- PR_RNGArray_t host API (declarations only; defined elsewhere) --------
// NOTE(review): the grouping below is inferred from the function names;
// confirm the exact semantics and return conventions in the implementation.

// Construction.
PR_RNGArray_t *PR_RNGArrayInit(int N, int steps);

// Lifetime and seeding. Unlike CRNGArrayFree, PR_RNGArrayFree takes the
// address of the caller's pointer.
int PR_RNGArrayAlloc(PR_RNGArray_t *rngArray);
int PR_RNGArrayReset(PR_RNGArray_t *rngArray);
int PR_RNGArrayFree(PR_RNGArray_t **rngArray);
int PR_RNGArraySetURandom(PR_RNGArray_t *rngArray);

// Transfers between the two buffers of the array.
int H2DPR_RNGArray(PR_RNGArray_t *rngArray);
int D2HPR_RNGArray(PR_RNGArray_t *rngArray);

// Dump / read back of the generator state.
int DumpPR_RNGArray(PR_RNGArray_t *rngArray, unsigned long long t);
int ReadPR_RNGArray(PR_RNGArray_t *rngArray, unsigned long long t);

// Advancing the generator.
int PR32Mix(PR_RNGArray_t *rngArray, int steps);
int PR32Evolve(PR_RNGArray_t *rngArray, int steps,
               int ip0, int ip1, int ip2, int ip3);
int PR32EvolveMod(PR_RNGArray_t *rngArray, int steps,
                  int ip0, int ip1, int ip2, int ip3);

// Kernel prototypes for the Parisi-Rapuano test kernels (defined elsewhere).
// The restrict qualifier is written after the '*', so that it qualifies the
// pointer `ira` itself; written in front of RNGT it would apply to the
// pointee type, where it has no meaning. A top-level qualifier on a parameter
// is not part of the function type, so these prototypes remain compatible
// with the existing definitions.
// NOTE(review): launch configuration, the required length of `ira` and the
// meaning of ip/ip1/ip2/ip3 are not visible from this header -- see the
// kernel definitions.
__global__ void testParisiRapuanoKernel(RNGT *__restrict ira, int steps,
					int ip, int ip1, int ip2, int ip3);

__global__ void testParisiRapuanoKernelMod(RNGT *__restrict ira, int steps,
					   int ip, int ip1, int ip2, int ip3);


#endif
