// Matteo Lulli, Massimo Bernaschi, Giorgio Parisi 2013
// Physics Dept. 'Sapienza', University of Rome

#ifndef CUDAEA3D_H
#define CUDAEA3D_H

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <cuda.h>
#include "myRNG.h"
#include "MSC.h"

/* Element type of every MSC* buffer in sysEA3D_t and of the s0..s3 kernel
 * arguments declared in this header.
 * NOTE(review): "MSC.h" is included above; if it also defines MSC, the two
 * definitions must stay identical -- confirm. */
typedef unsigned int MSC;

/* Globals defined in another translation unit.
 * struct mpiexch is not defined in this file; only a pointer to it is
 * declared here.
 * NOTE(review): nprocs / mpiid are presumably the MPI process count and this
 * process's rank -- confirm against the definition site. */
extern struct mpiexch *interexch;
extern int nprocs, mpiid;

/*
 * sysEA3D_t -- the record passed (by pointer) to almost every host routine
 * declared in this header; sysInit() returns a pointer to one.
 *
 * Nothing in this header shows how the fields are filled in, so the group
 * descriptions below are inferred from the field names only and are marked
 * as assumptions to be confirmed against the implementation.
 *
 * The field order is part of the layout shared by every translation unit
 * that includes this header; do not reorder.
 */
typedef struct{
  /* Integer run parameters.
   * NOTE(review): L/V/A look like lattice linear size, volume and area, and
   * the h-prefixed variants like halved values -- TODO confirm. */
  int L, steps, dumpN;
  int V, A, NJ, NS, replicas, sysN, kernelChoice; 
  int hV, hA, hL;
  int cardN, blockSize, dumpF, analysisF, ram, recoverF, k;
  int gpuNumber, NSBlk, NJBlk, NSBnd, NJBnd;
  int recoverMPIprocs;
  /* NOTE(review): presumably the inverse temperature -- confirm. */
  float beta;

  /* Fixed-size character buffers (200 bytes each, including any terminator). */
  char devShm[200];
  char recoverDir[200];
  /* dumpTimes is a pointer; startValue is a plain unsigned long long int. */
  unsigned long long int *dumpTimes, startValue;

  /* "reds" / "blues" buffers, four (0..3) per colour.
   * NOTE(review): the h_ / d_ prefixes presumably mean host / device memory --
   * confirm against the allocation code. */
  MSC *h_reds0, *h_reds1, *h_reds2, *h_reds3;
  MSC *h_blues0, *h_blues1, *h_blues2, *h_blues3;
  MSC *d_reds0, *d_reds1, *d_reds2, *d_reds3;
  MSC *d_blues0, *d_blues1, *d_blues2, *d_blues3;

  /* BS / BR variants of the h_ buffers above.
   * NOTE(review): presumably boundary send / receive staging buffers for the
   * multi-process exchange -- confirm. */
  MSC *h_reds0BS, *h_reds1BS, *h_reds2BS, *h_reds3BS;
  MSC *h_reds0BR, *h_reds1BR, *h_reds2BR, *h_reds3BR;
  MSC *h_blues0BS, *h_blues1BS, *h_blues2BS, *h_blues3BS;
  MSC *h_blues0BR, *h_blues1BR, *h_blues2BR, *h_blues3BR;

  /* BS / BR variants of the d_ buffers above (same caveat). */
  MSC *d_reds0BS, *d_reds1BS, *d_reds2BS, *d_reds3BS;
  MSC *d_reds0BR, *d_reds1BR, *d_reds2BR, *d_reds3BR;
  MSC *d_blues0BS, *d_blues1BS, *d_blues2BS, *d_blues3BS;
  MSC *d_blues0BR, *d_blues1BR, *d_blues2BR, *d_blues3BR;

  /* J buffers: one per direction (x, y, z) and sign (p, m), in h_ and d_ form.
   * NOTE(review): presumably the couplings in the +/- direction along each
   * axis -- confirm. */
  MSC *h_Jpx, *h_Jpy, *h_Jpz;
  MSC *h_Jmx, *h_Jmy, *h_Jmz;
  MSC *d_Jpx, *d_Jpy, *d_Jpz;
  MSC *d_Jmx, *d_Jmy, *d_Jmz;

  /* BS / BR variants exist only for Jmx, Jpy and Jmz (note the mixed signs). */
  MSC *h_JmxBS, *h_JpyBS, *h_JmzBS;
  MSC *h_JmxBR, *h_JpyBR, *h_JmzBR;

  MSC *d_JmxBS, *d_JpyBS, *d_JmzBS;
  MSC *d_JmxBR, *d_JpyBR, *d_JmzBR;

  /* Integer index tables.
   * NOTE(review): the _R / _B suffixes presumably refer to the red / blue
   * sub-lattices, and B2R / R2B to maps between them -- confirm. */
  int *realI_R, *realI_B;
  int *myI_R, *myI_B;
  int *myI_B2R, *myI_R2B;

}sysEA3D_t;

/*
 * Host-side entry points. All of them are defined in other translation
 * units; this header only declares them. The grouping below follows the
 * function names. Return-value conventions are not visible from this header.
 */

/* Set-up: builds a sysEA3D_t from the command line. */
sysEA3D_t *sysInit(int argc, char **argv);
int dummyDirsMaker(sysEA3D_t *sys);

/* Spins* family. Note that SpinsReadSys takes a plain int time argument,
 * unlike its siblings, which take unsigned long long. */
int SpinsRead(sysEA3D_t *sys, unsigned long long t);
int SpinsReadDir(sysEA3D_t *sys, unsigned long long t);
int SpinsReadDirMpi(sysEA3D_t *sys, unsigned long long t, int recoverMPIprocs);
int SpinsReadSys(sysEA3D_t *sys, int t, int sysIndex);
int SpinsTransform(sysEA3D_t *sys);
int SpinsTransformRead(sysEA3D_t *sys);
int SpinsDump(sysEA3D_t *sys, unsigned long long t);
int SpinsDumpDir(sysEA3D_t *sys, unsigned long long t, int recoverMPIprocs);

/* J* family: same naming scheme as the Spins* family, without a time argument. */
int JRead(sysEA3D_t *sys);
int JReadDir(sysEA3D_t *sys);
int JReadDirMpi(sysEA3D_t *sys, int recoverMPIprocs);
int JReadSys(sysEA3D_t *sys, int sysIndex);
int JTransform(sysEA3D_t *sys);
int JTransformRead(sysEA3D_t *sys);
int JDump(sysEA3D_t *sys);

/* Simulation drivers and their test variants. */
int Simulation(sysEA3D_t *sys);
int SimulationTest(sysEA3D_t *sys);
int SimulationFullRecoverTest(sysEA3D_t *sys);
int SimulationFullRecoverTestCongruential(sysEA3D_t *sys);
int testParisiRapuano(sysEA3D_t *sys);

/* Benchmarks: these take the raw command line instead of a sysEA3D_t. */
int benchmark(int argc, char **argv);
int benchmarkSliced(int argc, char **argv);
int benchmarkMGPU(int argc, char **argv);
int benchmarkSmooth(int argc, char **argv);

/*
 * Monte Carlo step kernels -- declarations only; the definitions live in
 * another translation unit.
 *
 * Every kernel takes the same five arguments:
 *   s0..s3  four MSC buffers
 *   rand    one RNGT buffer (RNGT is not declared in this file; presumably
 *           it comes from "myRNG.h" -- confirm)
 *
 * There is one Reds and one Blues kernel for each of the variants Bitwise,
 * Sliced, SlicedGrid, Standard and StandardGrid.
 *
 * The restrict qualifier is written after the '*' (T * __restrict__ p) so
 * that it qualifies the pointer itself. Written before the type it would
 * apply to the pointee, which is not a pointer type: compilers either reject
 * that or cannot use it as a no-alias hint. As a top-level parameter
 * qualifier it does not change the function type, so these prototypes stay
 * compatible with the existing definitions.
 *
 * Callers must guarantee that the five buffers of one launch do not overlap.
 *
 * NOTE(review): the expected grid/block layout and shared-memory requirements
 * are not visible from this header -- see the kernel definitions.
 */
__global__ void MCStepReds_Bitwise(MSC * __restrict__ s0, MSC * __restrict__ s1,
                                   MSC * __restrict__ s2, MSC * __restrict__ s3,
                                   RNGT * __restrict__ rand);

__global__ void MCStepBlues_Bitwise(MSC * __restrict__ s0, MSC * __restrict__ s1,
                                    MSC * __restrict__ s2, MSC * __restrict__ s3,
                                    RNGT * __restrict__ rand);

__global__ void MCStepReds_Sliced(MSC * __restrict__ s0, MSC * __restrict__ s1,
                                  MSC * __restrict__ s2, MSC * __restrict__ s3,
                                  RNGT * __restrict__ rand);

__global__ void MCStepBlues_Sliced(MSC * __restrict__ s0, MSC * __restrict__ s1,
                                   MSC * __restrict__ s2, MSC * __restrict__ s3,
                                   RNGT * __restrict__ rand);

__global__ void MCStepReds_SlicedGrid(MSC * __restrict__ s0, MSC * __restrict__ s1,
                                      MSC * __restrict__ s2, MSC * __restrict__ s3,
                                      RNGT * __restrict__ rand);

__global__ void MCStepBlues_SlicedGrid(MSC * __restrict__ s0, MSC * __restrict__ s1,
                                       MSC * __restrict__ s2, MSC * __restrict__ s3,
                                       RNGT * __restrict__ rand);

__global__ void MCStepReds_Standard(MSC * __restrict__ s0, MSC * __restrict__ s1,
                                    MSC * __restrict__ s2, MSC * __restrict__ s3,
                                    RNGT * __restrict__ rand);

__global__ void MCStepBlues_Standard(MSC * __restrict__ s0, MSC * __restrict__ s1,
                                     MSC * __restrict__ s2, MSC * __restrict__ s3,
                                     RNGT * __restrict__ rand);

__global__ void MCStepReds_StandardGrid(MSC * __restrict__ s0, MSC * __restrict__ s1,
                                        MSC * __restrict__ s2, MSC * __restrict__ s3,
                                        RNGT * __restrict__ rand);

__global__ void MCStepBlues_StandardGrid(MSC * __restrict__ s0, MSC * __restrict__ s1,
                                         MSC * __restrict__ s2, MSC * __restrict__ s3,
                                         RNGT * __restrict__ rand);

/*
 * Random-number-generator test kernels -- declarations only; the definitions
 * live in another translation unit.
 *
 *   ira / rng        RNGT buffer (RNGT is not declared in this file;
 *                    presumably it comes from "myRNG.h" -- confirm)
 *   steps            integer step count
 *   ip, ip1..ip3     integer arguments of the two ParisiRapuano kernels
 *                    (NOTE(review): presumably indices into the generator
 *                    state -- confirm against the definition)
 *
 * The restrict qualifier is written after the '*' (T * __restrict__ p) so
 * that it qualifies the pointer itself. Written before the type it would
 * apply to the pointee, which is not a pointer type: compilers either reject
 * that or cannot use it as a no-alias hint. As a top-level parameter
 * qualifier it does not change the function type, so these prototypes stay
 * compatible with the existing definitions.
 *
 * NOTE(review): the expected launch configuration is not visible from this
 * header -- see the kernel definitions.
 */
__global__ void testParisiRapuanoKernel(RNGT * __restrict__ ira, int steps,
                                        int ip, int ip1, int ip2, int ip3);

__global__ void testParisiRapuanoKernelMod(RNGT * __restrict__ ira, int steps,
                                           int ip, int ip1, int ip2, int ip3);

__global__ void testMinStdKernel(RNGT * __restrict__ rng, int steps);

#endif
