/*
	DoSample.c
		the actual sampling routine, serial and parallel,
		for the C versions of the Cuba routines
		by Thomas Hahn
		last modified 23 Sep 11 th
*/

/* smallest per-core slice worth distributing: DoSample samples
   serially if the slice size would come out below this */
#define MINSLICE 10
/* parallel sampling is enabled only if cubacores_ >= MINCORES */
#define MINCORES 1
//#define MINCORES 2

/* number of worker processes; if still 0 when ForkCores runs it is
   taken from the CUBACORES environment variable (-1 if unset) */
int cubacores_;
/* parent-side socket descriptors, one per forked worker;
   allocated in ForkCores, released in WaitCores */
static int *child_;

/* VEG_ONLY(...) keeps its arguments only when compiling for Vegas or
   Suave and expands to nothing otherwise; used here for the weight
   array and the iteration number */
#if defined(VEGAS) || defined(SUAVE)
#define VEG_ONLY(...) __VA_ARGS__
#else
#define VEG_ONLY(...)
#endif

/* DIV_ONLY(...) keeps its arguments only when compiling for Divonne.
   LDX(ldx) is the distance (in reals) between consecutive points in
   the x array: the explicit ldx for Divonne, t->ndim otherwise.
   Note that outside Divonne the argument is discarded, so it need not
   name an existing variable, but a `t' must be in scope. */
#ifdef DIVONNE
#define DIV_ONLY(...) __VA_ARGS__
#define LDX(ldx) ldx
#else
#define DIV_ONLY(...)
#define LDX(ldx) t->ndim
#endif

/* Header exchanged between parent and worker for each batch of points.
   The struct is written to and read from the socket as raw bytes, so
   both sides must be built with the same layout. */
typedef struct {
  /* where, in the parent's address space, the results of this slice
     go; the worker echoes it back unchanged and the parent reads the
     function values into it */
  real *f;
  /* number of points in the slice; a worker sets it to 0 in its reply
     when the integrand returned ABORT */
  number n;
  /* iteration number handed on to the integrand (Vegas/Suave only) */
  VEG_ONLY(count iter;)
  /* spacing of consecutive points in x (Divonne only) */
  DIV_ONLY(count ldx;)
} Slice;

/*********************************************************************/

/*
	readsock: read exactly n bytes from fd into data, repeating
	read() after short reads.
	Returns the result of the last read() call: positive if all
	n bytes arrived, 0 on end-of-file (or if n is 0), -1 on error.
*/

static inline int readsock(int fd, void *data, size_t n) {
  /* walk the buffer through a char pointer: arithmetic on void *
     is a GNU extension, not standard C */
  char *p = data;
  size_t remain = n;
  ssize_t got;

  do {
    got = read(fd, p, remain);
    if( got <= 0 ) break;
    p += got;
    remain -= got;
  } while( remain > 0 );

  return got;
}

/*
	writesock: write exactly n bytes from data to fd, repeating
	write() after short writes.
	Returns the result of the last write() call: positive if all
	n bytes were written, otherwise the failing result (0 or -1).
*/

static inline int writesock(int fd, const void *data, size_t n) {
  /* walk the buffer through a char pointer: arithmetic on void *
     is a GNU extension, not standard C */
  const char *p = data;
  size_t remain = n;
  ssize_t got;

  do {
    got = write(fd, p, remain);
    if( got <= 0 ) break;
    p += got;
    remain -= got;
  } while( remain > 0 );

  return got;
}

/*********************************************************************/

/*
	SampleSerial: call the integrand for n consecutive points in
	the calling process.
	x holds the points, LDX(ldx) reals apart; the t->ncomp results
	for each point are stored consecutively in f.  For Vegas/Suave
	the matching weight and the iteration number are passed along,
	for Divonne the current phase.
	Returns 1 as soon as the integrand returns ABORT, 0 otherwise.
*/

static inline int SampleSerial(cThis *t, number n, creal *x, real *f
  VEG_ONLY(, creal *w, ccount iter)
  DIV_ONLY(, ccount ldx))
{
  number left;

  for( left = n; left != 0; --left ) {
    if( t->integrand(&t->ndim, x, &t->ncomp, f, t->userdata
          VEG_ONLY(, w, &iter)
          DIV_ONLY(, &t->phase)) == ABORT )
      return 1;

    /* step on to the next point and its result slot */
    VEG_ONLY(++w;)
    x += LDX(ldx);
    f += t->ncomp;
  }

  return 0;
}

/*********************************************************************/

/*
	DoSample: evaluate the integrand at the n points in x and store
	the results in f, either serially or spread over the workers
	whose sockets are listed in child_.
	The points are sampled serially if fewer than MINCORES cores are
	configured or if the per-core share would be below MINSLICE.
	Otherwise each worker is sent a Slice header followed by its
	weights (Vegas/Suave) and points, and replies with the header and
	the function values.
	t->neval is increased by n in either case.
	Does not return (longjmp to t->abort with value -99) if the
	integrand requested an abort or a worker's reply could not be read.
*/

static inline void DoSample(This *t, number n, creal *x, real *f
  VEG_ONLY(, creal *w, ccount iter)
  DIV_ONLY(, ccount ldx))
{
  char s[128];
  Slice slice;

  t->neval += n;

  if( cubacores_ < MINCORES ||
      (slice.n = (n + cubacores_ - 1)/cubacores_) < MINSLICE ) {
    if( VERBOSE > 2 ) {
      sprintf(s, "sampling " NUMBER " points serially", n);
      Print(s);
    }

    if( SampleSerial(t, n, x, f
          VEG_ONLY(, w, iter)
          DIV_ONLY(, ldx)) ) longjmp(t->abort, -99);
  }
  else {
    int core, busy, aborted;

    if( VERBOSE > 2 ) {
      sprintf(s, "sampling " NUMBER " points each on %d cores",
        slice.n, cubacores_);
      Print(s);
    }

    slice.f = f;
    VEG_ONLY(slice.iter = iter;)
    DIV_ONLY(slice.ldx = ldx;)

    /* Hand out the work.  Rounding the slice size up can exhaust the
       points before the last core is reached; such cores are left
       idle, since a slice with n == 0 coming back from a worker is
       indistinguishable from its abort signal. */
    for( busy = 0; busy < cubacores_ && n > 0; ++busy ) {
      /* the last slice handed out may be shorter than the others */
      if( slice.n > n ) slice.n = n;

      writesock(child_[busy], &slice, sizeof slice);
      VEG_ONLY(writesock(child_[busy], w, slice.n*sizeof *w);)
      writesock(child_[busy], x, slice.n*LDX(ldx)*sizeof *x);

      /* advance every array by what was just sent */
      VEG_ONLY(w += slice.n;)
      x += slice.n*LDX(ldx);
      slice.f += slice.n*t->ncomp;
      n -= slice.n;
    }

    /* Collect the replies from the cores that were given work.  All of
       them are read even after a failure so that no stale reply is
       left in a socket.  The echoed header tells where the values go
       (slice.f) and how many there are (slice.n). */
    aborted = 0;
    for( core = busy; --core >= 0; ) {
      if( readsock(child_[core], &slice, sizeof slice) <= 0 ||
          slice.n == 0 ||
          readsock(child_[core], slice.f,
            slice.n*t->ncomp*sizeof *f) <= 0 ) aborted = 1;
    }
    if( aborted ) longjmp(t->abort, -99);
  }
}

/*********************************************************************/

/*
	DoChild: main loop of a worker process; never returns.
	For each request read from fd -- a Slice header followed by the
	weights (Vegas/Suave) and the points -- the integrand is sampled
	and the header is written back, followed by the function values.
	If the integrand returned ABORT the header goes back with n = 0
	and no values follow.
	The process exits with status 0 once the header read reports
	end-of-file or an error, and with status 1 if memory runs out or
	a request arrives incomplete.
*/

static inline void DoChild(cThis *t, cint fd)
{
  Slice slice;

  /* > 0: stop on read errors (-1) as well as on end-of-file (0) */
  while( readsock(fd, &slice, sizeof slice) > 0 ) {
    number n = slice.n;
    size_t fsize = 0;
    real *x = NULL, *f = NULL;
    VEG_ONLY(real *w = NULL;)

    /* an empty slice carries no payload and is simply echoed back */
    if( n > 0 ) {
      size_t xsize = (size_t)n*LDX(slice.ldx)*sizeof *x;
      VEG_ONLY(size_t wsize = (size_t)n*sizeof *w;)
      fsize = (size_t)n*t->ncomp*sizeof *f;

      /* the buffers live on the heap: their size is dictated by the
         request and may well exceed what the stack can hold */
      x = malloc(xsize);
      f = malloc(fsize);
      VEG_ONLY(w = malloc(wsize);)
      if( x == NULL || f == NULL VEG_ONLY(|| w == NULL) ) exit(1);

      /* a truncated request means the other end is gone */
      VEG_ONLY(if( readsock(fd, w, wsize) <= 0 ) exit(1);)
      if( readsock(fd, x, xsize) <= 0 ) exit(1);

      if( SampleSerial(t, n, x, f
            VEG_ONLY(, w, slice.iter)
            DIV_ONLY(, slice.ldx)) ) slice.n = 0;
    }

    writesock(fd, &slice, sizeof slice);
    if( slice.n ) writesock(fd, f, fsize);

    VEG_ONLY(free(w);)
    free(x);
    free(f);
  }

  exit(0);
}

/*********************************************************************/

/*
	ForkCores: start the worker processes.
	If cubacores_ is 0 it is first set from the environment variable
	CUBACORES (-1 if that is not set).  Nothing further happens if
	cubacores_ < MINCORES.  Otherwise cubacores_ processes are forked,
	each running DoChild on one end of a socket pair; the other ends
	are stored in child_.
	The program is aborted if a socket pair or a process cannot be
	created.
*/

static inline void ForkCores(cThis *t)
{
  int core;

  if( cubacores_ == 0 ) {
    cchar *env = getenv("CUBACORES");
    cubacores_ = -1;
    if( env ) cubacores_ = atoi(env);
  }

  if( cubacores_ < MINCORES ) return;
  if( VERBOSE ) printf("using %d cores\n", cubacores_);

  /* empty the stdio buffers now, or every child inherits a copy of
     the pending output and writes it out once more when it exits */
  fflush(NULL);

  /* NOTE(review): MemAlloc is defined outside this file.  The size is
     spelled out in bytes, which is right if MemAlloc expects a byte
     count and only over-allocates a few ints if it expects an element
     count -- confirm against its definition. */
  MemAlloc(child_, cubacores_*sizeof *child_);
  for( core = 0; core < cubacores_; ++core ) {
    int fd[2];
    pid_t pid;

    /* these calls must not sit inside assert(): with NDEBUG defined
       they would vanish together with the check */
    if( socketpair(AF_LOCAL, SOCK_STREAM, 0, fd) == -1 ) {
      perror("socketpair");
      abort();
    }
    pid = fork();
    if( pid == -1 ) {
      perror("fork");
      abort();
    }

    if( pid == 0 ) {
      /* in the child: drop the inherited parent-side ends of the
         workers started before this one, so that those workers see
         end-of-file as soon as the parent closes its end */
      int prev;
      for( prev = 0; prev < core; ++prev )
        close(child_[prev]);
      close(fd[0]);
      DoChild(t, fd[1]);
    }
    close(fd[1]);
    child_[core] = fd[0];
  }
}

/*********************************************************************/

/*
	WaitCores: shut down the worker processes.
	Closes the parent's end of every worker socket, frees child_ and
	then waits for cubacores_ child processes to terminate.
	Does nothing if parallel sampling is disabled
	(cubacores_ < MINCORES) or no workers are currently registered.
*/

static inline void WaitCores(void)
{
  if( cubacores_ >= MINCORES && child_ != NULL ) {
    int core;

    for( core = 0; core < cubacores_; ++core )
      close(child_[core]);
    free(child_);
    child_ = NULL;	/* makes a repeated call harmless */

    /* the exit status is of no interest; wait() takes an int *,
       not a pid_t *, so pass NULL rather than a mistyped dummy */
    for( core = 0; core < cubacores_; ++core )
      wait(NULL);
  }
}

