/********************************************************************************************************
 * QRNA - Comparative analysis of biological sequences 
 *         with pair hidden Markov models, pair stochastic context-free
 *        grammars, and probabilistic evolutionary  models.
 *       
 * Version 2.0.0 (JUN 2003)
 *
 * Copyright (C) 2000-2003 Howard Hughes Medical Institute/Washington University School of Medicine
 * All Rights Reserved
 * 
 *     This source code is distributed under the terms of the
 *     GNU General Public License. See the files COPYING and LICENSE
 *     for details.
 ***********************************************************************************************************/

/* nullmodel.c
 *
 * ER,  Fri Jun  4 12:11:35 CDT 1999 [St. Louis]
 * 
 * Allocation, free'ing, initialization of the nullmodel
 * 
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>

#include "funcs.h"
#include "globals.h"
#include "squid.h"
#include "structs.h"

#ifdef MEMDEBUG
#include "dbmalloc.h"
#endif

/* Function: AllocNullModel()
 * Date:     ER,  Fri Jun  4 11:59:28 CDT 1999 [St. Louis]
 *
 * Purpose:  Allocate a nullmodel structure together with its two
 *           4-element emission vectors (xem[], yem[]), then hand it to
 *           PatternNullModel() for initialization.
 *
 * Args:     (none)
 *
 * Returns:  pointer to the freshly allocated nullmodel.
 *           The structure and its xem[]/yem[] vectors belong to the
 *           caller, who must release them (see FreeNullModel()).
 */
struct nullmodel_s *
AllocNullModel(void)
{
  struct nullmodel_s *model;

  model      = (struct nullmodel_s *) MallocOrDie (sizeof(*model));
  model->xem = (double *) MallocOrDie (4 * sizeof(double));
  model->yem = (double *) MallocOrDie (4 * sizeof(double));

  PatternNullModel(model);

  return model;
}

/* Function: CheckNullProbs()
 * Date:     ER, Fri Jun  4 12:07:20 CDT 1999  [St. Louis]
 *
 * Purpose:  Run CheckSingleProb() over each of the two 4-element
 *           emission vectors of a nullmodel (the check that they add
 *           up to one). The transition terms eta/meta are not
 *           examined here.
 *
 * Args:     null - the nullmodel structure to verify
 *
 * Returns:  void.
 */
void
CheckNullProbs(struct nullmodel_s *null)
{
  double *emissions[2];
  int     which;

  emissions[0] = null->xem;
  emissions[1] = null->yem;

  for (which = 0; which < 2; which++)
    CheckSingleProb(emissions[which], 4);
}

/* Function: ConstructNullModel()
 * Date:     ER, Fri Jun  4 11:29:08 CDT 1999 [St. Louis]
 *
 * Purpose:  Fill in a nullmodel_s for a given time factor.
 *
 *           The transition probability is interpolated between its
 *           time-zero and time-infinity values:
 *
 *             eta(t) = eta_infty + (eta_zero - eta_infty) * r^t
 *             r      = (eta_star - eta_infty) / (eta_zero - eta_infty)
 *
 *           where t = tfactor and eta_star = param.eta. When eta_zero and
 *           eta_infty differ by no more than MARGIN the interpolation is
 *           skipped and eta_star is used directly.
 *
 *           The emission vectors are the two marginals of the joint
 *           matrix produced by ConstructTiedProbs().
 *
 * Args:     pammodel_star - passed through to ConstructTiedProbs()
 *           null          - model to fill; xem[] and yem[] must already
 *                           be allocated (see AllocNullModel())
 *           param         - parameters at time "star"; only .eta is read
 *           param_zero    - parameters at time zero; only .eta is read
 *           param_infty   - parameters at time infinity; only .eta is read
 *           tfactor       - exponent t of the interpolation; also passed
 *                           to ConstructTiedProbs()
 *           targetfreq    - passed through to ConstructTiedProbs()
 *           changefreq    - passed through to ConstructTiedProbs()
 *           verbose       - if nonzero, print the mutation matrices
 *
 * Returns:  (void)
 *           On return null->eta, null->meta, null->xem[] and null->yem[]
 *           have been converted by NullToLog2().
 *           The two work matrices obtained from ConstructTiedProbs() are
 *           free'd here. Calls Die() on inconsistent parameters.
 */
void
ConstructNullModel(double **pammodel_star, struct nullmodel_s *null,
		   struct nullparam_s param, struct nullparam_s param_zero, struct nullparam_s param_infty,
		   double tfactor, double *targetfreq, int changefreq, int verbose)
{
  double *mutpxy_null;
  double *mut5pxy_null;
  double  eta_infty;
  double  eta_zero;
  double  eta;

  eta       = param.eta;
  eta_infty = param_infty.eta;
  eta_zero  = param_zero.eta;

  /* eta (time star) has to lie between eta_zero and eta_infty, whichever
   * of the two is the larger one.
   */
  if (!((eta_infty <= eta && eta <= eta_zero) ||
	(eta_infty >= eta && eta >= eta_zero)))
    Die ("check your parameters for the Null model. Time zero (%f), star (%f) and infty (%f) \n", eta_zero, eta, eta_infty);

  /*
   * Also check that if eta_zero and eta are equal (to within 1-accuracy1),
   * then eta_infty is also the same
   */
  if ( eta_zero  - eta < 1-accuracy1 && eta_zero  - eta > -(1-accuracy1) &&
       (eta_infty - eta > 1-accuracy1 || eta_infty - eta < -(1-accuracy1))  )
    Die ("check your parameters for the Null model. Time zero (%f), star (%f) and infty (%f) \n", eta_zero, eta, eta_infty);

  /* Interpolate only when the two end points are distinguishable;
   * otherwise the ratio below would divide by (nearly) zero.
   */
  if (eta_zero - eta_infty > MARGIN || eta_zero - eta_infty < -MARGIN)
    null->eta  =  eta_infty + (eta_zero-eta_infty) *  EXP2(tfactor*(LOG2((eta-eta_infty)/(eta_zero-eta_infty))));
  else
    null->eta = eta;

  null->meta = 1. - null->eta;

  /* Written as a negated in-range test so that a NaN coming out of the
   * interpolation is rejected as well (every ordered comparison with
   * NaN is false, so "eta < 0 || eta > 1" would let it through).
   */
  if (!(null->eta >= 0.0 && null->eta <= 1.0))
    Die ("Wrong transition probabilities  for the Null model (eta = %f)", null->eta);

  /* Calculate emission prob's: null.xem[], null.yem[]
   * as the two marginals of the joint matrix mutpxy_null.
   */
  ConstructTiedProbs(pammodel_star, tfactor, &mutpxy_null, &mut5pxy_null, targetfreq, changefreq, FALSE, FALSE);
  ConstructXemNullProb(mutpxy_null, null->xem);
  ConstructYemNullProb(mutpxy_null, null->yem);

  if (verbose) {
    PrintMutProbs (mutpxy_null,  null->xem);
    PrintMut5Probs(mut5pxy_null, null->xem);
  }

  /* check prob's add up to one (must happen before the log2 conversion)
   */
  CheckNullProbs(null);

  /* convert to LOG2 form
   */
  NullToLog2(null);

  /* free the work matrices handed back by ConstructTiedProbs() */
  free(mutpxy_null);
  free(mut5pxy_null);

}

/* Function: ConstructXemNullProb()
 * Date:     ER, Fri Jun  4 11:54:37 CDT 1999 [St. Louis]
 *
 * Purpose:  Marginalize a 4x4 joint distribution over its second (Y)
 *           index to obtain the seqX emission vector:
 *
 *               xem[x] = sum_y nullpxy[idx(x,y)]
 *
 *           The result is then passed to CheckSingleProb().
 *
 * Args:    nullpxy - 4x4 joint prob matrix, addressed through idx() (prealloc)
 *          xem     - 4 seqX null model emission prob (prealloc); overwritten
 *
 * Returns:  (void)
 *           Fills in xem[0..3].
 */
void
ConstructXemNullProb(double *nullpxy, double *xem)
{
  int row;
  int col;

  for (row = 0; row < 4; row++) {
    /* start each marginal from zero, then add up the row */
    xem[row] = 0.0;

    for (col = 0; col < 4; col++)
      xem[row] += nullpxy[idx(row,col)];
  }

  CheckSingleProb(xem, 4);
}

/* Function: ConstructYemNullProb()
 * Date:     ER, Fri Jun  4 11:54:37 CDT 1999 [St. Louis]
 *
 * Purpose:  Marginalize a 4x4 joint distribution over its first (X)
 *           index to obtain the seqY emission vector:
 *
 *               yem[y] = sum_x nullpxy[idx(x,y)]
 *
 *           The result is then passed to CheckSingleProb().
 *
 * Args:    nullpxy - 4x4 joint prob matrix, addressed through idx() (prealloc)
 *          yem     - 4 seqY null model emission prob (prealloc); overwritten
 *
 * Returns:  (void)
 *           Fills in yem[0..3].
 */
void
ConstructYemNullProb(double *nullpxy, double *yem)
{
  int sym_y;
  int sym_x;

  for (sym_y = 0; sym_y < 4; sym_y++) {
    /* start each marginal from zero, then add up the column */
    yem[sym_y] = 0.0;

    for (sym_x = 0; sym_x < 4; sym_x++)
      yem[sym_y] += nullpxy[idx(sym_x,sym_y)];
  }

  CheckSingleProb(yem, 4);
}

/* Function: FreeNullModel()
 *
 * Purpose:  Release a nullmodel: its two emission vectors first, then
 *           the structure itself.
 *
 * Args:     null - the nullmodel to release. A NULL pointer is accepted
 *                  and ignored, mirroring free().
 *
 * Returns:  void. The pointer must not be used afterwards.
 */
void
FreeNullModel(struct nullmodel_s *null)
{
  /* nothing to release; avoids dereferencing NULL below */
  if (null == NULL) return;

  free(null->xem);
  free(null->yem);
  free(null);
}

/* Function: NullAddOnePair()
 *
 * Purpose:  Score contribution of one aligned position under the null
 *           model. Each of the two symbols that is < 4 adds its
 *           emission term plus null->meta; a symbol >= 4 adds nothing.
 *
 * Args:     cur_x - symbol from seqX at this position
 *           cur_y - symbol from seqY at this position
 *           null  - the nullmodel (xem[], yem[] and meta are read)
 *
 * Returns:  the summed contribution; 0 when both symbols are >= 4.
 */
double
NullAddOnePair(int cur_x, int cur_y, struct nullmodel_s *null)
{
  double contrib = 0.;

  if (cur_x < 4) {
    contrib += null->xem[cur_x] + null->meta;
  }

  if (cur_y < 4) {
    contrib += null->yem[cur_y] + null->meta;
  }

  return contrib;
}

/* Function: NullAddOneX()
 *
 * Purpose:  Score contribution of one seqX symbol under the null model:
 *           its emission term plus null->meta.
 *
 * Args:     cur_x - symbol from seqX
 *           null  - the nullmodel (xem[] and meta are read)
 *
 * Returns:  null->xem[cur_x] + null->meta when cur_x < 4;
 *           0 otherwise, the same convention as NullAddOnePair().
 */
double
NullAddOneX(int cur_x, struct nullmodel_s *null)
{
  double sc;

  /* start from zero so that a symbol >= 4 yields a defined result
   * instead of an uninitialized value
   */
  sc = 0.;

  if (cur_x < 4) sc = null->xem[cur_x] + null->meta;

  return sc;
}

/* Function: NullAddOneY()
 *
 * Purpose:  Score contribution of one seqY symbol under the null model:
 *           its emission term plus null->meta.
 *
 * Args:     cur_y - symbol from seqY
 *           null  - the nullmodel (yem[] and meta are read)
 *
 * Returns:  null->yem[cur_y] + null->meta when cur_y < 4;
 *           0 otherwise, the same convention as NullAddOnePair().
 */
double
NullAddOneY(int cur_y, struct nullmodel_s *null)
{
  double sc;

  /* start from zero so that a symbol >= 4 yields a defined result
   * instead of an uninitialized value
   */
  sc = 0.;

  if (cur_y < 4) sc = null->yem[cur_y] + null->meta;

  return sc;
}

/* Function: NullToLog2()
 * Date:     ER, Fri Jun  4 12:16:17 CDT 1999 [St. Louis]
 *
 * Purpose:  Replace, in place, the transition terms (eta, meta) and the
 *           emission vectors (xem[], yem[]) of a nullmodel by their
 *           LOG2() values.
 *
 * Args:     null - the nullmodel structure to convert
 *
 * Returns:  void.
 */
void
NullToLog2(struct nullmodel_s *null)
{
  int k;   /* index over the 4 emission symbols */

  /* emission prob's, one vector at a time
   */
  for (k = 0; k < 4; k++)
    null->xem[k] = LOG2(null->xem[k]);

  for (k = 0; k < 4; k++)
    null->yem[k] = LOG2(null->yem[k]);

  /* transition prob's
   */
  null->eta  = LOG2(null->eta);
  null->meta = LOG2(null->meta);
}

/* Function: NullLog2ToOdds()
 *
 * Purpose:  Turn the values of null1 into differences relative to null2
 *           by subtracting, in place, null2's meta and emission terms
 *           from those of null1. null1->eta is left untouched, and
 *           null2 is only read.
 *
 * Args:     null1 - the nullmodel that is modified
 *           null2 - the nullmodel whose values are subtracted
 *
 * Returns:  void.
 */
void
NullLog2ToOdds(struct nullmodel_s *null1, struct nullmodel_s *null2)
{
  int k;   /* index over the 4 emission symbols */

  /* emission terms
   */
  for (k = 0; k < 4; k++) {
    null1->xem[k] = null1->xem[k] - null2->xem[k];
    null1->yem[k] = null1->yem[k] - null2->yem[k];
  }

  /* transition term (eta is deliberately not touched)
   */
  null1->meta = null1->meta - null2->meta;
}

/* Function: PatternNullModel()
 *
 * Purpose:  Reset both emission vectors of a nullmodel to 0.0.
 *           The transition terms eta/meta are not modified here.
 *
 * Args:     null - the nullmodel; xem[] and yem[] must be allocated
 *
 * Returns:  void.
 */
void
PatternNullModel(struct nullmodel_s *null)
{
  int k = 4;

  /* walk the 4 symbols downwards, clearing both vectors
   */
  while (k-- > 0) {
    null->xem[k] = 0.0;
    null->yem[k] = 0.0;
  }
}

/* Function: PrintNullModel()
 * Date:     ER, Fri Jun  4 13:15:31 CDT 1999 [St. Louis]
 *
 * Purpose:  Print a null model to stdout.
 *
 * Args:     null -- the nullmodel; its values are expected in log2 form,
 *                   since each one is passed through EXP2() before printing
 *
 * Returns:  void. Prints eta, 1-eta, then the four seqX and the four
 *           seqY emission values.
 */
void
PrintNullModel(struct nullmodel_s *null)
{
  int    k;        /* index over the 4 emission symbols */
  double value;    /* one value, mapped back through EXP2() */

  printf("\nNull MODEL -- Transition probabilities\n");
  value = EXP2(null->eta);
  printf("eta   = %f\n", value);
  value = EXP2(null->meta);
  printf("1-eta = %f\n", value);

  printf("\nNull MODEL -- Emission probabilities\n");
  for (k = 0; k < 4; k++) {
    value = EXP2(null->xem[k]);
    printf("P^X(%d) \t\t= %f\n", k, value);
  }
  for (k = 0; k < 4; k++) {
    value = EXP2(null->yem[k]);
    printf("P^Y(%d) \t\t= %f\n", k, value);
  }
}


/* Function: ScoreWithNull()
 * Date:     ER, Fri Jun 11 10:27:34 CDT 1999 [St. Louis]
 *
 * Purpose:  Score two sequence segments with the null model.
 *           The score starts at 2 * null->eta; every symbol < 4 then
 *           adds its emission term plus null->meta. Symbols >= 4
 *           (gaps, coded as 4) are ignored.
 *
 * Args:     seqX, seqY    -- digitized sequences
 *           startX, Lx    -- first position and length of the seqX segment
 *           startY, Ly    -- first position and length of the seqY segment
 *           null          -- the nullmodel (eta, meta, xem[], yem[] are read)
 *
 * Returns:  the accumulated score; a log2 likelihood when the model
 *           holds log2 values (see NullToLog2()).
 */
double
ScoreWithNull(int *seqX, int *seqY, int startX, int Lx, int startY, int Ly, struct nullmodel_s *null)
{
  int    i;           /* offset inside the current segment */
  int    sym;         /* symbol at that offset             */
  double sc;

  sc = 2. * null->eta;

  for (i = 0; i < Lx; i++) {
    sym = seqX[startX + i];
    if (sym < 4)
      sc += null->xem[sym] + null->meta;
  }

  for (i = 0; i < Ly; i++) {
    sym = seqY[startY + i];
    if (sym < 4)
      sc += null->yem[sym] + null->meta;
  }

  return sc;
}
/* Function: ScoreWithNullDiag()
 *
 * Purpose:  Score the same window [start, start+L) of both sequences
 *           with the null model. The score starts at 2 * null->eta; at
 *           each position the seqX symbol and then the seqY symbol, if
 *           < 4, add their emission term plus null->meta.
 *
 * Args:     seqX, seqY -- digitized sequences
 *           start      -- first position of the window
 *           L          -- length of the window
 *           null       -- the nullmodel (eta, meta, xem[], yem[] are read)
 *
 * Returns:  the accumulated score.
 */
double
ScoreWithNullDiag(int *seqX, int *seqY, int start, int L, struct nullmodel_s *null)
{
  int    off;          /* offset inside the window      */
  int    pos;          /* absolute position, start+off  */
  int    sx, sy;       /* symbols found at pos          */
  double sc;

  sc = 2. * null->eta;

  for (off = 0; off < L; off++) {
    pos = start + off;

    sx = seqX[pos];
    if (sx < 4)
      sc += null->meta + null->xem[sx];

    sy = seqY[pos];
    if (sy < 4)
      sc += null->yem[sy] + null->meta;
  }

  return sc;
}

/* Function: ScoreWithNullFJScan()
 *
 * Purpose:  Fill one cell, fjmx[jmod][l], of the null-model scan matrix.
 *           For l = 0 or 1 the cell is computed from scratch with
 *           ScoreWithNullDiag(seqX, seqY, j, l, null). For any other l
 *           it is the cell of the previous row, fjmx[jmod-1][l-1] (row
 *           index wrapped around by win), plus the contribution of the
 *           symbol pair found at position j.
 *
 * Args:     seqX, seqY -- digitized sequences
 *           win        -- number added to a negative row index to wrap it
 *                         (presumably the number of rows of fjmx -- confirm
 *                         against caller)
 *           j          -- absolute position in the sequences
 *           jmod       -- row of fjmx that corresponds to j
 *           l          -- column of fjmx to fill
 *           null       -- the nullmodel
 *           fjmx       -- the matrix being filled
 *
 * Returns:  void. Writes fjmx[jmod][l].
 */
void
ScoreWithNullFJScan(int *seqX, int *seqY, int win, int j, int jmod, int l, struct nullmodel_s *null, double **fjmx)
{
  int prev_row;           /* row that precedes jmod, with wrap-around */
  int sym_x, sym_y;       /* nucleotides at position j                */

  prev_row = jmod - 1;
  if (prev_row < 0)
    prev_row += win;

  sym_x = seqX[j];
  sym_y = seqY[j];

  switch (l) {
  case 0:
  case 1:
    fjmx[jmod][l] = ScoreWithNullDiag(seqX, seqY, j, l, null);
    break;

  default:
    fjmx[jmod][l] = fjmx[prev_row][l-1] + NullAddOnePair(sym_x, sym_y, null);
    break;
  }

}

/* Function: ScoreWithNullX()
 *
 * Purpose:  Score a segment of seqX alone with the null model.
 *           The score starts at null->eta; every symbol < 4 adds
 *           null->xem[symbol] + null->meta, symbols >= 4 are ignored.
 *
 * Args:     seqX   -- digitized sequence
 *           startX -- first position of the segment
 *           Lx     -- length of the segment
 *           null   -- the nullmodel (eta, meta, xem[] are read)
 *
 * Returns:  the accumulated score.
 */
double
ScoreWithNullX(int *seqX, int startX, int Lx, struct nullmodel_s *null)
{
  int    i;        /* offset inside the segment */
  int    sym;      /* symbol at that offset     */
  double sc;

  sc = null->eta;

  for (i = 0; i < Lx; i++) {
    sym = seqX[startX + i];
    if (sym < 4)
      sc += null->xem[sym] + null->meta;
  }

  return sc;
}

/* Function: ScoreWithNullY()
 *
 * Purpose:  Score a segment of seqY alone with the null model.
 *           The score starts at null->eta; every symbol < 4 adds
 *           null->yem[symbol] + null->meta, symbols >= 4 are ignored.
 *
 * Args:     seqY   -- digitized sequence
 *           startY -- first position of the segment
 *           Ly     -- length of the segment
 *           null   -- the nullmodel (eta, meta, yem[] are read)
 *
 * Returns:  the accumulated score.
 */
double
ScoreWithNullY(int *seqY, int startY, int Ly, struct nullmodel_s *null)
{
  int    i;        /* offset inside the segment */
  int    sym;      /* symbol at that offset     */
  double sc;

  sc = null->eta;

  for (i = 0; i < Ly; i++) {
    sym = seqY[startY + i];
    if (sym < 4)
      sc += null->yem[sym] + null->meta;
  }

  return sc;
}


/* Function: ScoreWithNullRev()
 * Date:     ER,Thu Jul 8 11:44:11 CDT 1999 [St. Louis]
 *
 * Purpose:  Score the reverse strand of two sequence segments with the
 *           null model. Symbols >= 4 (gaps, coded as 4) are ignored.
 *           The sequences are not actually reversed: the score is a
 *           plain sum over positions, so only the complement is needed,
 *           and that is obtained as 3 - symbol:
 *                      0(A) --> 3 - 0 = 3 (U)
 *                      1(C) --> 3 - 1 = 2 (G)
 *                      2(G) --> 3 - 2 = 1 (C)
 *                      3(U) --> 3 - 3 = 0 (A)
 *
 * Args:     seqX, seqY    -- digitized sequences
 *           startX, Lx    -- first position and length of the seqX segment
 *           startY, Ly    -- first position and length of the seqY segment
 *           null          -- the nullmodel (eta, meta, xem[], yem[] are read)
 *
 * Returns:  the accumulated score, starting from 2 * null->eta.
 */
double
ScoreWithNullRev(int *seqX, int *seqY, int startX, int Lx, int startY, int Ly, struct nullmodel_s *null)
{
  int    i;           /* offset inside the current segment */
  int    sym;         /* symbol at that offset             */
  double sc;

  sc = 2. * null->eta;

  for (i = 0; i < Lx; i++) {
    sym = seqX[startX + i];
    if (sym < 4)
      sc += null->xem[3 - sym] + null->meta;
  }

  for (i = 0; i < Ly; i++) {
    sym = seqY[startY + i];
    if (sym < 4)
      sc += null->yem[3 - sym] + null->meta;
  }

  return sc;
}
/* Function: ScoreWithNullRevDiag()
 *
 * Purpose:  Reverse-strand counterpart of ScoreWithNullDiag(): score the
 *           same window [start, start+L) of both sequences, looking up
 *           the complement (3 - symbol) of every symbol < 4.
 *
 * Args:     seqX, seqY -- digitized sequences
 *           start      -- first position of the window
 *           L          -- length of the window
 *           null       -- the nullmodel (eta, meta, xem[], yem[] are read)
 *
 * Returns:  the accumulated score, starting from 2 * null->eta.
 */
double
ScoreWithNullRevDiag(int *seqX, int *seqY, int start, int L, struct nullmodel_s *null)
{
  int    off;          /* offset inside the window      */
  int    pos;          /* absolute position, start+off  */
  int    sx, sy;       /* symbols found at pos          */
  double sc;

  sc = 2. * null->eta;

  for (off = 0; off < L; off++) {
    pos = start + off;

    sx = seqX[pos];
    if (sx < 4)
      sc += null->xem[3 - sx] + null->meta;

    sy = seqY[pos];
    if (sy < 4)
      sc += null->yem[3 - sy] + null->meta;
  }

  return sc;
}


/* Function:  SimulateNullAlign()
 * Date:     ER, Tue Feb  1 11:22:15 CST 2000 [St. Louis]
 *
 * Purpose:  Simulate two sequences under the nullmodel. Every position
 *           of s1 is drawn with DLog2Choose() from null->xem, then
 *           every position of s2 from null->yem.
 *
 * Args:     s1, s2    - the sequences to fill (allocated by the caller)
 *           L1        - length of s1
 *           L2        - length of s2
 *           null      - the null model (xem[] and yem[] are read)
 *
 * Returns:  void. s1[0..L1-1] and s2[0..L2-1] are overwritten.
 */
void
SimulateNullAlign(int *s1, int *s2, int L1, int L2, struct nullmodel_s *null)
{
  int i;

  /* all of s1 is drawn first, then all of s2 */
  for (i = 0; i < L1; i++)
    s1[i] = DLog2Choose(null->xem, 4);

  for (i = 0; i < L2; i++)
    s2[i] = DLog2Choose(null->yem, 4);

}


/* Function: SimulateNullSequence()
 * Date:     SRE, Thu Jun 11 09:09:49 1998 [St. Louis]
 *
 * Purpose:  Given a sequence, simulate a second sequence related to it
 *           by the nullmodel. Each row x of the 4x4 matrix is divided
 *           by 0.25 and passed to DNorm(); for every residue of s1 the
 *           matching row is then sampled with DChoose() to pick the
 *           residue of the new sequence.
 *
 * Args:     s1        - the starting sequence (residues are mapped with DNAIDX())
 *           L         - length of s1
 *           nullmodel - the null model, as a 4x4 matrix indexed [x][y]
 *
 * Returns:  s2, an alloc'ed, NUL-terminated simulated second sequence
 *           of length L. The caller is responsible for free'ing it.
 */
char *
SimulateNullSequence(char *s1, int L, double nullmodel[4][4])
{
  double  cond[4][4];		/* one distribution over y per row x */
  char   *simulated;
  int     row, col;
  int     i;

  simulated = MallocOrDie(sizeof(char) * (L+1));

  /* Build the per-row distributions
   */
  for (row = 0; row < 4; row++) {
    for (col = 0; col < 4; col++)
      cond[row][col] = nullmodel[row][col] / 0.25;

    DNorm(cond[row], 4);
  }

  /* Draw one residue per position of s1
   */
  for (i = 0; i < L; i++) {
    row = DNAIDX(s1[i]);
    col = DChoose(cond[row], 4);
    simulated[i] = DNAAlphabet[col];
  }
  simulated[L] = '\0';

  return simulated;
}



