// **********************************
// from basic code to apps: keep this file unchanged
// **********************************

// RuleMaker.m                                        

#import "RuleMaker.h"
#import <random.h>
#import <math.h>
#import <stdlib.h> // to use calloc()

@implementation RuleMaker

// Finishes object creation: lets the superclass complete its own createEnd
// and then marks the pattern-order vector p as "not yet allocated".
// Returns self.
- createEnd
{
  [super createEnd];

  // p is the internal vector used to randomize the learning sequence;
  // NULL means it has never been allocated. The training method frees and
  // reallocates it on every invocation.
  // NOTE(review): no method visible in this file releases p when the
  // object is dropped - confirm whether that is handled elsewhere.
  p = NULL;

  return self;
}

// Stores the helper object used for the matrix-by-vector products of the
// forward pass (it later receives the m1:m2:to1: and m1:m2:to: messages).
// Returns self so that calls can be chained.
- setMatrixMult: mM
{
  matrixMult = mM;

  return self;
}

// Stores the helper object that applies the transformation function and
// its derivative to a layer vector (it later receives the from1:to1:,
// from1:to1Derivative:, from:to: and from:toDerivative: messages).
// Returns self so that calls can be chained.
- setVectorTransFunc: vts
{
  vectorTransFunc = vts;

  return self;
}

// Trains the neural network whose matrixes and parameters are held by
// agentDw, by back propagation with learning rate eps and momentum alpha.
//
// Parameter:
//   agentDw  the agent's data warehouse; every matrix, vector and setting
//            used below is fetched from it at each invocation
// Returns self.
//
// Side effects: updates who, wih, dwho, dwih, the layer vectors, the
// output and target training matrixes (and, in the CT relearning scheme,
// the last row of the data training matrix); reallocates the internal
// vector p; stores backPropagationError and proportionalError, measured
// at the end of the first epoch of the last learning pass executed.
//
// we are here working with a neural network y=f(B f(Ax))
// where x and y are vectors, A and B are matrixes and f
// is a transformation function, here the
// logistic one r(s)=(1+e^(-k*s))^-1
//
// when introducing a constant value (the bias) as input and hidden node
// (neuron)
//            y=f(B f(Ax)) becomes
//            y=f(B (1, f(A (1,x')')' )' )
- adaptRulesToInputOutputValuesInDataWarehouse: agentDw
{
  int i, I, j, iii, n, temp, row, col, colOut, initRow, epochNumber;
  int usingOutputsAsTargetsInCT_Relearning; // index of the learning pass:
                                            // an integer counter, so that
                                            // the == tests below are exact
  float sum, delta;

  agentDataWarehouse=agentDw;

  // getting parameters and addresses of the matrixes

  dataTrainingMatrix   = [agentDataWarehouse getDataTrainingMatrix];
  outputTrainingMatrix = [agentDataWarehouse getOutputTrainingMatrix];
  targetTrainingMatrix = [agentDataWarehouse getTargetTrainingMatrix];
  minmax=[agentDataWarehouse getMinmax];
  inputNodeNumber  = [agentDataWarehouse getInputNodeNumber];
  inputNodeNumber1 = inputNodeNumber+1;
  hiddenNodeNumber = [agentDataWarehouse getHiddenNodeNumber];
  hiddenNodeNumber1= hiddenNodeNumber+1;
  outputNodeNumber = [agentDataWarehouse getOutputNodeNumber];
  patternNumberInTrainingSet = [agentDataWarehouse
                                         getPatternNumberInTrainingSet];
  epochNumberInEachTrainingCycle  = [agentDataWarehouse
                                         getEpochNumberInEachTrainingCycle];

  wih = [agentDataWarehouse getWih];
  dwih= [agentDataWarehouse getDwih];
  who = [agentDataWarehouse getWho];
  dwho= [agentDataWarehouse getDwho];
  inputLayer  = [agentDataWarehouse getInputLayer];
  hiddenLayer = [agentDataWarehouse getHiddenLayer];
  hiddenLayerTransFuncDerivatives = [agentDataWarehouse
                                     getHiddenLayerTransFuncDerivatives];
  outputLayer = [agentDataWarehouse getOutputLayer];
  outputLayerTransFuncDerivatives = [agentDataWarehouse
                                     getOutputLayerTransFuncDerivatives];
  deltaOut = [agentDataWarehouse getDeltaOut];

  eps=[agentDataWarehouse getEps];
  alpha=[agentDataWarehouse getAlpha];
  usingRandomOrderInLearning=[agentDataWarehouse getRandomOrderInLearning];
  longTermLearningInCT_OnlyWithCompleteTrainingSet=[agentDataWarehouse
                         getLongTermLearningInCT_OnlyWithCompleteTrainingSet];
  useOutputsAsTargetsInCT_RelearningScheme=[agentDataWarehouse
                         getUseOutputsAsTargetsInCT_RelearningScheme];

  // dealing with technical consistency
  if(patternNumberInTrainingSet>0)                 // we have targets ...
       useOutputsAsTargetsInCT_RelearningScheme=0; // so we use them
  if(patternNumberInTrainingSet < 0)      // we have here a CT case; anyway
       patternNumberInTrainingSet *= -1;  // in RuleMaster the value of
                                          // patternNumberInTrainingSet
                                          // must be >0

  // when, in CT scheme, relearning is applied to previous Outputs treated
  // as Targets, the learning cycle is repeated twice
  for (usingOutputsAsTargetsInCT_Relearning=0;
       usingOutputsAsTargetsInCT_Relearning <=
                     useOutputsAsTargetsInCT_RelearningScheme;
       usingOutputsAsTargetsInCT_Relearning++)
  {

    // to build a vector of patterns and to randomize them

    // we are deallocating and reallocating the p vector each time
    // RuleMaker is invoked, otherwise we would have the problem of
    // allocating a p vector with a static number of elements (may be,
    // in future development of the code, this number could change from
    // agent to agent ...)
    // if p has never been used, the address stored in it (as set in
    // createEnd) is NULL, and free(NULL) does nothing
    free(p);
    p= (int *) calloc(patternNumberInTrainingSet,sizeof(int));
    if (p == NULL && patternNumberInTrainingSet > 0)
      abort(); // out of memory: stop here instead of writing through NULL
    for (i=0;i<patternNumberInTrainingSet;i++)
      p[i]=i;

    // targets in each pattern: they are stored in the data training matrix
    // in the columns following the inputNodeNumber input columns
    for (i=0;i<patternNumberInTrainingSet;i++)
      for (j=0;j<outputNodeNumber;j++)
        [targetTrainingMatrix R: i C: j setFrom:
                        [dataTrainingMatrix R: i C: inputNodeNumber+j]];

    initRow= [dataTrainingMatrix getFirstRow];
    // this is generally row 0, except in CT, where it can be > 0

    epochNumber=epochNumberInEachTrainingCycle;
    // CT long term relearning options
    if (longTermLearningInCT_OnlyWithCompleteTrainingSet == 1 &&
        initRow != 0) // if (T && T) we need to have a complete training set
                      // to apply relearning and we haven't it
    {
      initRow=patternNumberInTrainingSet-1;
                      // we are filling this matrix from bottom to top
                      // and this is the number of the last line, that is
                      // the last filled
      epochNumber=1;  // excluding relearning
    }

    // first step in CT long term learning with Outputs as Targets
    if (usingOutputsAsTargetsInCT_Relearning == 0 &&
        useOutputsAsTargetsInCT_RelearningScheme == 1)
    {
      initRow=patternNumberInTrainingSet-1;
      epochNumber=1;
    }

    // to start epochNumberInEachTrainingCycle (copied in epochNumber)
    // training steps
    for (n=0; n<epochNumber;n++)
    {

      // to randomize pattern vector p
      // nb: the reordering is executed also when
      // usingRandomOrderInLearning != 1, so the random numbers are drawn
      // in both cases
      for (i=patternNumberInTrainingSet; i>initRow+1; i--)
      {
        // iii randomly defined in the range 0 (or initRow) to i-1
        iii=[uniformIntRand getIntegerWithMin: initRow withMax: i-1];
        temp=p[iii];
        p[iii]=p[i-1];
        p[i-1]=temp;  // vector p is so filled in random order, reordering
      }               // its contents from the last to the first

      for (I=initRow;I<patternNumberInTrainingSet;I++)
      {
        if(usingRandomOrderInLearning==1) i=p[I];
        else                              i=I;

        // building inputLayer, i.e. the (1,x')' vector in
        // y=f(B (1, f(A (1,x')')' )' )
        [inputLayer P: 0 setFrom: 1.]; // the bias, i.e. an artificial input
                                       // always set to 1, whose multiplying
                                       // parameters in A will operate as
                                       // constant values within the
                                       // summations transformed by f(.)
        for (j=0;j<inputNodeNumber;j++)
          [inputLayer P: j+1 setFrom: [dataTrainingMatrix R: i C: j]];

        // to build hiddenLayer, i.e. to do the A (1,x')' operation
        // wih (input to hidden) is the A matrix
        [matrixMult m1: wih m2: inputLayer  to1: hiddenLayer];

        // to calculate derivatives, applying
        // d/dsi f(A (1,x')') where si is an element of the s=A (1,x')'
        // vector ('to1' indicates a +1 displacement in the resulting
        // vector)
        [vectorTransFunc from1: hiddenLayer to1Derivative:
                                        hiddenLayerTransFuncDerivatives];
        // these derivatives will be useful in the backpropagation
        // calculation

        // to apply the transformation function, i.e. to calculate
        // f(A (1,x')')
        [vectorTransFunc from1: hiddenLayer to1: hiddenLayer];
        // nb: do not shift this line before the previous one; this
        // transformation changes the values of the hiddenLayer vector

        // hidden bias
        [hiddenLayer P: 0 setFrom: 1.];// the bias, i.e. an artificial hidden
                                       // node always set to 1, whose
                                       // multiplying parameters in B will
                                       // operate as constant values within
                                       // the summations transformed by f(.)

        // to build the output layer, i.e. to do the B (1, f(.)' )' operation
        // who (hidden to output) is the B matrix
        [matrixMult m1: who m2: hiddenLayer to: outputLayer];

        // to calculate derivatives, applying
        // d/dzi f(B (1, f(.)' )' ) where zi is an element of
        // the z=B (1, f(.)' )' vector
        [vectorTransFunc from: outputLayer toDerivative:
                                          outputLayerTransFuncDerivatives];
        // these derivatives will be useful in the backpropagation
        // calculation

        // to apply the transformation function, i.e. to calculate
        // f(B (1, f(.)' )' )
        [vectorTransFunc from: outputLayer to: outputLayer];
        // nb: do not shift this line before the previous one; this
        // transformation changes the values of the outputLayer vector

        // outputs in each pattern
        for (j=0;j<outputNodeNumber;j++)
          [outputTrainingMatrix R: i C: j setFrom: [outputLayer P: j]];

        // Error: we use here 1/2 to simplify derivatives of a widely applied
        // measure of quadratic error;
        // as a consequence, the usual learning rate eps=0.6 is actually
        // equivalent to a 0.3 correction (due to the error definition)

        // E = 1/2 SUM (t-o)^2
        // where SUM is the summation operator, operating over j, the output
        //       index
        //       t (target) is the actual value to be approximated
        //       o is an element of y=f(B (1, f(A (1,x')')' )' ),
        //        the vectors containing the outputs of the neural network
        //        function

        // we are now calculating the derivatives of E with respect to each
        // parameter of the function (i.e. each element of A and B)
        // dE/dw where w is an element of who (B) or wih (A)

        // dE/dwho = -(t-o) Df(B (1, f(.)' )' ) H
        // where H is a hidden node value, i.e. an element in the
        // f(A (1,x')') vector

        // we define deltaOut=(t-o) Df(B (1, f(.)' )' )

        // deltaOut determination
        for (j=0;j<outputNodeNumber;j++)
          [deltaOut P: j setFrom: ([targetTrainingMatrix R: i C: j]-
                                   [outputLayer P: j])*
                                   [outputLayerTransFuncDerivatives P: j] ];

        // dwho (correction of who on the basis of -dE/dwho, with eps and the
        // momentum alpha)
        for (row=0;row<outputNodeNumber; row++)
          for (col=0;col<hiddenNodeNumber1;col++)
            [dwho R: row C: col setFrom:
                          eps*[deltaOut P: row]* [hiddenLayer P: col]
                          + alpha*[dwho R: row C: col]];

        // dE/dwih = SUM[-(t-o) Df(B (1, f(.)' )' )] Df(A (1,x')') wih I
        // where I is an input value, i.e. an element in the (1,x')' vector

        // dwih (correction of wih on the basis of -dE/dwih, with eps and the
        // momentum alpha)
        // nb: who is read here before being corrected below; row+1 skips
        // the position 0 reserved to the hidden bias
        for (row=0;row<hiddenNodeNumber; row++)
        {
          sum=0;
          for (colOut=0;colOut<outputNodeNumber;colOut++)
            sum+=[deltaOut P: colOut]*[who R: colOut C: row+1];

          delta=sum * [hiddenLayerTransFuncDerivatives P: row+1];

          for (col=0;col<inputNodeNumber1;col++)
            [dwih R: row C: col setFrom: eps*delta*[inputLayer P: col]
                          + alpha*[dwih R: row C: col]];
        }

        // who and wih correction
        // 'setFromP' operates as '+='
        for (row=0;row<outputNodeNumber; row++)
          for (col=0;col<hiddenNodeNumber1;col++)
            [who R: row C: col setFromP: [dwho R: row C: col]];

        for (row=0;row<hiddenNodeNumber; row++)
          for (col=0;col<inputNodeNumber1; col++)
            [wih R: row C: col setFromP: [dwih R: row C: col]];

      }

      if (n==0) // first epoch
      {
        // errors at the end of the first epoch ...
        // this evaluation is made here to obtain the maximum correspondence
        // to that made on the verification set, when training and
        // verification set are coincident

        // an absolute correspondence is not achievable because in this
        // routine the weights are changing at each pattern examination, as
        // an effect of the learning process

        backPropagationError=0;
        proportionalError=0;

        for (i=initRow;i<patternNumberInTrainingSet;i++)
        {

          // conventional back propagation error (sum of all the output units
          // in all the patterns, i.e. epoch error, divided by the no. of
          // patterns); the error is also conventionally divided by 2

          for (j=0;j<outputNodeNumber;j++)
            backPropagationError+=pow([outputTrainingMatrix R: i C: j]-
                                      [targetTrainingMatrix R: i C: j],2)/
                                      (2*(patternNumberInTrainingSet-initRow));

          // initRow is normally ==0, but in CT use we can
          // operate with a partially empty training set, filled
          // bottom up

          // proportional error (arithmetic mean of all the output units in
          // all the patterns, i.e. epoch error)

          // this error is the ratio obtained dividing the abs value of
          // the difference of each target minus each output by one half of
          // the difference of the internal max value
          // {i.e. [minmax R:j+inputNodeNumber C:3]} minus the internal min
          // value {i.e. [minmax R:j+inputNodeNumber C:2]} (the metrics is
          // here that of the internal values; anyway, when calculating a
          // ratio, the metrics is not relevant)

          // this is a raw measure of the error as a proportion
          // of the range (divided by 2) of validity of the output values

          // we have max error=2; min error=0

          for (j=0;j<outputNodeNumber;j++)
          {
            proportionalError+=
               ( fabs([outputTrainingMatrix R: i C: j]
                    - [targetTrainingMatrix R: i C: j]) /
                    (([minmax R:j+inputNodeNumber C:3] -
                      [minmax R:j+inputNodeNumber C:2])/2.)
               ) / (outputNodeNumber*(patternNumberInTrainingSet-initRow));
                    // the last quotient calculates the mean upon
                    // all patterns and output units
          }

        }
      }

    }

    // CT long term learning with Outputs as Targets, substituting
    // 'historical' Outputs to 'historical' Targets
    if (usingOutputsAsTargetsInCT_Relearning == 0 &&
        useOutputsAsTargetsInCT_RelearningScheme == 1)
      for (j=0;j<outputNodeNumber;j++)
        [dataTrainingMatrix R: patternNumberInTrainingSet-1
                            C: inputNodeNumber+j
                            setFrom:
            [outputTrainingMatrix R: patternNumberInTrainingSet-1
                                  C: j]];
  }

  return self;
}

// Returns the conventional back propagation error (half of the squared
// output-minus-target differences, averaged over the patterns used) as
// measured at the end of the first epoch of the most recent learning pass.
-(float) getBackPropagationErrorInTrainingSet
{
  float epochError = backPropagationError;

  return epochError;
}

// Returns the proportional error (mean absolute output-minus-target
// difference, expressed as a fraction of half the minmax range of each
// output) as measured at the end of the first epoch of the most recent
// learning pass.
-(float) getProportionalErrorInTrainingSet
{
  float epochError = proportionalError;

  return epochError;
}

@end

