/* my 5th program in C under UNIX. 

TO DO
  add threshold to args
  [done] add BSC matrix computation (blackboard)




The LBG algorithm			*/

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <unistd.h>

#include "bitcount.h"

//these flags will be passed in to chose what kind of channel we have
//FIXME NOT USED YET
#define BSC 		0
#define GAUSSIAN	1

/* 
 * Simulate senging an index value over a AWGN channel and getting a 
 * analog value back
 * 
 */
double AWGNchannel ( int index, double sd )
{
   //question here, were are just adding noise to the value of
   //the index that we are sending over the channel, is that correct?
   
   return (double)index + sd * Gaussian() ;
}

double Q ( double x )
{
  if ( x >= 0 )
  {
    return 0.5 * erfc( x / sqrt(2) ) ;
  }
  else  
  {
    return 1 - Q( -1 * x ) ;
  }
   
}

/*
 * This function will calculate the probability that a bit string
 * represented by j will result from passing the bit string i over
 * a BSC with epsilon = e .
 * 
 * p(j|i) = epsilon ^ (num errors) * (1-epsilon)^(length-num errors)
 */
double bscProbability( int i, int j, int length, double e )
{
  int errorBitField = i ^ j;
  int numErrors = iterated_bitcount( errorBitField );
  return pow( e, numErrors) * pow( 1 - e , length - numErrors ); 
}

void bscCreate ( double ** array , int size , double e)
{
  int i,j;
  for ( i = 0 ; i < size ; i++ )
  {
    for ( j = 0 ; j < size ; j++ )
    {
      array[i][j] = bscProbability(i,j, (int)log2((double)size), e);
    }
  }
}

/*
 * Here we use the bsc functions since they do what we need already.
 * 
 * We are basically just replacing \epsilon with Q( 1 / \sigma_{n} )
 * 
 * sigma_n is the standard deviation of our zero mean gaussion channel noise
 *  
 */

void gaussianCreate( double ** array , int size, double sigma_n )
{
	bscCreate( array, size, Q ( 1 / sigma_n ) ) ;
}


void no_room(char * variable)
{
 printf("Ran out of memory when making room for %s.\a\n", variable);
 exit(0);
}

/* we think this gives a gaussian with mean 0, sd 1 *, correct? */
double Gaussian()
{
 // could use M_PI defined in math.h here, given to 6 more decimals.
 double x, u, radius, phase, pi = 3.14159265358979;
 
 u = drand48(); //returns between 0 and 1
 radius = sqrt(log(1.0/u));
 phase = 2.0*pi*drand48();
 x = radius*cos(phase);
 x *= sqrt(2.0);
 return(x);
}

//why not use fabs( x )??
double Abs(double x)
{
 double y;
 
 y=(x>0)?x:-x;
 return y;
}

double MaxM(double **a, int row, int col)
{
 int i, j;
 double Mx=Abs(a[0][0]);

 for (i=0; i<row; i++) {
     for (j=0; j<col; j++) {
         if (Abs(a[i][j])<=Mx) continue;
         Mx=Abs(a[i][j]);
     }
 }
 return Mx;
}

double Max(double *a, int length, int *index)
{
 int i;
 double Mx=*a;

 *index=0;
 for (i=1; i<length; i++) {
     if (a[i]<=Mx) continue;
     Mx=a[i];
     *index=i;
 }
 return Mx;
}

double Min(double *a, int length, int *index)
{
 int i;
 double Mn=a[0];
 
 *index=0;
 for (i=1; i<length; i++) {
     if (a[i]>=Mn) continue;
     Mn=a[i];
     *index=i;
 }
 return Mn;
}

int MinI(int *a, int length, int *index)
{
 int i, Mn=a[0];
 
 *index=0;
 for (i=1; i<length; i++) {
     if (a[i]>=Mn) continue;
     Mn=a[i];
     *index=i;
 }
 return Mn;
}

void check_dim_and_rate(int dim, double rate, int *N)
{
 double x;
 
 x=pow(2, dim*rate);
 *N = (int) floor(x);
 if (*N == x) return;
 printf("\aBit rate not attainable!\n");
 exit(0);
}

void zeros(double *a, int length)
{
 int i;

 for (i=0; i<length; i++)
     a[i]=0;
}

double vec_sum(double *a, int length)
{
 int i;
 double s=0;

 for (i=0; i<length; i++)
     s+=a[i];
 return s;
}

/*******************************************************
                         main

*******************************************************/
int main( int argc, char * argv[] )
{
 double **x, **y, **xx, *dist, *distance, *metric, *yDenom;
 double rate, OldDistortion, NewDistortion;
 double v, signal_power, *SDRdB, threshold;
 double rho, c, old_sample;
 double mean, mini, maxi, vari, bscError;
 int counter, experiments;
 int i, j, k, loop, t, dim, N, index, index_max=0, MaxIter = 100;
 int *m;
 char fn[] = "LGBg.out";
 FILE *fp;
 time_t start, finish;

 int c_opt = 0 ;


 //dirds testing
 //

 printf(" Q(-.1) = %f \n", Q(-0.1) );
 printf(" Q(-0.5) = %f \n", Q(-0.5) );
 
// ================ ask basic info =====================


/* dim = 1;
 t = 6000;
 rate = 2.0;			*/


 /* May use arguments from the command line.
  * If there are non, the program will run interactively and
  * ask the user for inputs.
  */
 dim  = 0;
 t    = 0;
 rate = 0;

 while ( ( c_opt = getopt( argc , argv, "d:t:r:h" ) ) != EOF )
 {
     switch ( c_opt ) 
     {
         case 'd' :
             dim = atoi( optarg );
         break ;
         case 't' :
             t = atoi( optarg );
         break ;
         case 'r' :
             rate = strtod( optarg, NULL );
         break ;
         case 'h' :
             printf ("\nzLBGg arguments\n\td\tDimension\n\tt\tNumber of "
             "training vectors\n\tr\tTarget rate\n");
             exit(0);

     } 
 }
 printf("=============== LBG with Gauss-Markov Samples ====================\n");
 
 threshold = 1.0E-3; //2.0E-4;
 experiments = 1;

 rho = 0.9;
 c = sqrt(1.0-rho*rho);


 if ( 0 >= dim )
 {
     printf("What is the VQ dimension? ");
     scanf("%d", &dim);
 }
 else
 {
     printf("VQ dimension of %d gathered from program arguments.\n",dim);
 }

 if ( 0 >= rate )
 {
     printf("Enter the target bit rate: ");
     scanf("%lf", &rate);
 }
 else
 {
     printf("Rate of %lf gathered from program arguments.\n",rate);
 }

 if ( 0 >= t )
 {
     printf("Enter the number of training vectors ");
     scanf("%d", &t);
 }
 else
 {
     printf("# training vectors of %d gathered from program arguments.\n",t);
 }

 check_dim_and_rate(dim, rate, &N);

 if ((fp=fopen(fn, "a"))==NULL) {
     printf("Cannot open data file. Program terminates...\n");
     exit(0);
 }
 else {
     fprintf(fp, "=================================================\n");
     fprintf(fp, "Quantizer:\tdim = %d\n", dim);
     fprintf(fp, "\t\trate = %.1lf bps\n", rate);
     fprintf(fp, "\t\tthreshold = %.1e\n", threshold);
     fprintf(fp, "\t\ttraining vectors = %d.\n\n", t);
 }

// ======= make room for the probArray ++++++++==========
 double ** probArray = NULL ;

 probArray=(double**)malloc( N*sizeof(double*));
 if (probArray==NULL) no_room("bsc");
 for (i=0; i<N; i++) {
     probArray[i]=(double*)malloc(N*sizeof(double));
     if (probArray[i]==NULL) no_room("bsc");
 }
 
 
/*
 double bscError = 10E-3 ;

 bscCreate( probArray, N , bscError );
 printf("\nHere is the test BSC array\n\n");
 for ( i = 0 ; i < N ; i++ )
 {
    for ( j = 0 ; j < N ; j++ )
    {
      printf( "%.6f ", probArray[i][j] );
    }
      printf("\n");
 }

 printf("\n");
 */
 

// ======= make room for the training samples ==========
 
 x=(double**)malloc(dim*sizeof(double*));
 if (x==NULL) no_room("x");
 for (i=0; i<dim; i++) {
     x[i]=(double*)malloc(t*sizeof(double));
     if (x[i]==NULL) no_room("x");
 }

// ========= make room for the codevectors =============

 y=(double**)malloc(dim*sizeof(double*));
 if (y==NULL) no_room("y");
 for (i=0; i<dim; i++) {
     y[i]=(double*)malloc(N*sizeof(double));
     if (y[i]==NULL) no_room("y");
 }
 
// ========== make room for the distortions =============

 dist=(double*)malloc(N*sizeof(double));
 if (dist==NULL) no_room("dist");

// ========== Signal-to-Distortion Ratio =============

 SDRdB=(double*)malloc(experiments*sizeof(double));
 if (SDRdB==NULL) no_room("SDRdB");
 
// ========== make room for the frequencies =============

 m=(int*)malloc(N*sizeof(int));
 if (m==NULL) no_room("m");

// =========== make room for the distances ==============

 distance=(double*)malloc(N*sizeof(double));
 if (distance==NULL) no_room("distance");

// ======== make room for cluster accumulator ===========

 xx=(double**)malloc(dim*sizeof(double*));
 if (xx==NULL) no_room("xx");
 for (i=0; i<dim; i++) {
     xx[i]=(double*)malloc(N*sizeof(double));
     if (xx[i]==NULL) no_room("xx");
 }
  
// =========== make room for the denominator ==============
 /*
  * this is to take care of the new sums we have to compute
  * for the bsc centroid condition
  */
 yDenom=(double*)malloc(dim*sizeof(double));
 if (yDenom==NULL) no_room("yDenom");
  
// =========== make room for the metrics ==============

 metric=(double*)malloc(N*sizeof(double));
 if (metric==NULL) no_room("metric");

// =============== begin computations ===================


 for (counter = 0; counter<experiments; counter++) {
 
     /* seed the random number generator */
     start = time(0);
     srand48(start%1000);

     /* Fill up training samples with Gaussians*/
     old_sample = Gaussian();
     for (j=0; j<t; j++) 
     {
         for (i=0; i<dim; i++) 
         {
             
             /* x[i][j] is i^th component in j^th training vector */
             x[i][j] = rho*old_sample + c*Gaussian();
             old_sample = x[i][j];
         }
     }

     /* 
      * fill up codevectors randomly 
      * these will be the values of the centroids 
      * ie the representation of the region
      */
     for (i=0; i<dim; i++) 
         for (j=0; j<N; j++)
             y[i][j] = 2*(drand48()-0.5); //in (-1,1)

     /* make this big to start so we have something to do */
     OldDistortion = 1E10;

     /* this is the transition error probability of the bsc */
     bscError = 10E-3 ;

     /* fill up a transition probability matrix to reference later */
     bscCreate( probArray, N , bscError );

     /* this is where the real work is done */
     for (loop=1; loop<=MaxIter; loop++) 
     {
         for (k=0; k<N; k++) 
         {
             
             /* 
              * set frequencies(m_ and cluster accumulator (xx) to 0 
              * FIXME is m the number of samples in a quant region? 
              * FIXME is xx the sum of the distortions of the region?
              */
             m[k]=0;

             for (i=0; i<dim; i++)
                 xx[i][k]=0;
         }

         zeros(dist, N);
         NewDistortion = 0;
         for (i=0; i<t; i++) //every training vector 
         {
             /*if ( 0 == i % 100)
                 printf("Training vector %d now.\n",i);
              */
             for (j=0; j<N; j++) //every region
             {
                 distance[j]=0;
                 for (k=0; k<dim; k++)
                     /* 
                      * distortion measure 
                      *
                      * y is codevectors
                      * x are samples
                      * mean square error is stored for region j
                      */
                     //distance[j]+=((x[k][i]-y[k][j])*(x[k][i]-y[k][j]));
                     distance[j]+= pow( x[k][i]-y[k][j] , 2 );
             }
             for (j=0;j<N;j++) // every region
             {
                 metric[j] = 0;
                 for (k=0;k<N;k++) //every region
                 {
                     //metric[j] += bscProbability( k, j, log2(N), bscError) *
                     metric[j] += probArray[k][j] * distance[k]; 
                 }
             }
             /* 
              * we are putting the i^th training vector into region 
              * with index "index"
              */

              /* 
               * v holds value of smallest distance
               * index holds the region index of that smallest value
               */
             
             /* 
              * this was changed for going from VQ to COVQ
              *
              * v=Min(distance, N, &index);
              */
             v=Min(metric, N, &index);

             /* increase distance for that region by v, the distance */
             dist[index]+=v;

             /* note that we have one more vector in this region */
             m[index]++;

             /* 
              * add this vector's values to the accumulator so that
              * later we can find the average value in this region
              */
             for (k=0; k<dim; k++) 
                 xx[k][index]+=x[k][i];
         }
         
         /* if any region has no vectors in it, take a vector from the region
          * with the most distortion and put it in there.
          */
         
         /* 
          * we don't this while loop here for COVQ because we now allow, 
          * for empty regions.
          *
         
         while (!MinI(m, N, &index)) 
         {
             / * find the region with the highest (MAXimum) total error * /
             v = Max(dist, N, &index_max);

             / * replace the code vector with minimum error 
              * by putting in it's place a slightly changed
              * value of the 
              * multiply by something in (0.9, 1.1)
              * /
             for (k=0; k<dim; k++) 
                 y[k][index]=y[k][index_max]*(1+0.2*(drand48()-0.5));
             m[index]++;

             if (loop != MaxIter) 
                 dist[index_max]/=4;
         }
         */

         /* average distortion per dimension per training vector */
         NewDistortion = vec_sum(dist, N)/(dim*t);         
         
         printf("Loop %d, distortion = %.10lf. ", loop, NewDistortion);
         
         if (Abs(OldDistortion-NewDistortion)<threshold*OldDistortion) break;
         
         printf("change = %.10lf.\n", OldDistortion-NewDistortion);
         
         OldDistortion = NewDistortion;

         /* set the new codevector to be the centriod of the region */
         /* centroid condition */
         for (j=0; j<N; j++)// region of codevector 
         {
           zeros( yDenom , dim );
           for(k=0;k<N;k++) // counter for looping over all other regions
           {
             for (i=0; i<dim; i++)
             {
                
                /*
                 * we used to have a while loop making sure that there
                 * were no empty regions, but if we have an empty region
                 * we had better not divide by the number of vectors in that
                 * region!

                 * note that for P(j|i) we look up probArray[i][j] to stay
                 * within convention for the array / table structure of
                 * inputs in rows and outputs in columns

                 */
                if ( m[k] != 0 )
                {
                    //FIXME Js and Ks are dicey!
                    y[i][j] += probArray[j][k]* xx[i][k];
                    yDenom[i] += probArray[j][k] * m[k] ;
                }
                else
                {
                  printf("divide by zero (empty region)!\n");
                }
             } //i
           }//k
           for ( i=0; i < dim ; i++ )
           {
              y[i][j] /= yDenom[i] ;
           }
         }//j
     } //for loop
     
     signal_power = 0;
     for (i=0; i<t; i++)
         for (j=0; j<dim; j++)
             signal_power += (x[j][i]*x[j][i]);
             
     /* average power per training vector per dimension */
     signal_power /= (dim*t);

     /* that counter runs through the number of experiments */
     SDRdB[counter] = 10.0*log10(signal_power/NewDistortion);	

// ============== display outputs and quit ==================

     finish = time(0);
/*
     printf("Codebook:\n");
     for (i=0; i<dim; i++) {
         for (j=0; j<N; j++)
             printf("y(%d, %d)=%.3lf\t", i, j, y[i][j]);
         printf("\n");
     }							*/
 
     printf("VQ dimension = %d, Rate = %.3lf bps, SDR = %.3lf dB.\n", dim, rate, SDRdB[counter]);
     printf("Execution time was %.0lf second(s).\n\n", difftime(finish, start));

     i = difftime(finish, start);
     fprintf(fp, "%d iterations at %d minute(s) and %d second(s). ", loop, i/60, i%60);
     fprintf(fp, "SDR = %.2lf dB.\n", SDRdB[counter]);
 }
 
 mean = 0;
 mini = 400;
 maxi = -10;
 vari = 0;
 
 for (i = 0; i < experiments; i++) {
     mean += SDRdB[i];
     vari += (SDRdB[i]*SDRdB[i]);
     if (mini > SDRdB[i]) mini = SDRdB[i];
     if (maxi < SDRdB[i]) maxi = SDRdB[i];
 }
 
 mean /= experiments;
 vari /= experiments;
 vari -= (mean*mean);
 
 fprintf(fp, "\n\nStatistics:\n");
 fprintf(fp, "\t\tSDR_min = %.2lf dB.\n", mini);
 fprintf(fp, "\t\tSDR_max = %.2lf dB.\n", maxi);
 fprintf(fp, "\t\tSDR_mean = %.2lf dB.\n", mean);
 fprintf(fp, "\t\tSDR_var = %.3lf dB^2.\n\n\n", vari); 
} //main
