#include "include/lts.h"
#include <stdlib.h>
#include <stdio.h>
#include <math.h>

/* Least trimmed squares estimates for univariate location and variance.
   Jussi Tohka jussi.tohka@cs.tut.fi April 29th 2002

Input   : data is a vector containing n samples. Data samples are
          expected to be truly real valued (i.e. too many samples having
          the same value might lead to problems).
Output  : mean and variance of the data.
The algorithm (exact) is described in
P.J. Rousseeuw and A.M. Leroy: Robust Regression and Outlier Detection
John Wiley & Sons 1987.

040827 Kaisa Sederholm
       Replaced function compare_reals with function ltsQSort
       Included function main
040831 KS
       definition of elem_type removed and 
       elem_type variables replaced by normal double variables
2005-01-05 Vesa Oikonen
       Added Doxygen style comments.
2005-01-07 KS
       Removed functions related to calculation of median. Now using functions 
       from median.c
2005-04-26 CL
       Merged with libtpcimgp
2005-06-10 KS
       Added comments
       (removed from libtpcimgp into libtpcmodel)

*****************************************************************************/
#include "include/lts.h"
#include "include/median.h"
/****************************************************************************/
/* Local function declarations */
/* ltsQSort: comparison callback passed to qsort() by least_trimmed_square() */
int ltsQSort(const void *par1, const void *par2);
/****************************************************************************/

/** Least trimmed squares (LTS) estimates for univariate location and variance.

    The location estimate is the mean of that window of h = n - n/2
    consecutive sorted samples which has the smallest sum of squared
    deviations from its own mean. The variance estimate is the median of the
    squared deviations of all samples from that location, divided by
    CHI2INV_1.

    Data samples are expected to be truly real valued (i.e. too many samples
    having the same value give a poor, possibly zero, variance estimate).

    Side effect: data[] is sorted into ascending order in place.

    Written by Jussi Tohka jussi.tohka@cs.tut.fi April 29th 2002.
    The algorithm (exact) is described in
    P.J. Rousseeuw and A.M. Leroy: Robust Regression and Outlier Detection
    John Wiley & Sons 1987.
\return Returns 0, if successful, 1 if working memory could not be allocated
    (mean is already set then, variance is not), and 2 if an argument is
    invalid (NULL pointer or n<1; nothing is modified then).
 */
int least_trimmed_square(
  /** Vector of n sample values; sorted in place by this function */
  double data[],
  /** Number of samples; must be at least 1 */
  long int n,
  /** Output: Mean of sample values */
  double *mean,
  /** Output: Variance of sample values */
  double *variance
) {
  /* Indices share the type of n so that large sample counts are not truncated */
  long int i, j, h, h2;
  double score, best_score, loc, best_loc, dev;
  double sum, power_sum;
  double *sq_dev;

  /* Without this check n<=0 would lead to 0/0 and an empty allocation */
  if(data == NULL || mean == NULL || variance == NULL || n < 1) return(2);

  h = n - n/2;   /* window length: the larger half of the samples */
  h2 = n/2;      /* number of times the window can be shifted by one */

  qsort(data, (size_t)n, sizeof(double), ltsQSort);

  /* Sum and sum of squares of the first window data[0..h-1] */
  sum = 0.0;
  power_sum = 0.0;
  for(i = 0; i < h; i++) {
    sum = sum + data[i];
    power_sum = power_sum + data[i]*data[i];
  }
  loc = sum/h;

  /* The sum of squared deviations from the window mean,
       sum_i (data[i] - loc)^2,
     equals power_sum - sum*loc, so it does not need a loop of its own.
     NOTE: this shortcut subtracts two numbers of similar size and may lose
     precision when the spread is small compared to the sample magnitude. */
  score = power_sum - sum*loc;
  best_score = score;
  best_loc = loc;

  /* Slide the window one sample at a time: drop data[j-1], add data[h-1+j];
     the last window ends at data[n-1] because h - 1 + h2 == n - 1 */
  for(j = 1; j <= h2; j++) {
    sum = sum - data[j - 1] + data[h - 1 + j];
    loc = sum/h;
    power_sum = power_sum - data[j - 1]*data[j - 1]
              + data[h - 1 + j]*data[h - 1 + j];
    score = power_sum - sum*loc;

    if(score < best_score) {
      best_score = score;
      best_loc = loc;
    }
  }
  *mean = best_loc;

  /* Variance: median of squared deviations from the LTS location.
     Earlier versions multiplied every squared deviation by
     (h-1)/best_score before taking the median and multiplied the median by
     best_score/(h-1) afterwards. The two factors cancel, but they produced
     NaN whenever best_score or h-1 was zero (n==1, or at least h equal
     samples), so the scaling is left out. */
  sq_dev = malloc((size_t)n*sizeof(double));
  if(sq_dev == NULL) return(1);
  for(i = 0; i < n; i++) {
    dev = data[i] - best_loc;
    sq_dev[i] = dev*dev;
  }
  *variance = dmedian(sq_dev, n)/CHI2INV_1;
  free(sq_dev);
  return(0);
}

/** Three-way comparison of two double values; used as the qsort() callback.
\return Returns -1 if value1<value2, 1 if value1>value2 and 0 otherwise
*/

int ltsQSort(
    /** pointer to value nr 1 */
    const void *par1,
    /** pointer to value nr 2 */
    const void *par2)
{
  const double lhs = *(const double *)par1;
  const double rhs = *(const double *)par2;

  /* Each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
  return (lhs > rhs) - (lhs < rhs);
}


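/*
 * Algorithm from N. Wirth's book, implementation by N. Devillard.
 * This code is in the public domain.
 * NOTE(review): this attribution presumably belongs to the median routines
 * that were moved to median.c (see the 2005-01-07 changelog entry) - confirm.
 */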


