TPCCLIB
Loading...
Searching...
No Matches
lts.c
Go to the documentation of this file.
1
11/*****************************************************************************/
12#include "libtpcmodel.h"
13/*****************************************************************************/
14/* local function definitions */
15int ltsQSort(const void *par1, const void *par2);
16/*****************************************************************************/
17
18/*****************************************************************************/
31 double data[],
33 long int n,
35 double *mean,
37 double *variance
38) {
39 int i, j, h, h2;
40 double score, best_score, loc, best_loc, old_sum, new_sum, medd;
41 double old_power_sum, new_power_sum;
42 double *scaled_data;
43
44 h = n - n/2;
45 h2 = n/2;
46
47 qsort(data, n, sizeof(double),ltsQSort);
48
49 old_sum=old_power_sum=0.0;
50 for(i=0; i<h; i++) {
51 old_sum = old_sum + data[i];
52 old_power_sum = old_power_sum + data[i]*data[i];
53 }
54
55 loc = old_sum/h;
56 /* For better understanding of the algorithm:
57 O(N^2) implementation of the algorithm would compute score as:
58 score = 0.0;
59 for(i = 0;i < h;i++) {
60 score = score + (data[i] - loc)*(data[i] - loc);
61 }
62 But there is a faster way to this: */
63 score = old_power_sum - old_sum*loc;
64 best_score = score;
65 best_loc = loc;
66 for(j=1; j<h2+1; j++) {
67 new_sum = old_sum - data[j-1] + data[h-1+j];
68 old_sum = new_sum;
69 loc = old_sum/h;
70 new_power_sum = old_power_sum - data[j-1]*data[j-1]
71 + data[h-1+j]*data[h-1+j];
72 old_power_sum = new_power_sum;
73 score = old_power_sum - old_sum*loc;
74 if(score < best_score) {
75 best_score = score;
76 best_loc = loc;
77 }
78 }
79 *mean = best_loc;
80
81 /* For the variance, it is needed to calculate the ellipsoid covering
82 one half of samples. This is not implemented optimally here because
83 data has already been sorted. */
84 scaled_data = malloc(n*sizeof(double));
85 if(scaled_data == NULL) return(1);
86 for(i=0; i<n; i++)
87 scaled_data[i] = (data[i]-best_loc)*((h-1)/best_score)*(data[i]-best_loc);
88 medd = dmedian(scaled_data, n);
89 free(scaled_data);
90 *variance = (best_score/(h-1))*(medd/CHI2INV_1);
91 return(0);
92}
93/*****************************************************************************/
94
95/*****************************************************************************/
101 const void *par1,
103 const void *par2
104) {
105 if( *((double*)par1) < *((double*)par2)) return(-1);
106 else if( *((double*)par1) > *((double*)par2)) return(1);
107 else return(0);
108}
109/*****************************************************************************/
110
111/*****************************************************************************/
112
Header file for libtpcmodel.
double dmedian(double *data, int n)
Definition median.c:48
int mean(double *x, double *y, int nr, double *xmean, double *xsd, double *ymean, double *ysd)
Definition pearson.c:341
int ltsQSort(const void *par1, const void *par2)
Definition lts.c:99
int least_trimmed_square(double data[], long int n, double *mean, double *variance)
Definition lts.c:27