TPCCLIB
Loading...
Searching...
No Matches
pearson.c
Go to the documentation of this file.
1
5#include "libtpcmodel.h"
6/******************************************************************************/
7
8/******************************************************************************/
16 double *x,
18 double *y,
20 int nr,
22 double *k,
24 double *kSD,
26 double *b,
28 double *bSD,
30 double *r,
32 double *ySD
33) {
34 int i;
35 double e, f, g, meanx, meany, kk, bb, ke, be, rr, sumsdcy=0.0;
36 double sumx=0.0, sumy=0.0, sumsx=0.0, sumsdx=0.0, sumsdy=0.0, sumdxdy=0.0;
37 double sumxy=0.0, sumsy=0.0;
38
39
40 if(0) {
41 fprintf(stdout, "pearson(x[], y[], %d, *k, *kSD, *b, *bSD, *r, *ySD)\n", nr);
42 fflush(stdout);
43 }
44 /* Check that there is some data */
45 if(x==NULL || y==NULL || nr<2) return(1);
46
47 /* If only 2 datapoints */
48 if(nr==2) {
49 f=x[1]-x[0]; if(fabs(f)<1.0E-50) return(1);
50 *k=(y[1]-y[0])/f; *b=y[0]-(*k)*x[0];
51 *kSD=*bSD=*ySD=0.; *r=1.;
52 return(0);
53 }
54
55 /* Calculate (x,y) sums and means */
56 for(i=0; i<nr; i++) {
57 sumx+=x[i]; sumy+=y[i];
58 sumsx+=x[i]*x[i]; sumsy+=y[i]*y[i];
59 sumxy+=x[i]*y[i];
60 }
61 meanx=sumx/(double)nr;
62 meany=sumy/(double)nr;
63 /* and then based on means */
64 for(i=0; i<nr; i++) {
65 f=x[i]-meanx; sumsdx+=f*f;
66 g=y[i]-meany; sumsdy+=g*g;
67 sumdxdy+=f*g;
68 }
69 if(sumsdx<1.0e-50 || sumsdy<1.0e-50) return(3);
70 /* Regression coefficient */
71 kk=sumdxdy/sumsdx; *k=kk;
72 /* Intercept with y axis */
73 bb=(sumsdx*sumy - sumx*sumdxdy)/((double)nr*sumsdx); *b=bb;
74 /* Errors */
75 for(i=0; i<nr; i++) {
76 f=kk*x[i]+bb-y[i];
77 sumsdcy+=f*f;
78 }
79 /* Deviation of y values */
80 if(sumsdcy<=1.0e-12) e=0.0; else e=sqrt(sumsdcy/(double)(nr-2)); *ySD=e;
81 /* SD of slope and intercept */
82 ke=e/sqrt(sumsdx); be=e/sqrt((double)nr-sumx*sumx/sumsx);
83 *kSD=ke; *bSD=be;
84 be=sqrt(sumsdcy/(double)(nr-2))/sqrt((double)nr-(sumx*sumx)/sumsx);
85 /* Pearson's correlation coefficient */
86 rr=(sumxy-((sumx*sumy)/(double)nr)) /
87 sqrt((sumsx-sumx*sumx/(double)nr)*(sumsy-sumy*sumy/(double)nr));
88 /* Correct for small sample size */
89 if(nr>4) rr*=1.0+(1.0-rr*rr)/(double)(2*(nr-4));
90 *r=rr;
91 if(0) {
92 fprintf(stdout, "k=%14.7e +- %14.7e\n", kk, ke);
93 fprintf(stdout, "b=%14.7e +- %14.7e\n", bb, be);
94 fprintf(stdout, "r=%14.7e ySD=%14.7e\n", rr, e);
95 fflush(stdout);
96 }
97
98 return(0);
99}
100/******************************************************************************/
101
102/******************************************************************************/
111 double *x,
113 double *y,
115 char *is,
117 int nr,
119 double *k,
121 double *kSD,
123 double *b,
125 double *bSD,
127 double *r,
129 double *ySD
130) {
131 int i, j;
132 double *nx, *ny;
133
134 /* Allocate memory for new data pointers */
135 nx=(double*)calloc(nr, sizeof(double)); if(nx==NULL) return 1;
136 ny=(double*)calloc(nr, sizeof(double)); if(ny==NULL) {free((char*)nx); return 1;}
137
138 /* Copy data to pointers */
139 for(i=0, j=0; i<nr; i++) if(is[i]) {nx[j]=x[i]; ny[j]=y[i]; j++;}
140
141 /* Use pearson() */
142 i=pearson(nx, ny, j, k, kSD, b, bSD, r, ySD); free((char*)nx); free((char*)ny);
143 return (i);
144}
145/******************************************************************************/
146
147/******************************************************************************/
156 double *x,
158 double *y,
160 int nr,
162 double *k,
164 double *kSD,
166 double *b,
168 double *bSD,
170 double *r,
172 double *ySD
173) {
174 int i, j;
175 double *nx, *ny;
176
177 /* Allocate memory for new data pointers */
178 nx=(double*)calloc(nr, sizeof(double)); if(nx==NULL) return 1;
179 ny=(double*)calloc(nr, sizeof(double)); if(ny==NULL) {free((char*)nx); return 1;}
180
181 /* Copy data to pointers */
182 for(i=0, j=0; i<nr; i++)
183 if(!isnan(x[i]) && !isnan(y[i])) {nx[j]=x[i]; ny[j]=y[i]; j++;}
184
185 /* Use pearson() */
186 i=pearson(nx, ny, j, k, kSD, b, bSD, r, ySD); free((char*)nx); free((char*)ny);
187 return (i);
188}
189/******************************************************************************/
190
191/******************************************************************************/
200 double *x,
202 double *y,
204 int nr,
206 double start,
208 double end,
210 double *k,
212 double *kSD,
214 double *b,
216 double *bSD,
218 double *r,
220 double *ySD
221) {
222 int i, j;
223 double *nx, *ny;
224
225 if(0) {
226 fprintf(stdout, "pearson4(x[], y[], %d, %g, %g, *k, *kSD, *b, *bSD, *r, *ySD)\n", nr,start,end);
227 fflush(stdout);
228 }
229 /* Allocate memory for new data pointers */
230 if((nx=(double*)calloc(nr, sizeof(double)))==NULL) return -3;
231 if((ny=(double*)calloc(nr, sizeof(double)))==NULL) {free((char*)nx); return -3;}
232
233 /* Copy data to pointers */
234 for(i=0, j=0; i<nr; i++)
235 if(x[i]>=start && x[i]<=end && !isnan(x[i]) && !isnan(y[i])) {
236 nx[j]=x[i]; ny[j]=y[i]; j++;}
237
238 /* Use pearson() */
239 i=pearson(nx, ny, j, k, kSD, b, bSD, r, ySD);
240 free((char*)nx); free((char*)ny);
241 return i;
242}
243/******************************************************************************/
244
245/******************************************************************************/
254 double *x,
256 double *y,
258 int nr,
260 int min_nr,
262 int *first,
264 int *last,
266 double *k,
268 double *kSD,
270 double *b,
272 double *bSD,
274 double *r,
276 double *ySD
277) {
278 int i, n, m, x1, x2, b1, b2, bm;
279 double *nx, *ny;
280 double tk, tkSD, tb, tbSD, tr, tySD;
281
282
283 if(0) {
284 fprintf(stdout, "best_pearson(x, y, %d, %d, %d, %d, k, kSD, b, bSD, r, ySD)\n",
285 nr, min_nr, *first, *last);
286 fflush(stdout);
287 }
288 /* Remove NaN's and those outside range first-last */
289 /* Allocate memory for new data pointers */
290 nx=(double*)calloc(nr, sizeof(double)); if(nx==NULL) return 0;
291 ny=(double*)calloc(nr, sizeof(double)); if(ny==NULL) {free((char*)nx); return 0;}
292 /* Copy data to pointers */
293 if(*last>nr-1) *last=nr-1;
294 if(*first<0) *first=0;
295 for(i=*first, n=0; i<=*last; i++)
296 if(!isnan(x[i]) && !isnan(y[i])) {nx[n]=x[i]; ny[n]=y[i]; n++;}
297
298 /* Check that we have enough points */
299 if(n<2 || n<min_nr) {free((char*)nx); free((char*)ny); return 0;}
300 if(n==min_nr) {
301 i=pearson(nx, ny, n, k, kSD, b, bSD, r, ySD);
302 free((char*)nx); free((char*)ny);
303 if(i) return 0; else return n;
304 }
305
306 /* OK, let's start */
307 *k=*kSD=*b=*bSD=*r=*ySD=0.; b1=b2=bm=0;
308 for(x1=0, m=n; n-x1>min_nr; x1++) {
309 /*printf("I x1=%i x2=%i m=%i n=%i\n", x1, x2, m, n);*/
310 for(x2=n-1, m=x2-x1+1; m>=min_nr; x2--, m--) {
311 if(pearson(nx+x1, ny+x1, m, &tk, &tkSD, &tb, &tbSD, &tr, &tySD))
312 continue;
313 if((tr>*r) ||
314 (tr==*r && m>bm) ||
315 (tr==*r && m==bm && x1>b1) ||
316 (tr==*r && m==bm && x1==b1 && tk>*k) ) {
317 bm=m; b1=x1; b2=x2;
318 *k=tk; *kSD=tkSD; *b=tb; *bSD=tbSD; *r=tr; *ySD=tySD;
319 }
320 /*printf("Fit range: %2i-%2i ; best fit: %2i-%2i\n", x1, x2, b1, b2);*/
321 }
322 }
323
324 /* Set first&last */
325 for(i=*first; i<=*last; i++)
326 if(x[i]==nx[b1] && y[i]==ny[b1]) {*first=i; break;}
327 for(i=*last; i>=*first; i--)
328 if(x[i]==nx[b2] && y[i]==ny[b2]) {*last=i; break;}
329 /*printf("FIRST=%i LAST=%i\n", *first, *last);*/
330
331 free((char*)nx); free((char*)ny);
332 return bm;
333}
334/******************************************************************************/
335
336/******************************************************************************/
343 double *x,
345 double *y,
347 int nr,
349 double *xmean,
351 double *xsd,
353 double *ymean,
355 double *ysd
356) {
357 int i, n;
358 double sumsqr, sqrsum;
359
360 for(i=0, sumsqr=sqrsum=0.0, n=0; i<nr; i++) if(!isnan(x[i]) && !isnan(y[i])) {
361 sumsqr+=x[i]*x[i]; sqrsum+=x[i]; n++;}
362 if(n<=0) return -1;
363 *xmean=sqrsum/(double)n;
364 if(n==1) *xsd=0.0; else {
365 sqrsum*=sqrsum;
366 *xsd=sqrt( (sumsqr - sqrsum/(double)n) / (double)(n-1) );
367 }
368 for(i=0, sumsqr=sqrsum=0.0, n=0; i<nr; i++) if(!isnan(x[i]) && !isnan(y[i])) {
369 sumsqr+=y[i]*y[i]; sqrsum+=y[i]; n++;}
370 if(n<=0) return -1;
371 *ymean=sqrsum/(double)n;
372 if(n==1) *ysd=0.0; else {
373 sqrsum*=sqrsum;
374 *ysd=sqrt( (sumsqr - sqrsum/(double)n) / (double)(n-1) );
375 }
376 return 0;
377}
378/******************************************************************************/
379
380/******************************************************************************/
389 double *x,
391 double *y,
393 int n,
395 double *m,
397 double *c
398) {
399 double xsum=0.0, ysum=0.0, x2sum=0.0, xysum=0.0, delta;
400 int i, nn=0;
401
402 /* Check the data */
403 if(x==NULL || y==NULL) return(1);
404 if(n<2) return(2);
405
406 /* Compute */
407 for(i=0, nn=0; i<n; i++) {
408 if(isnan(x[i]) || isnan(y[i])) continue;
409 xsum+=x[i]; ysum+=y[i]; x2sum+=x[i]*x[i]; xysum+=x[i]*y[i]; nn++;
410 }
411 if(nn<2) return(2);
412 delta=(double)nn*x2sum - xsum*xsum; if(delta==0.0) return(3);
413 *m=((double)nn*xysum - xsum*ysum)/delta;
414 *c=(x2sum*ysum - xsum*xysum)/delta;
415 return(0);
416}
417/******************************************************************************/
418
419/******************************************************************************/
426 double *x,
428 double *y,
430 int n,
432 int slope_n,
434 double *m,
436 double *c,
438 double *xi,
441 double *xh
442) {
443 int i, ret, i_at_max=0;
444 double slope, ic, max_slope, ic_at_max=0.0;
445
446 /* Check the data */
447 if(x==NULL || y==NULL) return(1);
448 if(n<2) return(2);
449 if(slope_n>n) return(3);
450
451 /* Compute */
452 max_slope=-1.0E200;
453 for(i=0; i<=n-slope_n; i++) {
454 ret=regr_line(x+i, y+i, slope_n, &slope, &ic); if(ret) continue;
455 if(slope>max_slope) {max_slope=slope; ic_at_max=ic; i_at_max=i;}
456 } /* next slope */
457 if(max_slope==-1.0E200) return(10);
458 if(m!=NULL) *m=max_slope;
459 if(c!=NULL) *c=ic_at_max;
460 if(xi!=NULL) {if(max_slope!=0.0) *xi=-ic_at_max/max_slope; else *xi=0.0;}
461 if(xh!=NULL) {
462 *xh=0.0; for(i=i_at_max; i<i_at_max+slope_n; i++) *xh+=x[i];
463 *xh/=(double)slope_n;
464 }
465
466 return(0);
467}
468/******************************************************************************/
469
470/******************************************************************************/
477 double *x,
479 double *y,
481 int n,
483 int slope_n,
486 double x_start,
488 double *m,
490 double *c,
492 double *xi,
494 double *xh
495) {
496 int i, ret, i_at_max=0;
497 double slope, ic, max_slope, ic_at_max=0.0;
498
499 /* Check the data */
500 if(x==NULL || y==NULL) return(1);
501 if(n<2) return(2);
502 if(slope_n>n) return(3);
503
504 /* Compute */
505 max_slope=-1.0E200;
506 for(i=0; i<=n-slope_n; i++) if(x[i]>=x_start) {
507 ret=regr_line(x+i, y+i, slope_n, &slope, &ic); if(ret) continue;
508 if(slope>max_slope) {max_slope=slope; ic_at_max=ic; i_at_max=i;}
509 } /* next slope */
510 if(max_slope==-1.0E200) return(10);
511 if(m!=NULL) *m=max_slope;
512 if(c!=NULL) *c=ic_at_max;
513 if(xi!=NULL) {if(max_slope!=0.0) *xi=-ic_at_max/max_slope; else *xi=0.0;}
514 if(xh!=NULL) {
515 *xh=0.0; for(i=i_at_max; i<i_at_max+slope_n; i++) *xh+=x[i];
516 *xh/=(double)slope_n;
517 }
518
519 return(0);
520}
521/******************************************************************************/
522
523/******************************************************************************/
Header file for libtpcmodel.
int highest_slope_after(double *x, double *y, int n, int slope_n, double x_start, double *m, double *c, double *xi, double *xh)
Definition pearson.c:475
int highest_slope(double *x, double *y, int n, int slope_n, double *m, double *c, double *xi, double *xh)
Definition pearson.c:424
int pearson4(double *x, double *y, int nr, double start, double end, double *k, double *kSD, double *b, double *bSD, double *r, double *ySD)
Calculate slope and intercept of a line and Pearson's correlation coefficient.
Definition pearson.c:198
int regr_line(double *x, double *y, int n, double *m, double *c)
Definition pearson.c:387
int pearson2(double *x, double *y, char *is, int nr, double *k, double *kSD, double *b, double *bSD, double *r, double *ySD)
Definition pearson.c:109
int pearson(double *x, double *y, int nr, double *k, double *kSD, double *b, double *bSD, double *r, double *ySD)
Definition pearson.c:14
int pearson3(double *x, double *y, int nr, double *k, double *kSD, double *b, double *bSD, double *r, double *ySD)
Definition pearson.c:154
int mean(double *x, double *y, int nr, double *xmean, double *xsd, double *ymean, double *ysd)
Definition pearson.c:341
int best_pearson(double *x, double *y, int nr, int min_nr, int *first, int *last, double *k, double *kSD, double *b, double *bSD, double *r, double *ySD)
Definition pearson.c:252