TPCCLIB
Loading...
Searching...
No Matches
regression.c File Reference

Regression line fitting. More...

#include "tpcclibConfig.h"
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "tpcextensions.h"
#include "tpclinopt.h"

Go to the source code of this file.

Functions

int fitLine (double *x, double *y, const int n, double *m, double *c)
int fitLinePearson (double *x, double *y, const int n, double *m, double *msd, double *c, double *csd, double *d, double *dsd, double *r, double *ysd)
int highestSlope (double *x, double *y, const int n, const int slope_n, double x_start, double *m, double *yi, double *xi, double *xh)

Detailed Description

Regression line fitting.

Author
Vesa Oikonen

Definition in file regression.c.

Function Documentation

◆ fitLine()

int fitLine ( double * x,
double * y,
const int n,
double * m,
double * c )

Calculate regression line slope (m) and y axis intercept (c).

See also
fitLinePearson, statMeanSD, doubleMean, statSortDouble, highestSlope, linRegr3p
Returns
Returns the number of samples used in the regression, 0 in case of an error.
Parameters
xPointer to an array of x axis values. NaN's and infinite values are ignored.
yPointer to an array of y axis values. NaN's and infinite values are ignored.
nThe number of samples (length of x[] and y[]).
mPointer where calculated slope is written; enter NULL if not needed.
cPointer where calculated y axis intercept is written; enter NULL if not needed.

Definition at line 23 of file regression.c.

34 {
35 /* Check the data */
36 if(x==NULL || y==NULL) return(0);
37 if(m!=NULL) *m=nan("");
38 if(c!=NULL) *c=nan("");
39 if(n<1) return(0);
40
41 double xsum=0.0, ysum=0.0, x2sum=0.0, xysum=0.0;
42 int nn=0;
43 for(int i=0; i<n; i++) if(isfinite(x[i]) && isfinite(y[i])) {
44 xsum+=x[i]; ysum+=y[i]; x2sum+=x[i]*x[i]; xysum+=x[i]*y[i]; nn++;
45 } //printf("nn=%d\n", nn);
46 // printf("xsum=%g ysum=%g x2sum=%g xysum=%g\n", xsum, ysum, x2sum, xysum);
47 if(nn<1) return(0);
48 if(nn==1) {
49 double mm=ysum/xsum;
50 if(isfinite(mm)) {
51 if(m!=NULL) *m=mm;
52 if(c!=NULL) *c=0.0;
53 return(nn);
54 } else
55 return(0);
56 }
57 double delta=(double)nn*x2sum - xsum*xsum; //printf("delta=%g\n", delta);
58 if(delta==0.0) return(0);
59 if(m!=NULL) *m=((double)nn*xysum - xsum*ysum)/delta;
60 if(c!=NULL) *c=(x2sum*ysum - xsum*xysum)/delta;
61 return(nn);
62}

Referenced by highestSlope().

◆ fitLinePearson()

int fitLinePearson ( double * x,
double * y,
const int n,
double * m,
double * msd,
double * c,
double * csd,
double * d,
double * dsd,
double * r,
double * ysd )

Calculate regression line slope (m), y axis intercept (c), their SDs, and Pearson's correlation coefficient (r).

See also
fitLine, statMeanSD, doubleMean, statSortDouble
Returns
Returns the number of samples used in the regression, 0 in case of an error.
Todo
Check SD of x axis intercept against some other program.
Parameters
xPointer to an array of x axis values. NaN's and infinite values are ignored.
yPointer to an array of y axis values. NaN's and infinite values are ignored.
nThe number of samples (length of x[] and y[]).
mPointer where calculated slope is written; enter NULL if not needed.
msdPointer where SD of slope is written; enter NULL if not needed.
cPointer where calculated y axis intercept is written; enter NULL if not needed.
csdPointer where SD of y axis intercept is written; enter NULL if not needed.
dPointer where calculated x axis intercept is written; enter NULL if not needed.
dsdPointer where SD of x axis intercept is written; enter NULL if not needed.
rPointer where Pearson's correlation coefficient is written; enter NULL if not needed.
ysdPointer where residual variance of y values is written; enter NULL if not needed.

Definition at line 72 of file regression.c.

95 {
96 /* Check the data */
97 if(x==NULL || y==NULL) return(0);
98 if(m!=NULL) *m=nan("");
99 if(c!=NULL) *c=nan("");
100 if(d!=NULL) *d=nan("");
101 if(msd!=NULL) *msd=nan("");
102 if(csd!=NULL) *csd=nan("");
103 if(dsd!=NULL) *dsd=nan("");
104 if(r!=NULL) *r=nan("");
105 if(ysd!=NULL) *ysd=nan("");
106 if(n<1) return(0);
107
108 double xsum=0.0, ysum=0.0, x2sum=0.0, y2sum=0.0, xysum=0.0;
109 int nn=0;
110 for(int i=0; i<n; i++) if(isfinite(x[i]) && isfinite(y[i])) {
111 xsum+=x[i]; ysum+=y[i]; x2sum+=x[i]*x[i]; y2sum+=y[i]*y[i]; xysum+=x[i]*y[i]; nn++;
112 }
113 if(nn<1) return(0);
114 if(nn==1) {
115 double mm=ysum/xsum;
116 if(isfinite(mm)) {
117 if(m!=NULL) *m=mm;
118 if(c!=NULL) *c=0.0;
119 if(d!=NULL) {*d=0.0; if(fabs(mm)<1.0E-10) *d=nan("");}
120 if(msd!=NULL) *msd=0.0;
121 if(csd!=NULL) *csd=0.0;
122 if(dsd!=NULL) {*dsd=0.0; if(fabs(mm)<1.0E-10) *dsd=nan("");}
123 if(r!=NULL) *r=1.0;
124 if(ysd!=NULL) *ysd=0.0;
125 return(nn);
126 } else
127 return(0);
128 }
129 double xmean=xsum/(double)nn;
130 double ymean=xsum/(double)nn;
131 double dx2sum=0.0, /*dy2sum=0.0,*/ dxdysum=0.0;
132 for(int i=0; i<n; i++) if(isfinite(x[i]) && isfinite(y[i])) {
133 double f=x[i]-xmean; dx2sum+=f*f;
134 double g=y[i]-ymean; /*dy2sum+=g*g;*/
135 dxdysum+=f*g;
136 }
137 // slope and intercept
138 double slope=dxdysum/dx2sum;
139 double ic=(dx2sum*ysum - xsum*dxdysum)/((double)nn*dx2sum);
140 if(!isfinite(slope) || !isfinite(ic)) return(0);
141 if(m!=NULL) *m=slope;
142 if(c!=NULL) *c=ic;
143 double xic=-ic/slope; if(!isfinite(xic)) xic=nan("");
144 if(d!=NULL) *d=xic;
145 // Errors
146 double ry2sum=0.0;
147 for(int i=0; i<n; i++) if(isfinite(x[i]) && isfinite(y[i])) {
148 double f=slope*x[i]+ic-y[i];
149 ry2sum+=f*f;
150 }
151 double ye=sqrt(ry2sum/(double)(nn-2)); if(!isfinite(ye)) ye=0.0;
152 if(ysd!=NULL) *ysd=ye;
153 double slopee=ye/sqrt(dx2sum); if(!isfinite(slopee)) slopee=0.0;
154 double ice=ye/sqrt((double)nn-xsum*xsum/x2sum); if(!isfinite(ice)) ice=0.0;
155 if(msd!=NULL) *msd=slopee;
156 if(csd!=NULL) *csd=ice;
157 if(dsd!=NULL) { // SD of x axis intercept
158 double xice=fabs(ye/slope)*sqrt((1.0/(double)nn) + ymean*ymean/(slope*slope*dx2sum));
159 if(!isfinite(xice)) xice=nan("");
160 *dsd=xice;
161 }
162 // Pearson's correlation coefficient
163 double rr=(xysum-((xsum*ysum)/(double)nn)) /
164 sqrt((x2sum-xsum*xsum/(double)nn)*(y2sum-ysum*ysum/(double)nn));
165 // and correct for small sample size
166 if(nn>4) rr*=1.0+(1.0-rr*rr)/(double)(2*(nn-4));
167 if(!isfinite(rr)) rr=0.0; else rr=fabs(rr);
168 if(r!=NULL) *r=rr;
169
170 return(nn);
171}

◆ highestSlope()

int highestSlope ( double * x,
double * y,
const int n,
const int slope_n,
double x_start,
double * m,
double * yi,
double * xi,
double * xh )

Find the regression line with the highest slope for x,y data.

See also
fitLine
Returns
Return 0 if no errors were found.
Parameters
xPointer to an array of x axis values. NaN's and infinite values are ignored. Data must be sorted by increasing x, and overlapping x values may cause problem.
yPointer to an array of y axis values. NaN's and infinite values are ignored. Data is not modified.
nThe number of samples (length of x[] and y[]).
slope_nThe number of samples used to fit the line; must be at least 2.
x_startEstimation start x value, samples with smaller x are ignored; can usually be set to zero.
mPointer where calculated max slope is written; NULL if not needed.
yiPointer where calculated y axis intercept is written; NULL if not needed.
xiPointer where calculated x axis intercept is written; NULL if not needed. This could be used as an estimate of radioactivity appearance time in TAC data, but you must then check that the max slope m is positive.
xhPointer where the place (x) of the highest slope is written; NULL if not needed.

Definition at line 179 of file regression.c.

202 {
203 /* Check the data */
204 if(x==NULL || y==NULL || n<2 || slope_n<2) return(1);
205 if(m!=NULL) *m=nan("");
206 if(yi!=NULL) *yi=nan("");
207 if(xi!=NULL) *xi=nan("");
208 if(xh!=NULL) *xh=nan("");
209
210 /* Make copy of original data */
211 double xx[n], yy[n]; int nn=0;
212 for(int i=0; i<n; i++)
213 if(isfinite(x[i]) && isfinite(y[i])) {
214 if(isfinite(x_start) && x[i]<x_start) continue;
215 xx[nn]=x[i]; yy[nn]=y[i]; nn++;
216 }
217 if(nn<slope_n) return(2);
218
219 /* Compute regression lines */
220 double max_slope=nan(""), ic_at_max=nan(""), slope, ic;
221 int i_at_max=0;
222 for(int i=0; i<=nn-slope_n; i++) {
223 if(fitLine(xx+i, yy+i, slope_n, &slope, &ic)<slope_n) continue;
224 if(!isfinite(slope) || !isfinite(ic)) continue;
225 if(isnan(max_slope) || slope>max_slope) {
226 max_slope=slope; ic_at_max=ic; i_at_max=i;
227 }
228 }
229 if(!isfinite(max_slope)) return(3);
230 if(m!=NULL) *m=max_slope;
231 if(yi!=NULL) *yi=ic_at_max;
232 if(xi!=NULL) {if(max_slope!=0.0) *xi=-ic_at_max/max_slope;}
233 if(xh!=NULL) {
234 *xh=0.0; for(int i=i_at_max; i<i_at_max+slope_n; i++) *xh+=xx[i];
235 *xh/=(double)slope_n;
236 }
237
238 return(0);
239}
int fitLine(double *x, double *y, const int n, double *m, double *c)
Definition regression.c:23