tpcclib-doc/v2/nnlsq_8c_source.html

/*****************************************************************************/

#include "tpcclibConfig.h"

/*****************************************************************************/

#include <stdio.h>

#include <stdlib.h>

#include <math.h>

/*****************************************************************************/

#include "tpcextensions.h"

/*****************************************************************************/

#include "tpclinopt.h"

/*****************************************************************************/


/*****************************************************************************/

// Uncomment the #define below to test for buffer overflow; leave it commented out to NOT test

//#define TEST_BUFOVERFLOW 1

/*****************************************************************************/


/*****************************************************************************/

/* Local function declarations (the definitions follow later in this file) */

/* Householder helper: mode 1 constructs the transformation from vector u (storing the
   extra value in *up) and, if cm is not NULL, applies it to cm; mode 2 applies a
   previously constructed transformation to cm. Returns non-zero on invalid arguments. */
int nnlsq_lss_h12(int mode, int lpivot, int l1, int m, double *u, double *up, double *cm);

/* Computes the rotation coefficients *cterm and *sterm and the magnitude *sig
   (hypot of a and b) for the value pair (a, b). */
void nnlsq_lss_g1(double a, double b, double *cterm, double *sterm, double *sig);

/*****************************************************************************/


/*****************************************************************************/


/** Non-negative least squares: iteratively selects which coefficients may be non-zero
 *  (set P) and which are held at zero (set Z), solving a triangular system for set P.
 *  NOTE(review): the structure (sets P/Z, Householder steps, Kuhn-Tucker test) looks like
 *  the Lawson & Hanson NNLS procedure - confirm against the library documentation.
 *
 *  Data layout used here: d->a[ni][mi] with ni in 0..n-1 and mi in 0..m-1, d->b and d->zz
 *  of length m, d->x, d->w and d->index of length n.
 *
 *  Side effects on d:
 *   - d->a and d->b are transformed in place (their input contents are lost);
 *   - d->x receives the solution (it is zeroed first);
 *   - d->w receives the dual vector, d->zz and d->index are working space;
 *   - d->rnorm is set to the sum of squares of the residual part of b (NaN while running);
 *   - d->iternr: on input, a value >=3 is used as the iteration limit (otherwise 3*n);
 *     on output it holds the number of iterations counted.
 *
 *  @param d        Problem data and work arrays; all arrays must be non-NULL and
 *                  d->depf must lie within [1.0E-08, 0.5].
 *  @param verbose  >0 prints the function name, >1 reports reaching the iteration limit,
 *                  >2 prints the iteration count.
 *  @return 0 on success, 1 if the iteration limit was reached, 2 if the data is invalid
 *          (or the Householder helper rejected its arguments), 3 if d->depf is out of range.
 */
int nnlsq(

  NNLSQDATA *d,

  int verbose

) {

  if(verbose>0) {printf("%s()\n", __func__); fflush(stdout);}


  /* Check the data */

  if(d==NULL || d->m<1 || d->n<1 || d->a==NULL || d->b==NULL ||

     d->x==NULL || d->w==NULL || d->zz==NULL || d->index==NULL)

       return(2);

  if(d->depf<1.0E-08 || d->depf>0.5) return(3);


  /* Initialize */

  for(int ni=0; ni<d->n; ni++) d->x[ni]=0.0;

  for(int ni=0; ni<d->n; ni++) d->index[ni]=ni;

  /* rnorm stays NaN until it is computed at the end of the function */
  d->rnorm=nan("");

  /* index[iz1..iz2] lists the coefficients currently in set Z;
     index[0..nsetp-1] lists those in set P */
  int iz1=0;

  int iz2=d->n-1;

  int nsetp=0;

  /* npp1 is the pivot row for the next column entering set P; every update below
     keeps it equal to nsetp */
  int npp1=0;


  /* Main loop; quit if all coefficients are already in the solution or

     if M cols of A have been triangulated */

  double up=0.0;

  /* Iteration limit: the caller-supplied d->iternr if it is at least 3, otherwise 3*n */
  int itermax, iter=0;

  if(d->iternr>=3) itermax=d->iternr; else itermax=3*d->n;

  int j=0, jj=0;

  while(iz1<=iz2 && nsetp<d->m) {

    /* Compute components of the dual (negative gradient) vector W[] */

    for(int iz=iz1; iz<=iz2; iz++) {

      int ni=d->index[iz];

      double sm=0.;

      /* Dot product of column ni and b over rows npp1..m-1 */
      for(int mi=npp1; mi<d->m; mi++) sm+=d->a[ni][mi]*d->b[mi];

      d->w[ni]=sm;

    }


    double wmax;

    int izmax=0;

    while(1) {


      /* Find largest positive W */

      wmax=0.0;

      for(int iz=iz1; iz<=iz2; iz++) {

        int i=d->index[iz];

        if(d->w[i]>wmax) {wmax=d->w[i]; izmax=iz;}

      }


      /* Terminate if wmax<=0.; it indicates satisfaction of the Kuhn-Tucker conditions */

      if(wmax<=0.0) break;


      /* The sign of W[j] is ok for j to be moved to set P.

         Begin the transformation and check new diagonal element to avoid near linear dependence. */

      j=d->index[izmax];

      /* Saved so that the pivot element can be restored if column j is rejected */
      double asave=d->a[j][npp1];

//      up=0.0;

      /* Mode 1: construct the transformation for column j (writes a[j][npp1] and up);
         a non-zero return means invalid arguments and aborts with code 2 */
      if(nnlsq_lss_h12(1, npp1, npp1+1, d->m, d->a[j], &up, NULL)) return(2);

      /* unorm = Euclidean norm of the first nsetp elements of column j */
      double unorm=0.0;

      if(nsetp!=0) for(int mi=0; mi<nsetp; mi++) unorm+=d->a[j][mi]*d->a[j][mi];

      unorm=sqrt(unorm);

      /* Column j passes only if |a[j][npp1]|*depf is large enough to change unorm
         in floating-point arithmetic */
      double e=unorm+fabs(d->a[j][npp1])*d->depf;

      if((e-unorm)>0.0) {

        /* Col j is sufficiently independent. Copy B into ZZ, update ZZ

           and solve for ztest ( = proposed new value for X[j] ) */

        for(int mi=0; mi<d->m; mi++) d->zz[mi]=d->b[mi];

        nnlsq_lss_h12(2, npp1, npp1+1, d->m, d->a[j], &up, d->zz);

        double ztest=d->zz[npp1]/d->a[j][npp1];

        /* See if ztest is positive */

        if(ztest>0.) break;

      }


      /* Reject j as a candidate to be moved from set Z to set P. Restore

         A[npp1,j], set W[j]=0., and loop back to test dual coefficients again */

      d->a[j][npp1]=asave; d->w[j]=0.;

    } /* while(1) */

    /* The inner loop ended without an acceptable candidate: leave the main loop too */
    if(wmax<=0.0) break;


    /* Index j=INDEX[izmax] has been selected to be moved from set Z to set P.

       Update B and indices, apply householder transformations to cols in

       new set Z, zero sub-diagonal elements in col j, set W[j]=0. */

    for(int mi=0; mi<d->m; mi++) d->b[mi]=d->zz[mi];

    /* Swap j to the front of the Z range, then grow P by one entry */
    d->index[izmax]=d->index[iz1]; d->index[iz1]=j; iz1++; nsetp=npp1+1; npp1++;

    if(iz1<=iz2)

      for(int jz=iz1; jz<=iz2; jz++) {

        jj=d->index[jz];

        /* Mode 2: apply the transformation built from column j to column jj */
        nnlsq_lss_h12(2, nsetp-1, npp1, d->m, d->a[j], &up, d->a[jj]);

      }

    if(nsetp!=d->m) for(int mi=npp1; mi<d->m; mi++) d->a[j][mi]=0.;

    d->w[j]=0.;


    /* Solve the triangular system; store the solution temporarily in Z[] */

    /* Back substitution from the last P entry (ip=nsetp-1) down to ip=0; jj carries
       the column used in the previous step */
    for(int mi=0; mi<nsetp; mi++) {

      int ip=nsetp-(mi+1);

      if(mi!=0) for(int ii=0; ii<=ip; ii++) d->zz[ii]-=d->a[jj][ii]*d->zz[ip+1];

      jj=d->index[ip]; d->zz[ip]/=d->a[jj][ip];

    }


    /* Secondary loop begins here */

    /* iter is never reset, so itermax bounds the total number of secondary-loop
       passes over the whole run */
    while(++iter<itermax) {

      /* See if all new constrained coefficients are feasible; if not, compute alpha */

      double alpha=2.0;

      for(int ip=0; ip<nsetp; ip++) {

        int ni=d->index[ip];

        if(d->zz[ip]<=0.) {

          double t=-d->x[ni]/(d->zz[ip]-d->x[ni]);

          /* jj is stored as position-1, so index[jj+1] below is the coefficient
             that gave the smallest alpha */
          if(alpha>t) {alpha=t; jj=ip-1;}

        }

      }


      /* If all new constrained coefficients are feasible then still alpha==2.

         If so, then exit from the secondary loop to main loop */

      if(alpha==2.0) break;


      /* Use alpha (0.<alpha<1.) to interpolate between old X and new ZZ */

      for(int ip=0; ip<nsetp; ip++) {

        int ni=d->index[ip]; d->x[ni]+=alpha*(d->zz[ip]-d->x[ni]);

      }


      /* Modify A and B and the INDEX arrays to move coefficient i from set P to set Z. */

      int pfeas=1;

      int k=d->index[jj+1];

      do {

        d->x[k]=0.;

        /* Unless k is the last entry of P, shift the later entries down by one and
           rotate away the sub-diagonal elements that the shift creates */
        if(jj!=(nsetp-1)) {

          jj++;

          for(int ni=jj+1; ni<nsetp; ni++) {

            int ii=d->index[ni]; d->index[ni-1]=ii;

            double ss, cc;

            /* g1 writes the new diagonal value directly into a[ii][ni-1] */
            nnlsq_lss_g1(d->a[ii][ni-1], d->a[ii][ni], &cc, &ss, &d->a[ii][ni-1]);

            d->a[ii][ni]=0.0;

            for(int nj=0; nj<d->n; nj++) if(nj!=ii) {

              /* Apply procedure G2 (CC,SS,A(J-1,L),A(J,L)) */

              double temp=d->a[nj][ni-1];

              d->a[nj][ni-1]=cc*temp+ss*d->a[nj][ni];

              d->a[nj][ni]=-ss*temp+cc*d->a[nj][ni];

            }

            /* Apply procedure G2 (CC,SS,B(J-1),B(J)) */

            double temp=d->b[ni-1];

            d->b[ni-1]=cc*temp+ss*d->b[ni];

            d->b[ni]=-ss*temp+cc*d->b[ni];

          }

        }

        /* Shrink P by one and put k back at the front of the Z range */
        npp1=nsetp-1; nsetp--; iz1--; d->index[iz1]=k;


        /* See if the remaining coefficients in set P are feasible; they should be because of

           the way alpha was determined. If any are infeasible it is due to round-off error.

           Any that are non-positive will be set to zero and moved from set P to set Z. */

        pfeas=1;

        /* On an infeasible entry the loop exits with jj and k identifying it, and the
           do-while repeats the removal for that entry */
        for(jj=0; jj<nsetp; jj++) {

          k=d->index[jj]; if(d->x[k]<=0.) {pfeas=0; break;}

        }

      } while(pfeas==0);


      /* Copy B[] into zz[], then solve again and loop back */

      for(int mi=0; mi<d->m; mi++) d->zz[mi]=d->b[mi];

      for(int mi=0; mi<nsetp; mi++) {

        int ip=nsetp-(mi+1);

        if(mi!=0) for(int ii=0; ii<=ip; ii++) d->zz[ii]-=d->a[jj][ii]*d->zz[ip+1];

        jj=d->index[ip]; d->zz[ip]/=d->a[jj][ip];

      }

    } /* end of secondary loop */


    /* Iteration limit reached: stop without copying zz into x */
    if(iter>=itermax) break;

    for(int ip=0; ip<nsetp; ip++) {int k=d->index[ip]; d->x[k]=d->zz[ip];}

  } /* end of main loop */


  /* When all m rows have been used as pivots the dual vector is cleared */
  if(npp1>=d->m) for(int ni=0; ni<d->n; ni++) d->w[ni]=0.;


  /* Compute the norm of the final residual vector (sum-of-squares) */

  d->rnorm=0.0;

  for(int mi=npp1; mi<d->m; mi++) d->rnorm+=(d->b[mi]*d->b[mi]);


  /* Report the iteration count back through the same field that carried the limit */
  d->iternr=iter;

  if(verbose>2) printf("  %d iterations.\n", iter);

  if(iter>=itermax) {

    if(verbose>1) printf("  max iterations reached.\n");

    return(1);

  }

  return(0);

} /* nnlsq */


/*****************************************************************************/


/*****************************************************************************/


int nnlsq_lss_h12(

  int mode,

  int lpivot,

  int l1,

  int m,

  double *u,

  double *up,

  double *cm

) {

  /* Check parameters */

  if(mode!=1 && mode!=2) return(1);

  if(u==NULL || up==NULL) return(2);

  if(lpivot<0 || l1<0 || m<0) return(3);

  if(lpivot>=m || lpivot>=l1 || l1>m) return(4);


  double cl = fabs(u[lpivot]);

  if(mode==2 && cl<=0.) return(0);


  if(mode==1) {   /* Construct the transformation */

      /* trying to compensate overflow */

    for(int j=l1; j<m; j++) {  // finding maximum

      cl = fmax(fabs(u[j]), cl);

    }

    // zero vector?

    if(cl<=0.) return(0);


    double clinv=1.0/cl;

    // cl = sqrt( (u[pivot]*clinv)^2 + sigma(i=l1..m)( (u[i]*clinv)^2 ) )

    double d1=u[lpivot]*clinv;

    double sm=d1*d1;

    for(int j=l1; j<m; j++) {

      double d2=u[j]*clinv;

      sm+=d2*d2;

    }

    cl*=sqrt(sm);

    if(u[lpivot] > 0.) cl=-cl;

    *up = u[lpivot] - cl;

    u[lpivot]=cl;

  }


  // no vectors where to apply? only change pivot vector!

  double b=(*up)*u[lpivot];


  /* b must be non-positive here; if b>=0., then return */

  if(b>=0.0) return(0); // was if(b==0) before 2013-06-22


  // Transform the cm vector, if requested

  if(cm!=NULL) {

    double sm = cm[lpivot] * (*up);

    for(int k=l1; k<m; k++) sm += cm[k] * u[k];

    if(sm!=0.0) {

      sm *= (1.0/b);

      cm[lpivot] += sm*(*up);

      for(int k=l1; k<m; k++) cm[k] += u[k]*sm;

    }

  }


  return(0);

} /* nnlsq_lss_h12 */

/*****************************************************************************/


/*****************************************************************************/

void nnlsq_lss_g1(double a, double b, double *cterm, double *sterm, double *sig)

{

  double d1, xr, yr;


  if(fabs(a)>fabs(b)) {

    xr=b/a; d1=xr; yr=hypot(d1, 1.0); d1=1./yr;

    *cterm=copysign(d1, a);

    *sterm=(*cterm)*xr; *sig=fabs(a)*yr;

  } else if(b!=0.) {

    xr=a/b; d1=xr; yr=hypot(d1, 1.0); d1=1./yr;

    *sterm=copysign(d1, b);

    *cterm=(*sterm)*xr; *sig=fabs(b)*yr;

  } else {

    *sig=0.; *cterm=0.; *sterm=1.;

  }

} /* nnlsq_lss_g1 */

/*****************************************************************************/


/*****************************************************************************/


/** Put an NNLSQDATA structure into its empty state.
 *
 *  Sizes and counters are zeroed, every pointer is set to NULL, and depf is set to
 *  its default of 0.01. Nothing is freed here, so this must not be used on a structure
 *  that still owns memory.
 *
 *  @param d  Structure to initialise; a NULL pointer is ignored.
 */
void nnlsqDataInit(
  NNLSQDATA *d
) {
  if(d==NULL) return;

  /* Dimensions */
  d->m = 0;
  d->n = 0;

  /* Storage block and the vectors that point into it */
  d->_data = NULL;
  d->zz = NULL;
  d->w = NULL;
  d->x = NULL;
  d->b = NULL;
  d->a = NULL;
  d->index = NULL;

  /* Results and settings */
  d->rnorm = 0.0;
  d->iternr = 0;
  d->depf = 0.01; // default
}


/*****************************************************************************/


/*****************************************************************************/


/** Release the memory owned by an NNLSQDATA structure and reset it.
 *
 *  Nothing is done when d is NULL or when either dimension is below 1 (the structure
 *  is then treated as holding no allocation). Otherwise the matrix pointer array, the
 *  index array and the shared double buffer are freed and the structure is
 *  re-initialised with nnlsqDataInit(), which also restores the default depf.
 *
 *  When TEST_BUFOVERFLOW is defined, the guard values placed after each array by the
 *  allocator are checked first and any damage is reported to stderr.
 *
 *  @param d  Structure to release; may be NULL.
 */
void nnlsqDataFree(
  NNLSQDATA *d
) {
  if(d==NULL || d->n<1 || d->m<1) return;

#ifdef TEST_BUFOVERFLOW
  fprintf(stderr, "testing if buffer overflow happened\n"); fflush(stderr);
  if(d->index[d->n]!=999) fprintf(stderr, "BUFFER OVERFLOW 1\n");
  if(!isnan(d->_data[d->m*d->n])) fprintf(stderr, "BUFFER OVERFLOW 2\n");
  if(!isnan(d->b[d->m])) fprintf(stderr, "BUFFER OVERFLOW 3\n");
  if(!isnan(d->x[d->n])) fprintf(stderr, "BUFFER OVERFLOW 4\n");
  if(!isnan(d->w[d->n])) fprintf(stderr, "BUFFER OVERFLOW 5\n");
  if(!isnan(d->zz[d->m])) fprintf(stderr, "BUFFER OVERFLOW 6\n");
  fprintf(stderr, "tested buffer overflow\n"); fflush(stderr);
#endif

  /* b, x, w and zz point into _data, so only these three blocks are freed */
  free(d->_data);
  free(d->index);
  free(d->a);

  /* Back to the empty state: sizes zero, pointers NULL */
  nnlsqDataInit(d);
}


/*****************************************************************************/


/*****************************************************************************/


/** Allocate the matrix, vectors and index array of an NNLSQDATA structure.
 *
 *  Any previous contents are released first with nnlsqDataFree(). All doubles live in
 *  one buffer (d->_data), filled with NaN and laid out as: matrix a (n columns of m
 *  elements, d->a[i] pointing at column i), then b (m), x (n), w (n) and zz (m).
 *  When TEST_BUFOVERFLOW is defined one extra guard element follows each array.
 *
 *  The element count is computed in size_t with overflow checks, and d is only
 *  modified once every allocation has succeeded, so a failed call never leaves
 *  dangling pointers in the structure.
 *
 *  @param d  Structure to fill; must have been initialised (e.g. with nnlsqDataInit()).
 *  @param n  Number of matrix columns (length of x, w and index); must be >= 1.
 *  @param m  Number of matrix rows (length of b and zz); must be >= 1.
 *  @return TPCERROR_OK on success, TPCERROR_FAIL if d is NULL or a dimension is below 1,
 *          TPCERROR_OUT_OF_MEMORY if the requested size is not representable or an
 *          allocation fails.
 */
int nnlsqDataAllocate(
  NNLSQDATA *d,
  const int n,
  const int m
) {
  if(d==NULL) return(TPCERROR_FAIL);
  /* Delete any previous contents */
  nnlsqDataFree(d);
  /* If no memory is requested, then return fail */
  if(n<1 || m<1) return(TPCERROR_FAIL);

  const size_t cols=(size_t)n;
  const size_t rows=(size_t)m;
#ifdef TEST_BUFOVERFLOW
  const size_t guard=5; /* one guard element after each of the five arrays */
#else
  const size_t guard=0;
#endif

  /* Number of doubles needed: n*m + m + n + n + m (+ guards). Each step is checked
     so that neither the count nor the byte size can wrap around. */
  const size_t limit=((size_t)-1)/sizeof(double);
  if(cols>limit/rows) return(TPCERROR_OUT_OF_MEMORY);
  size_t total=cols*rows;
  const size_t extra[5]={rows, cols, cols, rows, guard};
  for(int i=0; i<5; i++) {
    if(extra[i]>limit-total) return(TPCERROR_OUT_OF_MEMORY);
    total+=extra[i];
  }

  /* Allocate into locals first; d is touched only after everything succeeded */
  double *data=malloc(total*sizeof *data);
  double **columns=malloc(cols*sizeof *columns);
#ifdef TEST_BUFOVERFLOW
  int *index=malloc((cols+1)*sizeof *index);
#else
  int *index=malloc(cols*sizeof *index);
#endif
  if(data==NULL || columns==NULL || index==NULL) {
    free(data); free(columns); free(index);
    return(TPCERROR_OUT_OF_MEMORY);
  }

  /* NaN marks every element as not yet set */
  for(size_t i=0; i<total; i++) data[i]=nan("");

  /* Set up matrix a and double vectors */
  for(size_t i=0; i<cols; i++) columns[i]=data+i*rows;
  size_t offset=cols*rows;
  double *b=data+offset;  offset+=rows;
  double *x=data+offset;  offset+=cols;
  double *w=data+offset;  offset+=cols;
  double *zz=data+offset;
#ifdef TEST_BUFOVERFLOW
  /* Shift each vector past the guard elements of the arrays before it */
  b++; x+=2; w+=3; zz+=4;
  index[cols]=999;
#endif

  d->_data=data;
  d->a=columns;
  d->b=b;
  d->x=x;
  d->w=w;
  d->zz=zz;
  d->index=index;

  /* Set data sizes */
  d->n=n;
  d->m=m;

  return(TPCERROR_OK);
}


/*****************************************************************************/


/*****************************************************************************/


/** Apply sample weights to the NNLS problem by scaling with their square roots.
 *
 *  Row mi of matrix A (elements d->a[ni][mi] for every column ni) and element d->b[mi]
 *  are multiplied by sqrt(weight[mi]). Weights that are <= 1.0e-20 are treated as
 *  zero, which zeroes that row and that element of b.
 *
 *  The factor is computed per row instead of being stored in a variable-length
 *  array, so the function needs no stack space proportional to m.
 *
 *  @param d       Data with allocated a and b and dimensions n, m >= 1; modified in place.
 *  @param weight  Array of m weights (not their square roots).
 *  @return 0 on success, 1 if any argument is missing or the dimensions are below 1.
 */
int nnlsqWght(
  NNLSQDATA *d,
  double *weight
) {
  if(d==NULL || d->n<1 || d->m<1 || d->a==NULL || d->b==NULL || weight==NULL) return(1);

  /* Multiply rows of matrix A and elements of vector b with square roots of weights */
  for(int mi=0; mi<d->m; mi++) {
    /* Same comparison as before: only values that compare <= 1.0e-20 become zero */
    const double f = (weight[mi]<=1.0e-20) ? 0.0 : sqrt(weight[mi]);
    for(int ni=0; ni<d->n; ni++) {
      d->a[ni][mi]*=f;
    }
    d->b[mi]*=f;
  }

  return(0);
}


/*****************************************************************************/


/*****************************************************************************/


/** Apply weights to the NNLS problem using factors that are multiplied in directly.
 *
 *  Every element d->a[ni][mi] and every element d->b[mi] is multiplied by
 *  sweight[mi]; no square root is taken here.
 *
 *  @param d        Data with allocated a and b and dimensions n, m >= 1; modified in place.
 *  @param sweight  Array of m multiplication factors.
 *  @return 0 on success, 1 if any argument is missing or the dimensions are below 1.
 */
int nnlsqWghtSquared(
  NNLSQDATA *d,
  double *sweight
) {
  if(d==NULL || sweight==NULL) return(1);
  if(d->n<1 || d->m<1 || d->a==NULL || d->b==NULL) return(1);

  /* Scale the matrix one column at a time ... */
  for(int col=0; col<d->n; col++) {
    double *column=d->a[col];
    for(int row=0; row<d->m; row++) column[row]*=sweight[row];
  }

  /* ... and then the right-hand side vector */
  for(int row=0; row<d->m; row++) d->b[row]*=sweight[row];

  return(0);
}


/*****************************************************************************/


/*****************************************************************************/

nnlsqDataFree
void nnlsqDataFree(NNLSQDATA *d)
Definition nnlsq.c:378

nnlsqDataAllocate
int nnlsqDataAllocate(NNLSQDATA *d, const int n, const int m)
Definition nnlsq.c:411

nnlsqDataInit
void nnlsqDataInit(NNLSQDATA *d)
Definition nnlsq.c:357

nnlsqWghtSquared
int nnlsqWghtSquared(NNLSQDATA *d, double *sweight)
Definition nnlsq.c:506

nnlsqWght
int nnlsqWght(NNLSQDATA *d, double *weight)
Definition nnlsq.c:470

nnlsq
int nnlsq(NNLSQDATA *d, int verbose)
Definition nnlsq.c:55

NNLSQDATA
Definition tpclinopt.h:110

NNLSQDATA::rnorm
double rnorm
Definition tpclinopt.h:136

NNLSQDATA::x
double * x
Definition tpclinopt.h:123

NNLSQDATA::index
int * index
Definition tpclinopt.h:131

NNLSQDATA::b
double * b
Definition tpclinopt.h:121

NNLSQDATA::a
double ** a
Definition tpclinopt.h:118

NNLSQDATA::n
int n
Definition tpclinopt.h:112

NNLSQDATA::_data
double * _data
Definition tpclinopt.h:134

NNLSQDATA::iternr
int iternr
Definition tpclinopt.h:139

NNLSQDATA::m
int m
Definition tpclinopt.h:114

NNLSQDATA::zz
double * zz
Definition tpclinopt.h:129

NNLSQDATA::depf
double depf
Definition tpclinopt.h:141

NNLSQDATA::w
double * w
Definition tpclinopt.h:127

tpcextensions.h
Header file for library libtpcextensions.

TPCERROR_FAIL
@ TPCERROR_FAIL
General error.
Definition tpcextensions.h:192

TPCERROR_OUT_OF_MEMORY
@ TPCERROR_OUT_OF_MEMORY
Cannot allocate memory.
Definition tpcextensions.h:193

TPCERROR_OK
@ TPCERROR_OK
No error.
Definition tpcextensions.h:191

tpclinopt.h
Header file for libtpclinopt.