/*****************************************************************************
 
  qr.c  (c) 2003,2004 Turku PET Centre
 
  This file contains the routines needed in the use of QR 
  decomposition when solving least squares problems.
  
  These routines are based on the code of Gerard Jungman and Brian Gough
  provided in the GSL library (http://sources.redhat.com/gsl/)
 
  Version:
  2003-08-28 Kaisa Sederholm
  2003-09-10 KS
      Changes in the memory allocations so that all working space  
      memory can be allocated in the calling program.
  2003-09-26 KS
      Moved the calculation of residual norm from qr to qr_solve.
  2003-10-21 VO
      Tiny changes to prevent compiler warnings.
  2004-09-17 VO
    Doxygen style comments.

 
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
/****************************************************************************/
#include "include/hholder.h"
#include "include/qr.h"
/****************************************************************************/
/** Global variables for this routine */
int qr_M;
int qr_N;
int qr_MNmin;
/****************************************************************************/

/****************************************************************************
 * Algorithm QR
 * 
 * Solves a matrix form least square problem 
 * min||A x - b|| => A x =~ b     (A is m*n matrix, m>=n)
 * using the QR decomposition for overdetermined systems
 *
 * Instead of pointers for working space, NULL can be given to let this
 * function to allocate and free the required memory.
 *
\return Function returns 0 if succesful and 1 in case of invalid problem
   dimensions or memory allocation error.
*/
int qr(
  /** On entry, a[m][n] contains the m by n matrix A.
      On exit, a[][] contains the QR factorization */
  double **A,
  /** Dimensions of matrix A are a[m][n] */
  int m,
  /** Dimensions of matrix A are a[m][n] */
  int n,
  /** B[] is an m-vector containing the right-hand side vector b. */
  double *B,
  /** On exit, x[] will contain the solution vector x. */
  double *X,
  /** On exit, rnorm contains the Euclidean norm of the residual vector */
  double *rnorm,
  /** On exit, tau[] will contain the householder coefficients. */
  double *tau,
  /** An m-array of working space, res[]. On output contains
      residual b - Ax */
  double *res,
  /** m*n array of working space */ 
  double **wws,
  /** 2m-array of working space */
  double *ws
) {

  int i;
  double *qrRes, *qrTau, **qrWws, *qrWs, *chain;
  

  /* Check the parameters and data */
  if(m<=0 || n<=0 || A==NULL || B==NULL || X==NULL)
    return(1);
  /* Allocate memory for working space, if required */
  if(tau!=NULL)
    qrTau=tau;
  else
    qrTau=(double*)calloc(n, sizeof(double));
  if(res!=NULL)
    qrRes=res;
  else
    qrRes=(double*)calloc(m, sizeof(double));
  if(wws!=NULL) {
    qrWws=wws; chain=(double*)NULL;
  } else {
    qrWws=(double**)malloc(m * sizeof(double*));
    chain=(double*)malloc(m*n * sizeof(double));
    for(i=0; i<m; i++) qrWws[i]=chain + i*n;
  }
  if(ws!=NULL)
    qrWs=ws;
  else
    qrWs=(double*)calloc(2*m, sizeof(double));
  /* if(indexp!=NULL) index=indexp; else index=(int*)calloc(n, sizeof(int));*/
  if(qrTau==NULL || qrRes==NULL || qrWws==NULL || qrWs==NULL)
    return(1);
  if(m<n)
    return(1);

  /* Form the householder decomposition and solve the least
     square problem */

  qr_decomp(A, m, n, qrTau, qrWws, qrWs);
  /*for(i=0; i<m; i++){
    for(j=0; j<n; j++){
      printf("%f ",A[i][j]);
    }
    printf("\n");
    }*/

  qr_solve(A, m, n, qrTau, B, X, qrRes, rnorm, qrWws, qrWs);


  /* Free working space, if it was allocated here */
  if(tau==NULL)
    free(qrTau);
  if(res==NULL)
    free(qrRes);
  if(wws==NULL)
    {free(qrWws); free(chain);}
  if(ws==NULL)
    free(qrWs);
  return 0;
} /* qr */


/** Factorise a general M x N matrix A into
 *  
 *   A = Q R
 *
 * where Q is orthogonal (M x M) and R is upper triangular (M x N).
 *
 * Q is stored as a packed set of Householder vectors in the
 * strict lower triangular part of the input matrix A and a set of
 * coefficients in vector tau.
 *
 * R is stored in the diagonal and upper triangle of the input matrix.
 *
 * The full matrix for Q can be obtained as the product
 *
 *       Q = Q_1 Q_2 .. Q_k  and it's transform as the product
 *
 *       Q^T = Q_k .. Q_2 Q_1
 *
 * where k = min(M,N) and
 *
 *       Q_i = (I - tau_i * h_i * h_i^T)
 *
 * and where h_i is a Householder vector
 *
 *       h_i = [1, A(i+1,i), A(i+2,i), ... , A(M,i)]
 *
 * This storage scheme is the same as in LAPACK.  
 *
 * NOTICE! The calling program must take care that pointer tau is 
 * of size n. 
\return Function returns 0 if ok.
 */

int
qr_decomp (
  /** contains coefficient matrix A (m*n) as input and
      factorisation QR as output */
  double **a,
  /** nr of rows in matrix A */
  int M,
  /** nr of columns in matrix A */
  int N,
  /** n-vector for householder coefficients*/
  double *tau,
  /** m*n array of working space */
  double **cchain,
  /** 2m array of working space */
  double *chain
) {
  int i, m, n;
  double *subvector, **submatrix, *helpvector;
  double tau_i;

  qr_M=M;
  qr_N=N;
  qr_MNmin=qr_N;
  
  /* Local variables */
  subvector=chain;
  helpvector=chain + qr_M;
  submatrix=cchain;

  for (i = 0; i < qr_MNmin; i++) {
    /* Compute the Householder transformation to reduce the j-th
     * column of the matrix a to a multiple of the j-th unit vector. 
            * Householder vector h_i is saved in the lower triangular part
            * of the column and Householder coefficient tau_i in the
            * vector tau. 
            */

    for(m=i; m<qr_M; m++) {
      subvector[m-i]= a[m][i];
    }
    tau_i = householder_transform(subvector, qr_M-i, helpvector);
    
    tau[i]=tau_i;
    for(m=i; m<qr_M; m++) {
      a[m][i]=subvector[m-i];
    }

    /* Apply the transformation to the remaining columns
              to get upper triangular part of matrix R  */

    if (i + 1 < qr_N) {
      for(m=i; m<qr_M; m++) {
        for(n=i+1; n<qr_N; n++) {
          submatrix[m-i][n-i-1]=a[m][n];
        }
      }

      householder_hm (tau_i, subvector, submatrix, qr_M-i, qr_N-i);

      for(m=i; m<qr_M; m++) {
        for(n=i+1; n<qr_N; n++) {
          a[m][n]=submatrix[m-i][n-i-1];
        }
      }

    }
  }

  return 0;
}


/** Find the least squares solution to the overdetermined system
 *
 *   A x = b 
 *  
 * for m >= n using the QR factorisation A = Q R. 
 * qr_decomp() must be used prior to this function in order to form
 * the QR factorisation of A.
 * Solution is formed in the following order: 
 * QR x = b  =>  R x = Q^T b  =>  x = R^-1 (Q^T b)
 * 
 * NOTICE! The calling program must take care that pointers b, x
 * and residual are of the right size.
\return Function returns 0 if ok.
 */

int
qr_solve(
  /** m*n matrix containing householder vectors of A*/
  double **QR,
  /** nr of rows in matrix A */
  int M,
  /** nr of columns in matrix A */
  int N,
  /** vector containing householder coefficients tau*/
  double *tau,
  /** Contains m-vector b of A x = b */
  double *b,
  /** solution vector x of length n */
  double *x,
  /** residual vector of length m */
  double *residual,
  /** norm^2 of the residual vector */
  double *resNorm,
  /** m*n array of the working space*/
  double **cchain,
  /** 2m lenght array of the working space */ 
  double *chain
) {
  int m, n;
  double **Rmatrix;

  qr_M=M;
  qr_N=N;
  qr_MNmin=qr_N;

  /* Local variable */
  Rmatrix=cchain;

  /* Get matrix R from the upper triangular part of QR
     First the rows N - M-1 are eliminated from Rmatrix*/

  for(m=0; m<qr_N; m++) {
    for(n=0; n<qr_N; n++) {
      Rmatrix[m][n]=QR[m][n];
    }
  }

  for(m=0; m<qr_M; m++) {
    residual[m]=b[m];
  }

  /* Compute b = Q^T b */

  qr_QTvec (QR, tau, residual, chain);
  /*for(m=0; m<qr_M; m++){
    printf("%f ", residual[m]);}
    printf("\n");*/
  /* Solve R x = b by computing x = R^-1 b */

  for(n=0; n<qr_N; n++) {
    x[n]=residual[n];
    /*printf("%f ", x[n]);*/
  }
  /*printf("\n");*/
  qr_invRvec (Rmatrix, x);

  /* Compute residual = b - A x = Q (Q^T b - R x) */

  for(n=0; n<qr_N; n++) {
    residual[n]=0.0;
  }

  /* Compute residual= Q*residual */

  qr_Qvec(QR, tau, residual, chain);

  /* Compute norm^2 of the residual vector */

  for(m=0, *resNorm=0.0; m<qr_M; m++){
    *resNorm +=residual[m]*residual[m];
  }

  return 0;
}


/** Form the product Q^T v  from householder vectors saved
 * in the lower triangel of QR matrix and householder 
 * coefficients saved in vector tau.
\return Returns 0 if ok. 
 */

int
qr_QTvec (double **QR, double *tau, double *v, double *help) {

  int i, m;
  double ti, *h, *w;

 /* Local variables */
  h=help;
  w=help+qr_M;

  /* compute Q^T v */

  for (i = 0; i < qr_MNmin; i++) {
    for(m=i; m<qr_M; m++) {
      h[m-i] = QR[m][i];
      w[m-i] = v[m];
    }

    ti = tau[i];
    householder_hv (ti, qr_M-i, h, w);
    for(m=i; m<qr_M; m++) {
      v[m]=w[m-i];
    }
  }

  return 0;
}

/** Form the product Q v  from householder vectors saved
 * in the lower triangel of QR matrix and householder 
 * coefficients saved in vector tau.
\return Returns 0 if ok. 
 */

int
qr_Qvec (double **QR, double *tau, double *v, double *help) {

  int i, m;
  double ti, *h, *w;

  /* Local variables */
  h=help;
  w=help+qr_M;

  /* compute Q v */
  for (i = qr_MNmin-1; i > -1; i--) {
    for(m=i; m<qr_M; m++) {
      h[m-i] = QR[m][i];
      w[m-i] = v[m];
    }

    ti = tau[i];
    householder_hv (ti, qr_M-i, h, w);
    for(m=i; m<qr_M; m++) {
      v[m]=w[m-i];
    }
  }
 return 0;
}

/** Form the product R^-1 v.  (R is saved
 * in the upper triangel of QR matrix.)
\return Returns 0 if ok. 
 */

int qr_invRvec(double **A, double *X) {


  int i, j;
  double tmp;

  if (qr_N == 0)
    return 0;

  /* form  x := inv( A )*x */

  /* backsubstitution */

  X[qr_N-1] = X[qr_N-1] / A[qr_N - 1][qr_N - 1];

  for (i = qr_N - 2; i > -1 ; i--) {
    tmp = X[i];

    for (j = i + 1; j < qr_N; j++) {
      tmp -= A[i][j] * X[j];
    }

    X[i] = tmp / A[i][i];
  }
  return 0;
}

/****************************************************************************/
/**
  Algorithm for weighting the problem that is given to qr-algorithm.
  Square roots of weights are used because in qr the difference
  w*A-w*b is squared.
\return Algorithm returns zero if successful, 1 if arguments are inappropriate.
 
*/
int qr_weight(int N, int M, double **A, double *b, double *weight) {
  int n, m;
  double *w;

  /* Check the arguments */
  if(N<1 || M<1 || A==NULL || b==NULL || weight==NULL)
    return(1);

  /* Allocate memory */
  w=(double*)malloc(M*sizeof(double));
  if(w==NULL)
    return(2);

  /* Check that weights are not zero and get the square roots of them to w[] */
  for(m=0; m<M; m++) {
    if(weight[m]<=1.0e-20)
      w[m]=0.0;
    else
      w[m]=sqrt(weight[m]);
  }

  /* Multiply rows of matrix A and elements of vector b with weights*/
  for(m=0; m<M; m++) {
    for(n=0; n<N; n++) {
      A[m][n]*=w[m];
    }
    b[m]*=w[m];
  }

  free(w);
  return(0);
}
/****************************************************************************/
