/** @file simplex.c
 *  @brief Downhill simplex nonlinear optimization.
 */
/*****************************************************************************/
#include "tpcclibConfig.h"
/*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <string.h>
/*****************************************************************************/
#include "tpcextensions.h"
/*****************************************************************************/
#include "tpcnlopt.h"
/*****************************************************************************/

/*****************************************************************************/
/** Rank simplex vertices by their objective function values.

    The running maximum and minimum start from r[0]; a non-finite running extreme is replaced
    by the next vertex value, and comparisons against NaN are always false, so that a NaN
    vertex does not displace a finite extreme.
    The next best vertex defaults to the worst one when no other vertex is lower than that.
 */
static void simplexRankPoints(
  /** Objective function values of the simplex vertices. */
  const double *r,
  /** Number of vertices in r[]; must be at least one. */
  unsigned int n,
  /** Pointer for the index of the vertex with the lowest value. */
  unsigned int *best,
  /** Pointer for the index of the vertex with the lowest value, the best vertex excluded. */
  unsigned int *nextBest,
  /** Pointer for the index of the vertex with the highest value. */
  unsigned int *worst
) {
  unsigned int ib=0, iw=0;
  double max=r[0], min=r[0];
  for(unsigned int j=1; j<n; j++) {
    if(!isfinite(max) || r[j]>max) {max=r[j]; iw=j;}
    if(!isfinite(min) || r[j]<min) {min=r[j]; ib=j;}
  }
  /* Find the 2nd best, too */
  unsigned int inb=iw;
  double min2=max;
  for(unsigned int j=0; j<n; j++)
    if(j!=ib && r[j]<min2) {min2=r[j]; inb=j;}
  *best=ib; *nextBest=inb; *worst=iw;
}
/*****************************************************************************/

/*****************************************************************************/
/** Nelder-Mead (downhill simplex) optimization. 

    If the number of free parameters (not fixed) is less than two, then the one-dimensional
    method (nlopt1D) is used instead of downhill simplex.

    The implementation is simplified: in each iteration only the worst vertex is replaced,
    either by the reflected point or by an expanded/contracted point; the simplex is never shrunk.
    New points are forced inside the limits.

    @pre Initiate the contents of the nlo data structure.
    @post The last objective function call is usually not done with the best parameter estimates;
    if objective function simulates data that you need, you must call the function with the final
    parameters.
    @post On success the best parameters are written in xfull[] and the objective function value
    in funval; funCalls is increased by the number of objective function evaluations, and
    if usePList is set, each evaluated point is added to the point list.
    @return enum tpcerror (TPCERROR_OK when successful).
    @author Vesa Oikonen
    @sa nlopt1D, nloptPowellBrent, nloptSimplexARRS
 */
int nloptSimplex(
  /** Pointer to NLOPT structure. 
      @pre Initial guess must be given in x[].
      Initial step sizes must be given in xdelta[]. 
      Constraints xlower[] and xupper[] are required, but since the
      implementation is very simplified, limits should be as wide as possible. 
      Parameter tolerances are used as stopping criteria: Simplex stops if centroid differs from 
      best point less than xtol[], for each parameter; if set to zero, then only stopping rule is 
      the iteration number.
   */
  NLOPT *nlo,
  /** Maximum number of iterations; set to zero to use the default (500). */
  unsigned int maxIter,
  /** Pointer to status data; enter NULL if not needed. */
  TPCSTATUS *status
) {
  FILE *fp=stdout;
  int verbose=0; if(status!=NULL) {verbose=status->verbose; fp=status->fp;}
  if(verbose>1) fprintf(fp, "%s(NLOPT, %u, status)\n", __func__, maxIter);
  if(nlo==NULL) {
    statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_FAIL);
    return TPCERROR_FAIL;
  }
  if(nlo->totalNr<1 || nlo->xfull==NULL || nlo->_fun==NULL) {
    statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_NO_DATA);
    return TPCERROR_NO_DATA;
  }
  /* Limits, step sizes and tolerances are all read below, so they must exist, too */
  if(nlo->xlower==NULL || nlo->xupper==NULL || nlo->xdelta==NULL || nlo->xtol==NULL) {
    statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_NO_DATA);
    return TPCERROR_NO_DATA;
  }

  /* Check that initial values are inside limits */
  for(unsigned int i=0; i<nlo->totalNr; i++)
    if(nlo->xfull[i]<nlo->xlower[i] || nlo->xfull[i]>nlo->xupper[i] ||
       !isfinite(nlo->xfull[i]) || !isfinite(nlo->xlower[i]) || !isfinite(nlo->xupper[i]))
    {
      if(verbose>2) {
        /* Print in the order lower limit, value, upper limit, as the format tells */
        fprintf(fp, "parameter %u failed: %e <= %e <= %e\n",
                1+i, nlo->xlower[i], nlo->xfull[i], nlo->xupper[i]);
        fflush(fp);
      }
      statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_INVALID_VALUE);
      return TPCERROR_INVALID_VALUE;
    }

  /* If only one parameter free for fitting, then use 1-dimensional method instead */
  if(nlo->totalNr<2 || nlo->totalNr-nloptFixedNr(nlo)<2) {
    if(verbose>2) fprintf(fp, "going for one-dimensional method.\n");
    /* Forward the status, so that caller gets the result of the 1-D fit in it */
    return(nlopt1D(nlo, maxIter, status));
  }

  /* Initiate the simplex */
  if(verbose>10) fprintf(fp, "Simplex initialization\n");
  unsigned int parNr=nlo->totalNr;
  unsigned int sn=parNr+1; // sn>=3 because parNr >=2 verified before
  double simplexP[sn+2][parNr]; // including room for first and second new point
  double simplexC[parNr];
  double simplexR[sn+2]; // including room for first and second new point
  for(unsigned int i=0; i<(sn+2); i++) simplexR[i]=nan("");
  if(verbose>16) fprintf(fp, "simplexR[0..%u]\n", sn+1);
  unsigned int newPnt=sn;        // slot for the reflected point
  unsigned int newPnt2=newPnt+1; // slot for the expanded or contracted point
  /* Check the maximum iteration number */
  if(maxIter<2) maxIter=500;

  /* Fill with initial guesses */
  for(unsigned int j=0; j<sn; j++)
    for(unsigned int i=0; i<parNr; i++)
      simplexP[j][i]=nlo->xfull[i];

  /* Each vertex is the previous vertex stepped by delta along one more parameter axis;
     parameters with equal lower and upper limit are not stepped */
  for(unsigned int j=1; j<sn; j++) { // initial guess is kept as such in initial simplex
    for(unsigned int i=0; i<parNr; i++) {
      simplexP[j][i] = simplexP[j-1][i];
      if((j-1)==i && nlo->xupper[i]>nlo->xlower[i]) {
        simplexP[j][i] += nlo->xdelta[i];
        if(simplexP[j][i]<nlo->xlower[i] || simplexP[j][i]>nlo->xupper[i]) {
          /* parameter would exceed limits, try the other direction */
          simplexP[j][i] -= 2.0*nlo->xdelta[i];
          /* check again */
          if(simplexP[j][i]<nlo->xlower[i] || simplexP[j][i]>nlo->xupper[i]) {
            /* neither direction fits inside the limits: bad limits or delta */
            if(verbose>2) {
              fprintf(fp, "failed limits %e,%e or delta: %e\n", nlo->xlower[i], nlo->xupper[i], nlo->xdelta[i]);
              fflush(fp);
            }
            statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_INVALID_VALUE);
            return TPCERROR_INVALID_VALUE;
          }
        }
      }
    }
  }
  /* Calculate initial simplex function values */
  for(unsigned int j=0; j<sn; j++) {
    if(verbose>13) {
      fprintf(fp, "x[%u][]=", j);
      for(unsigned int i=0; i<parNr; i++) fprintf(fp, "%g ", simplexP[j][i]);
      fprintf(fp, "\n");
    }
    simplexR[j] = (*nlo->_fun)(parNr, simplexP[j], nlo->fundata);
    nlo->funCalls++;
    if(verbose>12) fprintf(fp, "  R[%u]=%g\n", j, simplexR[j]);
    if(nlo->usePList) {
      int ret=nloptAddP(nlo, simplexP[j], simplexR[j]);
      if(ret!=TPCERROR_OK) {
        statusSet(status, __func__, __FILE__, __LINE__, ret);
        return(ret);
      }
    }
  }
  double initialR=simplexR[0];

  /* Find the max and min response measured */
  unsigned int best=0, nextBest=0, worst=0;
  simplexRankPoints(simplexR, sn, &best, &nextBest, &worst);
  if(verbose>14) {
    fprintf(fp, "  best := %g (%u)\n", simplexR[best], best);
    fprintf(fp, "  next best := %g (%u)\n", simplexR[nextBest], nextBest);
    fprintf(fp, "  worst := %g (%u)\n", simplexR[worst], worst);
  }

  /* Simplex iterations */
  if(verbose>10) fprintf(fp, "Simplex iterations\n");
  double prevBestR=simplexR[best];
  unsigned int iterNr=0, stopTolerance=0, stopR=0;
  do {
    iterNr++;
    if(verbose>13) {fprintf(fp, "\niteration := %u\n", iterNr); fflush(fp);}
    if(verbose>16) {
      for(unsigned int j=0; j<sn; j++) {
        fprintf(fp, "simplexP[%u][]=", j);
        for(unsigned int i=0; i<parNr; i++) fprintf(fp, "%g ", simplexP[j][i]);
        fprintf(fp, " --> %g ", simplexR[j]);
        if(j==best) fprintf(fp, " (best)\n");
        else if(j==nextBest) fprintf(fp, " (next best)\n");
        else if(j==worst) fprintf(fp, " (worst)\n");
        else fprintf(fp, "\n");
        fflush(fp);
      }
    }
    /* Calculate centroid of all measurements except the worst */
    for(unsigned int i=0; i<parNr; i++) {
      simplexC[i]=0.0; unsigned int n=0;
      for(unsigned int j=0; j<sn; j++)
        if(j!=worst && isfinite(simplexP[j][i])) {simplexC[i]+=simplexP[j][i]; n++;}
      simplexC[i]/=(double)(n);
      //Centroid is allowed to step over limits, because it is only used to move points
    }
    if(verbose>15) {
      fprintf(fp, "centroid x[]=");
      for(unsigned int i=0; i<parNr; i++) fprintf(fp, "%g ", simplexC[i]);
      fprintf(fp, "\n"); fflush(fp);
    }

    /* Stopping rule: If simplex points do not differ more than tolerance, then that can mean
       the end is near */
    {
      unsigned int k;
      for(k=0; k<parNr; k++) {
        if(fabs(simplexC[k]-simplexP[worst][k])>0.5*nlo->xtol[k]) break;
        if(fabs(simplexC[k]-simplexP[best][k])>0.5*nlo->xtol[k]) break;
        if(fabs(simplexC[k]-simplexP[nextBest][k])>0.5*nlo->xtol[k]) break;
      }
      if(k==parNr) stopTolerance++; else stopTolerance=0;
    }
    /* If two stopping rules are filled at the same time, then stop */
    if(stopTolerance>=parNr && stopR>=parNr) { // break the do - while loop
      if(verbose>2) {fprintf(fp, "  stopping because simplex is not improving.\n"); fflush(fp);}
      break;
    }
    /* Create new point as centroid reflected away from worst */
    double f=1.0;
    for(unsigned int i=0; i<parNr; i++)
      simplexP[newPnt][i] = simplexC[i] + f*(simplexC[i]-simplexP[worst][i]);
    nloptForceLimits(parNr, nlo->xlower, nlo->xupper, simplexP[newPnt]);
    /* and compute the response */
    simplexR[newPnt] = (*nlo->_fun)(parNr, simplexP[newPnt], nlo->fundata);
    nlo->funCalls++;
    if(verbose>15) {
      fprintf(fp, "new point x[%u][]=", newPnt);
      for(unsigned int i=0; i<parNr; i++) fprintf(fp, "%g ", simplexP[newPnt][i]);
      fprintf(fp, "\n  -> R[%u]=%g\n", newPnt, simplexR[newPnt]);
    }
    if(nlo->usePList) {
      int ret=nloptAddP(nlo, simplexP[newPnt], simplexR[newPnt]);
      if(ret!=TPCERROR_OK) {statusSet(status, __func__, __FILE__, __LINE__, ret); return(ret);}
    }
    /* Depending on the response, decide what to do next */
    if(simplexR[newPnt] < simplexR[best]) {
      if(verbose>14) fprintf(fp, "  new is better than best\n");
      /* If this new point is better than previous best, then expand in this direction */
      f=2.0;
      for(unsigned int i=0; i<parNr; i++)
        simplexP[newPnt2][i] = simplexC[i] + f*(simplexC[i]-simplexP[worst][i]);
      nloptForceLimits(parNr, nlo->xlower, nlo->xupper, simplexP[newPnt2]);
      simplexR[newPnt2] = (*nlo->_fun)(parNr, simplexP[newPnt2], nlo->fundata);
      nlo->funCalls++;
      if(nlo->usePList) {
        int ret=nloptAddP(nlo, simplexP[newPnt2], simplexR[newPnt2]);
        if(ret!=TPCERROR_OK) {statusSet(status, __func__, __FILE__, __LINE__, ret); return(ret);}
      }
      if(simplexR[newPnt2] < simplexR[newPnt]) {
        if(verbose>14) fprintf(fp, "  expanded (%g) is better than new\n", simplexR[newPnt2]);
        for(unsigned int i=0; i<parNr; i++) simplexP[worst][i]=simplexP[newPnt2][i];
        simplexR[worst]=simplexR[newPnt2];
      } else {
        if(verbose>14) fprintf(fp, "  expanded (%g) is no better than new\n", simplexR[newPnt2]);
        for(unsigned int i=0; i<parNr; i++) simplexP[worst][i]=simplexP[newPnt][i];
        simplexR[worst]=simplexR[newPnt];
      }
    } else if(!isfinite(simplexR[newPnt]) || simplexR[newPnt] > simplexR[worst]) {
      if(verbose>14) fprintf(fp, "  new is worse than worst\n");
      /* If new point is worse than previous worst, measure point halfway between 
         the worst and centroid */
      f=-0.5;
      for(unsigned int i=0; i<parNr; i++)
        simplexP[newPnt2][i] = simplexC[i] + f*(simplexC[i]-simplexP[worst][i]);
      nloptForceLimits(parNr, nlo->xlower, nlo->xupper, simplexP[newPnt2]);
      simplexR[newPnt2] = (*nlo->_fun)(parNr, simplexP[newPnt2], nlo->fundata);
      nlo->funCalls++;
      if(nlo->usePList) {
        int ret=nloptAddP(nlo, simplexP[newPnt2], simplexR[newPnt2]);
        if(ret!=TPCERROR_OK) {statusSet(status, __func__, __FILE__, __LINE__, ret); return(ret);}
      }
      if(simplexR[newPnt2] < simplexR[newPnt] && isfinite(simplexR[newPnt2])) {
        if(verbose>14) fprintf(fp, "  halfway (%g) is better than new\n", simplexR[newPnt2]);
        for(unsigned int i=0; i<parNr; i++) simplexP[worst][i]=simplexP[newPnt2][i];
        simplexR[worst]=simplexR[newPnt2];
      } else if(isfinite(simplexR[newPnt])) {
        if(verbose>14) fprintf(fp, "  halfway (%g) is no better than new\n", simplexR[newPnt2]);
        for(unsigned int i=0; i<parNr; i++) simplexP[worst][i]=simplexP[newPnt][i];
        simplexR[worst]=simplexR[newPnt];
      }
      /* If neither point is finite, the simplex is left unchanged in this iteration */
    } else if(simplexR[newPnt] > simplexR[nextBest]) {
      if(verbose>14) fprintf(fp, "  new is worse than next best\n");
      /* If newest response is worse than next best point but better than worst, 
         measure response halfway between centroid and the newest point */
      f=0.5;
      for(unsigned int i=0; i<parNr; i++)
        simplexP[newPnt2][i] = simplexC[i] + f*(simplexP[newPnt][i]-simplexC[i]);
      nloptForceLimits(parNr, nlo->xlower, nlo->xupper, simplexP[newPnt2]);
      simplexR[newPnt2] = (*nlo->_fun)(parNr, simplexP[newPnt2], nlo->fundata);
      nlo->funCalls++;
      if(nlo->usePList) {
        int ret=nloptAddP(nlo, simplexP[newPnt2], simplexR[newPnt2]);
        if(ret!=TPCERROR_OK) {statusSet(status, __func__, __FILE__, __LINE__, ret); return(ret);}
      }
      if(simplexR[newPnt2] < simplexR[newPnt]) {
        if(verbose>14) fprintf(fp, "  halfway (%g) is better than new\n", simplexR[newPnt2]);
        for(unsigned int i=0; i<parNr; i++) simplexP[worst][i]=simplexP[newPnt2][i];
        simplexR[worst]=simplexR[newPnt2];
      } else if(isfinite(simplexR[newPnt])) {
        if(verbose>14) fprintf(fp, "  halfway (%g) is worse than new\n", simplexR[newPnt2]);
        for(unsigned int i=0; i<parNr; i++) simplexP[worst][i]=simplexP[newPnt][i];
        simplexR[worst]=simplexR[newPnt];
      }
    } else if(isfinite(simplexR[newPnt])) {
      if(verbose>14) fprintf(fp, "  new is worse than the best but better than the next best\n");
      /* If none of the above is true, then replace the worst point with this */
      for(unsigned int i=0; i<parNr; i++) simplexP[worst][i]=simplexP[newPnt][i];
      simplexR[worst]=simplexR[newPnt];
    }

    /* Find the max and min response measured */
    simplexRankPoints(simplexR, sn, &best, &nextBest, &worst);
    if(verbose>14) {
      fprintf(fp, "  best := %g (%u)\n", simplexR[best], best);
      fprintf(fp, "  next best := %g (%u)\n", simplexR[nextBest], nextBest);
      fprintf(fp, "  worst := %g (%u)\n", simplexR[worst], worst);
    }

    /* Stopping rule: count how many consecutive times the R has not improved */
    if(simplexR[best]<prevBestR) stopR=0; else stopR++;
    prevBestR=simplexR[best];

  } while(iterNr<maxIter);

  if(verbose>2) {
    if(iterNr>=maxIter) fprintf(fp, "  stopped because max iterations reached.\n");
    fprintf(fp, "  %u iterations\n", iterNr);
    fprintf(fp, "  R[%u]=%g\n", best, simplexR[best]);
    fflush(fp);
  }

#if(0)
  /* One more function call with the best parameters, to make sure that
     if objective function simulates data, it is simulated with the best fit */
   simplexR[best] = (*nlo->_fun)(parNr, simplexP[best], nlo->fundata);
#endif

  /* Check the objective function value */
  if(!isfinite(simplexR[best])) {
    statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_BAD_FIT);
    return(TPCERROR_BAD_FIT);
  }

  /* Copy optimized parameters over initial values.
     A finite result is always accepted when the initial guess gave a non-finite value,
     because comparison against NaN would otherwise reject a valid fit. */
  if(!isfinite(initialR) || simplexR[best]<=initialR) {
    for(unsigned int i=0; i<parNr; i++) nlo->xfull[i]=simplexP[best][i];
    nlo->funval=simplexR[best];
  } else {
    /* This happens only in case of a bug; therefore reported regardless of verbose level */
    fprintf(fp, "nloptSimplex() gives worse R than initially!\n");
    statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_BAD_FIT);
    return(TPCERROR_BAD_FIT);
  }

  statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_OK);
  return TPCERROR_OK;
}
/*****************************************************************************/

/*****************************************************************************/
/** Accumulative Restarting Random start-point Nelder-Mead (downhill simplex) optimization. 

    A list of sample points is first created, either with Gaussian distribution around a valid
    initial guess or randomly between the limits. Downhill simplex is then started from the best
    point in the list, and restarted from the best point so far with step sizes based on
    the SDs of the best points, until SDs are smaller than tolerances, or iteration or
    function call limit is reached.

    @pre Uses rand(), therefore set seed for a new series of pseudo-random numbers; 
    to produce truly random numbers (not just pseudo-random), do srand(time(0)) before 
    calling this function.
    @post Contents of xdelta[] are overwritten. Tolerances xtol[] are temporarily made smaller,
    but the original values are put back before returning, also when returning an error;
    only the tolerances of fixed parameters (lower limit >= upper limit) are left as zero.
    @post Point list is enabled (usePList=1), funCalls is restarted from zero, and 
    a new point list is allocated.
    @return enum tpcerror (TPCERROR_OK when successful).
    @author Vesa Oikonen
    @sa nloptSimplex, nlopt1D, nloptPowellBrent
 */
int nloptSimplexARRS(
  /** Pointer to NLOPT structure. 
      @pre Constraints xlower[] and xupper[] are required. 
      Parameter tolerances xtol[] are required, and used as stopping criteria.
      Other stopping criteria are the iteration number and the NLOPT maxFunCalls.
      Initial guess can be given in x[], but it is not obligatory.
      Initial step sizes (xdelta[]) are not used. 
      @sa nloptInit, nloptFree, nloptAllocate
   */
  NLOPT *nlo,
  /** Maximum number of iterations; set to zero to use the default
      (3 + number of fitted parameters). */
  unsigned int maxIter,
  /** Pointer to status data; enter NULL if not needed. */
  TPCSTATUS *status
) {
  FILE *fp=stdout;
  int verbose=0; if(status!=NULL) {verbose=status->verbose; fp=status->fp;}

  if(verbose>1) fprintf(fp, "%s(NLOPT, %u, status)\n", __func__, maxIter);
  if(nlo==NULL) {
    statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_FAIL); return TPCERROR_FAIL;}
  if(nlo->totalNr<1 || nlo->xfull==NULL || nlo->_fun==NULL) {
    statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_NO_DATA); return TPCERROR_NO_DATA;}
  /* Limits, step sizes and tolerances are all read or written below */
  if(nlo->xlower==NULL || nlo->xupper==NULL || nlo->xdelta==NULL || nlo->xtol==NULL) {
    statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_NO_DATA); return TPCERROR_NO_DATA;}

  /* Check if any of the parameters are fixed */
  unsigned int fixedParNr=nloptLimitFixedNr(nlo);
  if(verbose>2 && fixedParNr>0) fprintf(fp, "fixedParNr := %u\n", fixedParNr);
  unsigned int fittedParNr=nlo->totalNr-fixedParNr;
  if(verbose>2) fprintf(fp, "fittedParNr := %u\n", fittedParNr);
  if(fittedParNr<1) {
    statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_INVALID_VALUE); 
    return TPCERROR_INVALID_VALUE;
  }

  /* Check the tolerances; tolerance of a fixed parameter is set to zero */
  for(unsigned int i=0; i<nlo->totalNr; i++) {
    if(nlo->xlower[i]>=nlo->xupper[i]) {nlo->xtol[i]=0.0; continue;}
    if(!(nlo->xtol[i]>0.0)) { // written this way to catch NaN, too
      if(verbose>0) {fprintf(stderr, "Error: invalid xtol[].\n"); fflush(stderr);}
      statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_INVALID_VALUE);
      return TPCERROR_INVALID_VALUE;
    }
  }


  /* Set maxIter, if necessary */
  if(maxIter==0) maxIter=3+fittedParNr;
  if(verbose>2) {fprintf(fp, "maxIter := %u\n", maxIter); fflush(fp);}

  /* Variables that are needed after the point where exits go through the common cleanup */
  int ret=TPCERROR_OK;
  unsigned int iterNr=0; // loop index
  int initGuessAvailable=1;

  /* Store the original tolerances, and use smaller tolerance with downhill simplex;
     from now on every exit must go through the label restoreTolerances */
  unsigned int dim=nlo->totalNr;
  double tol[dim];
  for(unsigned int i=0; i<dim; i++) {tol[i]=nlo->xtol[i]; nlo->xtol[i]*=0.1;}

  /*
   *  Set initial xdelta based on parameter limits and tolerances
   */
  for(unsigned int i=0; i<dim; i++) {
    double d=nlo->xupper[i]-nlo->xlower[i];
    if(d>0.0) nlo->xdelta[i]=2.0*tol[i]+0.15*d; else nlo->xdelta[i]=0.0;
  }

  /*
   *  Initialize the sample list with random points
   */

  /* Allocate memory */
  unsigned int sampleNr=30*fittedParNr;
  if(verbose>2) {fprintf(fp, "sampleNr := %u\n", sampleNr); fflush(fp);}
  nlo->usePList=1; nlo->funCalls=0;
  /* NOTE(review): a previously allocated plist is not freed here; presumably plist is NULL
     after nloptInit(), but repeated calls with the same NLOPT may leak - confirm. */
  /* Size is computed in size_t to avoid unsigned int overflow */
  nlo->plist=malloc((size_t)sampleNr*((size_t)dim+1)*sizeof(double));
  if(nlo->plist==NULL) { // will be freed with nloptFree()
    statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_OUT_OF_MEMORY);
    ret=TPCERROR_OUT_OF_MEMORY; goto restoreTolerances;
  }

  /* If initial guess was given by user, create random points with Gaussian distribution around it */
  for(unsigned int i=0; i<nlo->totalNr; i++) {
    if(!isfinite(nlo->xfull[i])) {initGuessAvailable=0; break;}
    if(nlo->xfull[i]<nlo->xlower[i]) {initGuessAvailable=0; break;}
    if(nlo->xfull[i]>nlo->xupper[i]) {initGuessAvailable=0; break;}
  }
  nlo->funval=(*nlo->_fun)(dim, nlo->xfull, nlo->fundata);
  if(isfinite(nlo->funval)) {
    if(verbose>2) {
      fprintf(fp, "Initial guess: ");
      for(unsigned int i=0; i<dim; i++) fprintf(fp, "%g ", nlo->xfull[i]);
      fprintf(fp, "=> %e\n", nlo->funval); fflush(fp);
    }
    doubleCopy(nlo->plist, nlo->xfull, dim);
    nlo->plist[dim]=nlo->funval; nlo->funCalls++;
  } else {
    if(verbose>2) {fprintf(fp, "invalid initial guess\n"); fflush(fp);}
    initGuessAvailable=0;
  }
  if(initGuessAvailable) {
    for(unsigned int pi=1; pi<sampleNr; pi++) {
      nloptGaussianPoint(&nlo->plist[pi*(dim+1)], nlo->plist, nlo->xdelta, 
                         nlo->xlower, nlo->xupper, dim, NULL);
      nlo->plist[pi*(dim+1)+dim]=(*nlo->_fun)(dim, &nlo->plist[pi*(dim+1)], nlo->fundata);
      nlo->funCalls++;
    }
  } else {
    /* If valid initial point was not given, fill the list with random points.
       The list is filled from its start, overwriting an initial guess that was finite but 
       outside the limits; restart the counter so that funCalls does not exceed the number
       of allocated list entries. */
    nlo->funCalls=0;
    for(unsigned int pi=0; pi<sampleNr; pi++) {
      nloptRandomPoint(&nlo->plist[pi*(dim+1)], nlo->xlower, nlo->xupper, dim, NULL);
      nlo->plist[pi*(dim+1)+dim]=(*nlo->_fun)(dim, &nlo->plist[pi*(dim+1)], nlo->fundata);
      nlo->funCalls++;
    }
  }

  /* Sort samples based on the evaluated function value */
  if(nloptSortP(nlo)!=TPCERROR_OK) {
    statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_FAIL);
    ret=TPCERROR_FAIL; goto restoreTolerances;
  }
  if(verbose>10) nloptPrintP(nlo, 0, fp);

  /* Downhill simplex from the best point so far */
  doubleCopy(nlo->xfull, nlo->plist, dim);
  if(verbose>4) {
    fprintf(fp, "LO: initial point with deltas:\n");
    for(unsigned int i=0; i<dim; i++) fprintf(fp, "\t%e\t%e\n", nlo->xfull[i], nlo->xdelta[i]);
  }
  if(nloptSimplex(nlo, 100*dim, status)!=TPCERROR_OK) {
    /* Status was set by nloptSimplex() */
    if(verbose>1) {fprintf(fp, "  LO failed\n"); fflush(fp);}
    ret=TPCERROR_BAD_FIT; goto restoreTolerances;
  } else {
    if(verbose>4) {
      fprintf(fp, "Point after LO:");
      for(unsigned int i=0; i<dim; i++) fprintf(fp, " %e ", nlo->xfull[i]);
      fprintf(fp, " => %e\n", nlo->funval); fflush(fp);
    }
    if(verbose>5) fprintf(fp, "funCalls=%d\n", nlo->funCalls);
  }

  /*
   *  Start iterations
   */
  while(iterNr<maxIter && nlo->funCalls<nlo->maxFunCalls) {
    iterNr++; 
    if(verbose>4) {fprintf(fp, "-----------------------------\nIteration %u\n", iterNr); fflush(fp);}

    /* Sort samples based on the evaluated function value */
    if(nloptSortP(nlo)!=TPCERROR_OK) {
      statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_FAIL);
      ret=TPCERROR_FAIL; goto restoreTolerances;
    }
    if(verbose>12) nloptPrintP(nlo, 20, fp);
    if(verbose>5) {
      fprintf(fp, "best point so far:");
      for(unsigned int i=0; i<dim; i++) fprintf(fp, " %e", nlo->plist[i]);
      fprintf(fp, " => %e\n", nlo->plist[dim]); fflush(fp);
    }

    /* Calculate the parameter means and SDs from the best part of points so far;
       the included fraction of points gets smaller with each iteration */
    if(nloptMeanP(nlo, nlo->funCalls/(1+iterNr), nlo->xfull, nlo->xdelta)!=TPCERROR_OK) {
      statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_BAD_FIT);
      ret=TPCERROR_BAD_FIT; goto restoreTolerances;
    }
    /* Make sure that SD is not zero, unless parameter is fixed */
    for(unsigned int i=0; i<dim; i++) {
      if(nlo->xupper[i]>nlo->xlower[i]) {
        if(nlo->xdelta[i]<0.1*nlo->xtol[i]) nlo->xdelta[i]=drandExponential(0.05*nlo->xtol[i]);
      } else {
        nlo->xdelta[i]=0.0;
      }
    }

    /* Downhill simplex from the best point so far */
    doubleCopy(nlo->xfull, nlo->plist, dim);
    if(verbose>6) {
      fprintf(fp, "LO: initial point with deltas:\n");
      for(unsigned int i=0; i<dim; i++) fprintf(fp, "\t%e\t%e\n", nlo->xfull[i], nlo->xdelta[i]);
    }
    if(nloptSimplex(nlo, 100*dim, status)!=TPCERROR_OK) {
      /* Not fatal at this stage: the best point found so far is returned */
      if(verbose>1) {fprintf(fp, "  LO failed\n"); fflush(fp);}
      break;
    } else {
      if(verbose>6) {
        fprintf(fp, "Point after LO:");
        for(unsigned int i=0; i<dim; i++) fprintf(fp, " %e ", nlo->xfull[i]);
        fprintf(fp, " => %e\n", nlo->funval); fflush(fp);
      }
    }

    if(verbose>5) fprintf(fp, "funCalls=%d\n", nlo->funCalls);

    /* If SDs were smaller than the original tolerances, then stop */
    unsigned int i; for(i=0; i<dim; i++) if(fabs(nlo->xdelta[i])>tol[i]) break;
    if(i==dim) {
      if(verbose>1) fprintf(fp, "\n Required tolerance reached.\n");
      break;
    }

  } // next iteration

  /* Check the reason for loop exit */
  if(iterNr>=maxIter) {
    if(verbose>1) fprintf(fp, "\n Exceeded the maximum number for loops.\n");
  }
  if(nlo->funCalls>=nlo->maxFunCalls) {
    if(verbose>1) fprintf(fp, "\n Exceeded the maximum number for function calls.\n");
  }

  /* Sort samples based on the evaluated function value */
  if(nloptSortP(nlo)!=TPCERROR_OK) {
    statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_FAIL);
    ret=TPCERROR_FAIL; goto restoreTolerances;
  }

  /* Get the best point so far */
  for(unsigned int i=0; i<dim; i++) nlo->xfull[i]=nlo->plist[i];
  nlo->funval=nlo->plist[dim];

  statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_OK);
  ret=TPCERROR_OK;

restoreTolerances:
  /* Put back the original tolerances, on success and on failure */
  for(unsigned int i=0; i<dim; i++) nlo->xtol[i]=tol[i];
  return ret;
}
/*****************************************************************************/

/*****************************************************************************/
