/** @file mpso.c
    @brief Multi particle swarm optimization.
    @copyright (c) Turku PET Centre
    @todo More testing.
 */
/*****************************************************************************/
#include "tpcclibConfig.h"
/*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <string.h>
/*****************************************************************************/
#include "tpcextensions.h"
#include "tpcrand.h"
#include "tpcstatist.h"
/*****************************************************************************/
#include "tpcnlopt.h"
/*****************************************************************************/

/*****************************************************************************/
/** MPSO particle: one candidate solution moving in the parameter space.
    The position, velocity, and bestPosition arrays each hold one value per parameter. */
typedef struct MPSO_PARTICLE {
  /** Current particle position (parameter values). */
  double *position;
  /** Current particle velocity; added to the position when the particle is moved. */
  double *velocity;
  /** Cost (objective function value) at the current position. */
  double cost;
  /** Best-ever position in the memory of this particle. */
  double *bestPosition;
  /** Best-ever cost, that is, the cost at bestPosition. */
  double bestCost;
  /** How long ago the bestCost was found: number of moves without improvement.
      Reset to zero also when the particle dies, immigrates, or becomes frustrated. */
  unsigned int since;
} MPSO_PARTICLE;

/** MPSO swarm: a group of particles that share a common best-ever position. */
typedef struct MPSO_SWARM {
  /** Particles of this swarm; array with one entry per particle (MPSO_MULTISWARM.pn). */
  MPSO_PARTICLE *particle;
  /** Best-ever swarm position; array with one value per parameter (MPSO_MULTISWARM.dim). */
  double *bestPosition;
  /** Best-ever swarm cost, that is, the cost at bestPosition. */
  double bestCost;
  /** How long ago the bestCost was found: number of iterations without improvement.
      Reset to zero also when the swarm becomes frustrated. */
  unsigned int since;
} MPSO_SWARM;

/** MPSO multi-swarm: the complete optimizer state, consisting of all swarms, the overall
    best-ever position, and the state of the random number generator. */
typedef struct MPSO_MULTISWARM {
  /** Number of swarms */
  unsigned int sn;
  /** Number of particles in each swarm */
  unsigned int pn;
  /** Dimension (number of parameters) */
  unsigned int dim;
  /** List of swarms; array of sn swarms. */
  MPSO_SWARM *swarm;
  /** Best-ever multi-swarm position; array of dim parameter values. */
  double *bestPosition;
  /** Best-ever multi-swarm cost, that is, the cost at bestPosition. */
  double bestCost;
  /** How long ago the bestCost was found: number of iterations without improvement. */
  unsigned int since;
  /** Data structure needed by Mersenne Twister MT19937.
      @sa mertwiInit, mertwiInitWithSeed64, mertwiRandomDouble1
   */
  MERTWI mt;
} MPSO_MULTISWARM;
/*****************************************************************************/

/*****************************************************************************/
/** Calculate the mean and SD of the particle positions in one MPSO swarm, separately for each
    parameter (dimension).

    Nothing is done, and the output arrays are left untouched, if the multi-swarm data is
    missing, empty, or not allocated, if the swarm index is out of range, or if neither of
    the outputs is requested.
    @sa nloptMPSOabsvMean, statMeanSD
 */
void nloptMPSOpMean(
  /** Pointer to MPSO data structure. */
  MPSO_MULTISWARM *ms,
  /** Index [0..n-1] of swarm to analyze. */
  unsigned int si,
  /** Pointer to array[ms.dim] for writing means into; NULL if not needed. */
  double *mean,
  /** Pointer to array[ms.dim] for writing SDs into; NULL if not needed. */
  double *sd
) {
  if(ms==NULL || ms->dim<1 || ms->sn<1 || ms->pn<1 || si>=ms->sn) return;
  /* Valid size fields do not yet prove that the swarm and particle lists were allocated */
  if(ms->swarm==NULL || ms->swarm[si].particle==NULL) return;
  /* No reason to collect the data if caller wants neither of the outputs */
  if(mean==NULL && sd==NULL) return;

  /* Work space for the values of one parameter from every particle; reused for each parameter */
  double a[ms->pn];
  for(unsigned int i=0; i<ms->dim; i++) {
    for(unsigned int pi=0; pi<ms->pn; pi++) a[pi]=ms->swarm[si].particle[pi].position[i];
    /* Pass on a pointer to the i'th output element, or NULL for output that is not needed */
    double *amean=NULL, *asd=NULL;
    if(mean!=NULL) amean=mean+i;
    if(sd!=NULL) asd=sd+i;
    statMeanSD(a, ms->pn, amean, asd, NULL);
  }
}
/** Calculate the mean and SD of the absolute particle velocities in one MPSO swarm, separately
    for each parameter (dimension).

    Nothing is done, and the output arrays are left untouched, if the multi-swarm data is
    missing, empty, or not allocated, if the swarm index is out of range, or if neither of
    the outputs is requested.
    @sa nloptMPSOpMean, statMeanSD
 */
void nloptMPSOabsvMean(
  /** Pointer to MPSO data structure. */
  MPSO_MULTISWARM *ms,
  /** Index [0..n-1] of swarm to analyze. */
  unsigned int si,
  /** Pointer to array[ms.dim] for writing means of absolute velocities into; NULL if not needed. */
  double *mean,
  /** Pointer to array[ms.dim] for writing SDs of absolute velocities into; NULL if not needed. */
  double *sd
) {
  if(ms==NULL || ms->dim<1 || ms->sn<1 || ms->pn<1 || si>=ms->sn) return;
  /* Valid size fields do not yet prove that the swarm and particle lists were allocated */
  if(ms->swarm==NULL || ms->swarm[si].particle==NULL) return;
  /* No reason to collect the data if caller wants neither of the outputs */
  if(mean==NULL && sd==NULL) return;

  /* Work space for the speeds in one dimension from every particle; reused for each dimension */
  double a[ms->pn];
  for(unsigned int i=0; i<ms->dim; i++) {
    /* Direction is not of interest here, only the magnitude of the velocity */
    for(unsigned int pi=0; pi<ms->pn; pi++) a[pi]=fabs(ms->swarm[si].particle[pi].velocity[i]);
    /* Pass on a pointer to the i'th output element, or NULL for output that is not needed */
    double *amean=NULL, *asd=NULL;
    if(mean!=NULL) amean=mean+i;
    if(sd!=NULL) asd=sd+i;
    statMeanSD(a, ms->pn, amean, asd, NULL);
  }
}
/*****************************************************************************/

/*****************************************************************************/
/** Multi particle swarm optimization (MPSO).
    @post The last objective function call is usually not made with the best parameter estimates;
    if the objective function simulates data that you need, you must call it once more with the
    final parameters.
    @return enum tpcerror (TPCERROR_OK when successful).
    @author Vesa Oikonen
    @sa nloptSimplex, nloptIATGO
    @todo Better stopping criteria.
 */
int nloptMPSO(
  /** Pointer to NLOPT data. 
      Counter funCalls is initially set to zero, and then increased here.
      Parameter maxFunCalls is used as one of the stopping criteria.
      Parameters xtol[] is used as one of the stopping criteria.
   */
  NLOPT *nlo,
  /** Maximum number of iterations; set to zero to use the default; 10 is the minimum. */
  unsigned int maxIter,
  /** Number of swarms; set to zero to use the default; 2 is the minimum. */
  unsigned int nSwarms,
  /** Number of particles per swarm; set to zero to use the default; 5 is the minimum. */
  unsigned int nParticles,
  /** Inertia; weight (0-1) for keeping particles own velocity and direction;
      enter NaN to use the default. */
  double wInertia,
  /** Particle independence; weight how much particle is drawn towards the best point in its own
      memory; enter NaN to use the default. */
  double wParticle,
  /** Gravitation to the swarm; weight how much particle is drawn towards the best point in its own
      swarm memory; enter NaN to use the default. */
  double wSwarm,
  /** Gravitation to the global optimum; weight how much particle is drawn towards the best point
      in the memory of all the swarms. Set to zero to keep the swarms independent of each other. */
  double wGlobal,
  /** Probability of particle death and rebirth at random position; must be less than 0.1. */
  double pDeath,
  /** Probability of two particles switching swarms; must be less than 0.1. */
  double pImmigration,
  /** Use local optimization (Nelder-Mead downhill simplex) intermittently; using it (1) does not
      make the optimization much faster than not using it (0), but may increase the success rate. */
  const int doLocal,
  /** Pointer to status data; enter NULL if not needed. */
  TPCSTATUS *status
) {
  FILE *fp=stdout;
  int verbose=0; if(status!=NULL) {verbose=status->verbose; fp=status->fp;}
  if(verbose>0) {
    fprintf(fp, "\n%s(NLOPT, %d, status)\n", __func__, doLocal);
    fflush(fp);
  }

  if(nlo==NULL) {
    statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_FAIL);
    return TPCERROR_FAIL;
  }
  if(nlo->totalNr<1 || nlo->xfull==NULL || nlo->_fun==NULL) {
    statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_NO_DATA);
    return TPCERROR_NO_DATA;
  }
  if(nlo->maxFunCalls<100) {
    if(verbose>0) {fprintf(stderr, "Error: too low limit for function calls.\n"); fflush(stderr);}
    statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_INVALID_VALUE);
    return TPCERROR_INVALID_VALUE;
  }

  /* Check if any of the parameters are fixed */
  unsigned int dim=nlo->totalNr; // Nr of parameters
  unsigned int fixedParNr=nloptLimitFixedNr(nlo);
  if(verbose>1 && fixedParNr>0) fprintf(fp, "fixedParNr := %d\n", fixedParNr);
  unsigned int fittedParNr=nlo->totalNr-fixedParNr;
  if(verbose>2) fprintf(fp, "fittedParNr := %d\n", fittedParNr);
  if(fittedParNr<1) {
    statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_NO_DATA);
    return TPCERROR_NO_DATA;
  }

  /* Check the tolerations */
  for(unsigned int i=0; i<nlo->totalNr; i++) {
    if(nlo->xlower[i]>=nlo->xupper[i]) {nlo->xtol[i]=0.0; continue;}
    if(!(nlo->xtol[i]>0.0)) {
      if(verbose>0) {fprintf(stderr, "Error: invalid xtol[].\n"); fflush(stderr);}
      statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_INVALID_VALUE);
      return TPCERROR_INVALID_VALUE;
    }
  }



  if(verbose>2) fprintf(fp, "\nInitializing MPSO\n");

  if(maxIter<10) maxIter=1000; // max nr of loops (150) 
  if(nSwarms<2) nSwarms=2+fittedParNr; // nr of swarms (3)
  if(nParticles<5) nParticles=5+2*fittedParNr; // nr of particles (25)
  if(!(wInertia>0.0)) wInertia=0.333; // inertia (0.333)
  if(!(wParticle>0.0)) wParticle=1.00; // particle / cognitive (1.40)
  if(!(wSwarm>0.0)) wSwarm=1.60; // swarm / social (1.40)
  if(!isfinite(wGlobal)) wGlobal=0.40; // multi-swarm / global (0.40)
  if(!isfinite(pDeath) || pDeath>=0.1) // probability of particle death-rebirth (0.005)
    pDeath=0.005; else if(pDeath<0.0) pDeath=0.0;
  if(!isfinite(pImmigration) || pImmigration>=0.1) // probability of particle immigration (0.025)
    pImmigration=0.025; else if(pImmigration<0.0) pImmigration=0.0;
  /* Initiate the number of function calls */
  if(nlo->usePList) { // not supported with this function; would crash if used anyway
    nlo->usePList=0;
    if(nlo->funCalls>0) free(nlo->plist);
    nlo->plist=NULL;
  }
  nlo->funCalls=0; 

  unsigned int pFrustration=10; // Nr of non-successful movements when particle becomes frustrated
  unsigned int nFrustration=15; // Nr of non-successful movements when swarm becomes frustrated

  if(verbose>3) {
    fprintf(fp, "maxIter := %u\n", maxIter);
    fprintf(fp, "maxFunCalls := %u\n", nlo->maxFunCalls);
    fprintf(fp, "nSwarms := %u\n", nSwarms);
    fprintf(fp, "nParticles := %u\n", nParticles);
    fprintf(fp, "dim := %u\n", dim);
    fprintf(fp, "wInertia := %g\n", wInertia);
    fprintf(fp, "wParticle := %g\n", wParticle);
    fprintf(fp, "swarm/social (wSwarm) := %g\n", wSwarm);
    fprintf(fp, "multi-swarm/global (wGlobal) := %g\n", wGlobal);
    fprintf(fp, "probability of death := %g\n", pDeath);
    fprintf(fp, "probability of immigration := %g\n", pImmigration);
    fprintf(fp, "pFrustration := %u\n", pFrustration);
    fprintf(fp, "nFrustration := %u\n", nFrustration);
    fflush(fp);
  }

  /* Allocate memory */
  MPSO_MULTISWARM multi;
  multi.dim=dim; multi.sn=nSwarms; multi.pn=nParticles;
  multi.bestPosition=calloc(dim, sizeof(double));
  multi.swarm=calloc(nSwarms, sizeof(MPSO_SWARM));
  for(unsigned int si=0; si<nSwarms; si++) {
    multi.swarm[si].particle=calloc(nParticles, sizeof(MPSO_PARTICLE));
    multi.swarm[si].bestPosition=calloc(dim, sizeof(double));
    for(unsigned int pi=0; pi<nParticles; pi++) {
      multi.swarm[si].particle[pi].position=calloc(dim, sizeof(double));
      multi.swarm[si].particle[pi].velocity=calloc(dim, sizeof(double));
      multi.swarm[si].particle[pi].bestPosition=calloc(dim, sizeof(double));
    }
  }
  /* Initialize Mersenne Twister MT19937 */
  mertwiInit(&multi.mt); mertwiInitWithSeed64(&multi.mt, mertwiSeed64());



  /* Check if initial guess is valid */
  int initGuessAvailable=1;
  double initGuessCost=nan("");
  for(unsigned int i=0; i<dim; i++) {
    if(isnan(nlo->xfull[i])) {initGuessAvailable=0; break;}
    if(nlo->xfull[i]<nlo->xlower[i]) {initGuessAvailable=0; break;}
    if(nlo->xfull[i]>nlo->xupper[i]) {initGuessAvailable=0; break;}
  }
  if(initGuessAvailable) {
    initGuessCost=(*nlo->_fun)(dim, nlo->xfull, nlo->fundata); nlo->funCalls++;
    if(!isfinite(initGuessCost)) initGuessAvailable=0;
    else if(verbose>3) {
      fprintf(fp, "valid initial guess with cost=%g\n", initGuessCost); fflush(fp);
    }
  }

  int carpetFill=0;

  if(carpetFill) {
    /* Fill the swarms with random parameters and their costs */
    multi.bestCost=nan("");
    for(unsigned int si=0; si<nSwarms; si++) {
      for(unsigned int pi=0; pi<nParticles; pi++) {
        nloptRandomPoint(multi.swarm[si].particle[pi].position, nlo->xlower, nlo->xupper, dim, &multi.mt);
        nloptRandomPoint(multi.swarm[si].particle[pi].velocity, nlo->xlower, nlo->xupper, dim, &multi.mt);
        for(unsigned int i=0; i<dim; i++)
          multi.swarm[si].particle[pi].velocity[i]-=multi.swarm[si].particle[pi].position[i];
        multi.swarm[si].particle[pi].cost=
          (*nlo->_fun)(dim, multi.swarm[si].particle[pi].position, nlo->fundata);
        nlo->funCalls++;
        /* Currently, this is the best position of the particle */
        multi.swarm[si].particle[pi].bestCost=multi.swarm[si].particle[pi].cost;
        doubleCopy(multi.swarm[si].particle[pi].bestPosition, 
                   multi.swarm[si].particle[pi].position, dim);
        multi.swarm[si].particle[pi].since=0;
        /* Set the best cost in this swarm */
        if(pi==0) {
          if(si==0 && initGuessAvailable) { // if initial guess available, use it here
            multi.swarm[si].bestCost=initGuessCost;
            doubleCopy(multi.swarm[si].bestPosition, nlo->xfull, dim);
          } else {
            multi.swarm[si].bestCost=multi.swarm[si].particle[pi].cost;
            doubleCopy(multi.swarm[si].bestPosition, multi.swarm[si].particle[pi].position, dim);
          }
        } else {
          if(!isfinite(multi.swarm[si].bestCost) || 
             multi.swarm[si].bestCost>multi.swarm[si].particle[pi].cost)
          {
            multi.swarm[si].bestCost=multi.swarm[si].particle[pi].cost;
            doubleCopy(multi.swarm[si].bestPosition, multi.swarm[si].particle[pi].position, dim);
          }
        }
      } // next particle in this swarm
      /* Set the best cost of all swarms */
      if(si==0) {
        multi.bestCost=multi.swarm[si].bestCost;
        doubleCopy(multi.bestPosition, multi.swarm[si].bestPosition, dim);
      } else {
        if(!isfinite(multi.bestCost) || multi.bestCost>multi.swarm[si].bestCost) {
          multi.bestCost=multi.swarm[si].bestCost;
          doubleCopy(multi.bestPosition, multi.swarm[si].bestPosition, dim);
        }
      }
      multi.swarm[si].since=0;
    } // next swarm 
    multi.since=0;
  } else {
    /* For each swarm, make one random particle inside the limits */
    unsigned int si=0;
    if(initGuessAvailable) // if initial guess is available, use it as particle for first swarm
      doubleCopy(multi.swarm[si++].particle[0].position, nlo->xfull, dim);
    for(; si<nSwarms; si++)
      nloptRandomPoint(multi.swarm[si].particle[0].position, nlo->xlower, nlo->xupper, dim, &multi.mt);
    /* Then add other particles around it with Gaussian distribution */
    double sd[dim];
    for(unsigned int i=0; i<dim; i++) sd[i]=0.13*(nlo->xupper[i]-nlo->xlower[i]);
    for(unsigned int si=0; si<nSwarms; si++) {
      for(unsigned int pi=1; pi<nParticles; pi++) {
        nloptGaussianPoint(multi.swarm[si].particle[pi].position,
                           multi.swarm[si].particle[0].position, sd, nlo->xlower, nlo->xupper, dim,
                           &multi.mt);
      }
    }
    /* Add random velocity and direction for each particle */
    for(unsigned int i=0; i<dim; i++) sd[i]=0.23*(nlo->xupper[i]-nlo->xlower[i]);
    double mvel[dim]; for(unsigned int i=0; i<dim; i++) mvel[i]=0.0;
    for(unsigned int si=0; si<nSwarms; si++) {
      for(unsigned int pi=0; pi<nParticles; pi++) {
        nloptGaussianPoint(multi.swarm[si].particle[pi].velocity,
                           mvel, sd, NULL, NULL, dim, &multi.mt);
      }
    }
    /* Calculate the cost of each particle, and find the best positions */
    multi.bestCost=nan("");
    for(unsigned int si=0; si<nSwarms; si++) {
      multi.swarm[si].bestCost=nan("");
      for(unsigned int pi=0; pi<nParticles; pi++) {
        multi.swarm[si].particle[pi].cost=
          (*nlo->_fun)(dim, multi.swarm[si].particle[pi].position, nlo->fundata);
        nlo->funCalls++;
        /* Currently, this is the best position of the particle */
        multi.swarm[si].particle[pi].bestCost=multi.swarm[si].particle[pi].cost;
        doubleCopy(multi.swarm[si].particle[pi].bestPosition, 
                   multi.swarm[si].particle[pi].position, dim);
        multi.swarm[si].particle[pi].since=0;
        /* How about best in the swarm? */
        if(!isfinite(multi.swarm[si].bestCost) || 
           multi.swarm[si].bestCost>multi.swarm[si].particle[pi].cost)
        {
          multi.swarm[si].bestCost=multi.swarm[si].particle[pi].cost;
          doubleCopy(multi.swarm[si].bestPosition, 
                     multi.swarm[si].particle[pi].position, dim);
        }
      }
      /* Does this swarm has the best particle of all? */
      if(!isfinite(multi.bestCost) || multi.bestCost>multi.swarm[si].bestCost) {
        multi.bestCost=multi.swarm[si].bestCost;
        doubleCopy(multi.bestPosition, multi.swarm[si].bestPosition, dim);
      }
      multi.swarm[si].since=0;
    }
    multi.since=0;
  }


  /* Inside the loop, save the previous best position to check if fit is improving */
  double lastBestPosition[dim];
  unsigned int movementCounter=0; // counter for stopped movement
  unsigned int convergenceCounter=0; // counter for converged swarms

  unsigned int stopCostLimit=20, stopCost=0;
  double lastBestCost=multi.bestCost;

  unsigned int iterNr=0; // loop index
  while(iterNr<maxIter && nlo->funCalls<nlo->maxFunCalls) {
    iterNr++; 
    if(verbose>4) {
      fprintf(fp, "-----------------------------\nloop %d\n", iterNr);
      fprintf(fp, "bestCost=%g since %u iterations\n", multi.bestCost, multi.since);
      if(verbose>5) fprintf(fp, "function_calls_so_far=%d\n", nlo->funCalls);
      fflush(fp);
    }


#if(0)
    /* Check if each swarm has concentrated inside tolerance */ 
    unsigned int swarmsConcentrated=0;
    for(unsigned int si=0; si<nSwarms; si++) {
      double /*pmean[dim],*/ psd[dim], vmean[dim], vsd[dim];
      nloptMPSOpMean(&multi, si, pmean, psd);
      nloptMPSOabsvMean(&multi, si, vmean, vsd);
      if(verbose>5) {
        fprintf(fp, "swarm %d bestCost=%g since %u\n", 1+si, 
                multi.swarm[si].bestCost, multi.swarm[si].since);
        if(verbose>6) {
          fprintf(fp, "  bestPosition: %g", multi.swarm[si].bestPosition[0]);
          for(unsigned int i=1; i<dim; i++) fprintf(fp, ", %g", multi.swarm[si].bestPosition[i]);
          fprintf(fp, "\n");
        }
        if(verbose>9 || iterNr==1) {
          for(unsigned int i=0; i<dim; i++)
            fprintf(fp, "    parameter %d: meanPosition %g +- %g\n", 1+i, pmean[i], psd[i]);
          for(unsigned int i=0; i<dim; i++)
            fprintf(fp, "    parameter %d: meanAbsVelocity %g +- %g\n", 1+i, vmean[i], vsd[i]);
        }
        fflush(fp);
      }
      /* Swarm particle mean and best particle ever inside tolerance? */
      /* Parameter SD below tolerance? */
      unsigned int i=0;
      for(i=0; i<dim; i++) {
        if(fabs(pmean[i] - multi.swarm[si].bestPosition[i]) > 0.01*nlo->xtol[i]) break;
        if(psd[i]>0.02*nlo->xtol[i]) break;
      }
      if(i==dim) {
        if(verbose>5) fprintf(fp, "  swarm %d particles inside tolerance.\n", 1+si);
        swarmsConcentrated++;
      }
    }
    if(swarmsConcentrated==nSwarms) {
      if(verbose>2) fprintf(fp, "  all swarm particles shrunk inside swarm tolerance.\n");
      break;
    }
#endif


    /* Go through all swarms, and their particles */
    unsigned int swarmsConcentrated=0;
    multi.since++;
    for(unsigned int si=0; si<nSwarms; si++) {
      multi.swarm[si].since++;

      /* Calculate statistics of the swarm */
      double pmean[dim], psd[dim], vmean[dim], vsd[dim];
      nloptMPSOpMean(&multi, si, pmean, psd);
      nloptMPSOabsvMean(&multi, si, vmean, vsd);
      if(verbose>5) {
        fprintf(fp, "swarm %d bestCost=%g since %u\n", 1+si, 
                multi.swarm[si].bestCost, multi.swarm[si].since);
        if(verbose>6) {
          fprintf(fp, "  bestPosition: %g", multi.swarm[si].bestPosition[0]);
          for(unsigned int i=1; i<dim; i++) fprintf(fp, ", %g", multi.swarm[si].bestPosition[i]);
          fprintf(fp, "\n");
        }
        if(verbose>7 || iterNr==1) {
          for(unsigned int i=0; i<dim; i++)
            fprintf(fp, "    parameter %d: meanPosition %g +- %g\n", 1+i, pmean[i], psd[i]);
          for(unsigned int i=0; i<dim; i++)
            fprintf(fp, "    parameter %d: meanAbsVelocity %g +- %g\n", 1+i, vmean[i], vsd[i]);
        }
        fflush(fp);
      }
      /* Swarm particle mean and best-ever particle inside tolerance? */
      /* Parameter SD below tolerance? */
      {
        unsigned int i=0;
        for(i=0; i<dim; i++) {
          if(fabs(pmean[i] - multi.swarm[si].bestPosition[i]) > 0.01*nlo->xtol[i]) break;
          if(psd[i]>0.02*nlo->xtol[i]) break;
        }
        if(i==dim) {
          if(verbose>5) fprintf(fp, "  swarm %d particles inside tolerance.\n", 1+si);
          swarmsConcentrated++;
        }
      }



      /* Swarm can get frustrated if no advancement for a long time */
      int swarmFrustrated=0;
      if(multi.swarm[si].since>=nFrustration) {
        swarmFrustrated=1; multi.swarm[si].since=0;
        if(verbose>5) {fprintf(fp, "swarm is frustrated\n"); fflush(fp);}
      }

      /* After every 10 loops, try Nelder-Mead, if requested */
      if(doLocal && !swarmFrustrated && (iterNr%10)==0) {
        if(verbose>5) {fprintf(fp, "local optimization:\n"); fflush(fp);}
        /* Best-ever position as starting point */
        doubleCopy(nlo->xfull, multi.swarm[si].bestPosition, dim);
        /* Deltas based on swarm particle SD, but making sure that above zero, unless parameter is fixed */
        for(unsigned int i=0; i<dim; i++) {
          if(!(nlo->xupper[i]>nlo->xlower[i])) {nlo->xdelta[i]=0.0; continue;}
          if(psd[i]>nlo->xtol[i]) {nlo->xdelta[i]=psd[i]; continue;}
          nlo->xdelta[i]=mertwiRandomExponential(&multi.mt, nlo->xtol[i]);
        }
        if(verbose>7) {
          fprintf(fp, "initial guesses and deltas, with cost %g:\n", multi.swarm[si].bestCost);
          for(unsigned int i=0; i<dim; i++) fprintf(fp, "\t%e\t%e\n", nlo->xfull[i], nlo->xdelta[i]);
          fflush(fp);
        }
        if(nloptSimplex(nlo, 50*dim, NULL)==0) {
          double cost=nlo->funval;
          if(cost<multi.swarm[si].bestCost) {
            multi.swarm[si].bestCost=cost;
            doubleCopy(multi.swarm[si].bestPosition, nlo->xfull, dim);
            multi.swarm[si].since=0;
            if(verbose>7) {
              fprintf(fp, "results: %e", multi.swarm[si].bestPosition[0]);
              for(unsigned int i=1; i<dim; i++) fprintf(fp, ", %e", multi.swarm[si].bestPosition[i]);
              fprintf(fp, ", with cost: %g\n", cost); fflush(fp);
            }
          } else {
            if(verbose>10) fprintf(fp, "local optimization did not provide lower cost (%e)\n", cost);
          }
        } else {
          if(verbose>10) fprintf(fp, "local optimization failed.\n");
        }
      }

      /* Go through the particles of this swarm */
      for(unsigned int pi=0; pi<nParticles; pi++) {
    
        if(verbose>14) {
          fprintf(fp, "      particle %d bestCost=%g since %u\n", 1+pi, 
                  multi.swarm[si].particle[pi].bestCost, multi.swarm[si].particle[pi].since);
          fflush(fp);
        }

        /* Cast lots for whether it is time to kill particle and create a new;
           but if cost is NaN, then most definitely kill it. */
        int killed=0;
        if((iterNr>3 && mertwiRandomDouble1(&multi.mt)<pDeath)
           || isnan(multi.swarm[si].particle[pi].cost))
        {
          killed=1; multi.swarm[si].particle[pi].since=0;
          /* Replace current position and velocity with random position and velocity */
          if(verbose>12) {fprintf(fp, "death of particle %u\n", 1+pi); fflush(fp);}

          nloptRandomPoint(multi.swarm[si].particle[pi].position, nlo->xlower, nlo->xupper, dim, &multi.mt);
          nloptRandomPoint(multi.swarm[si].particle[pi].velocity, nlo->xlower, nlo->xupper, dim, &multi.mt);
          for(unsigned int i=0; i<dim; i++)
            multi.swarm[si].particle[pi].velocity[i]-=multi.swarm[si].particle[pi].position[i];
          /* cost */
          multi.swarm[si].particle[pi].cost=
            (*nlo->_fun)(dim, multi.swarm[si].particle[pi].position, nlo->fundata);
          nlo->funCalls++;
          /* Check if this is the best position ever for this particle */  
          if(multi.swarm[si].particle[pi].bestCost>multi.swarm[si].particle[pi].cost) {
            if(verbose>10) printf("Miracle: resurrection led to a new best!\n");
            multi.swarm[si].particle[pi].bestCost=multi.swarm[si].particle[pi].cost;
            doubleCopy(multi.swarm[si].particle[pi].bestPosition, 
                       multi.swarm[si].particle[pi].position, dim);
          }
        }

        /* Cast lots for whether it is time to immigrate */
        int immigrated=0;
        if(iterNr>5 && !killed && nSwarms>1 && mertwiRandomDouble1(&multi.mt)<pImmigration) {
          immigrated=1; multi.swarm[si].particle[pi].since=0;
          /* Swap particle with a random particle in different swarm */
          if(verbose>12) {fprintf(fp, "  immigration\n"); fflush(fp);}
          unsigned int sj;
          if(nSwarms<4) {
            sj=si+1; if(sj>=nSwarms) sj=si-1;
          } else {
            do {sj=mertwiRandomInt63(&multi.mt)/(INT64_MAX/(nSwarms-1)+1);} while(si==sj);
          }
          if(verbose>12) fprintf(fp, "  swapping particle %d between swarms %d and %d\n", 1+pi, 1+si, 1+sj);
          for(unsigned int i=0; i<dim; i++) {
            /* position */
            double d=multi.swarm[si].particle[pi].position[i];
            multi.swarm[si].particle[pi].position[i]=multi.swarm[sj].particle[pi].position[i];
            multi.swarm[sj].particle[pi].position[i]=d;
            /* velocity */
            d=multi.swarm[si].particle[pi].velocity[i];
            multi.swarm[si].particle[pi].velocity[i]=multi.swarm[sj].particle[pi].velocity[i];
            multi.swarm[sj].particle[pi].velocity[i]=d;
          }
          /* cost */
          double d=multi.swarm[si].particle[pi].cost;
          multi.swarm[si].particle[pi].cost=multi.swarm[sj].particle[pi].cost;
          multi.swarm[sj].particle[pi].cost=d;
          /* also the particles memory of its best ever cost and position follows it to another swarm */
          d=multi.swarm[si].particle[pi].bestCost;
          multi.swarm[si].particle[pi].bestCost=multi.swarm[sj].particle[pi].bestCost;
          multi.swarm[sj].particle[pi].bestCost=d;
          for(unsigned int i=0; i<dim; i++) {
            double d=multi.swarm[si].particle[pi].bestPosition[i];
            multi.swarm[si].particle[pi].bestPosition[i]=multi.swarm[sj].particle[pi].bestPosition[i];
            multi.swarm[sj].particle[pi].bestPosition[i]=d;
          }
          /* Is the immigrated particle the best in its new swarm? */
          if(multi.swarm[si].bestCost>multi.swarm[si].particle[pi].bestCost) {
            multi.swarm[si].bestCost=multi.swarm[si].particle[pi].bestCost;
            doubleCopy(multi.swarm[si].bestPosition, multi.swarm[si].particle[pi].bestPosition, dim);
          }
          if(multi.swarm[sj].bestCost>multi.swarm[sj].particle[pi].bestCost) {
            multi.swarm[sj].bestCost=multi.swarm[sj].particle[pi].bestCost;
            doubleCopy(multi.swarm[sj].bestPosition, multi.swarm[sj].particle[pi].bestPosition, dim);
          }
        }

        /* If particle does not advance, it gets frustrated */
        int frustrated=0;
        if(multi.swarm[si].particle[pi].since>=nFrustration && !killed && !immigrated) {
          frustrated=1;
        }
        if(!frustrated && !killed && !immigrated && multi.swarm[si].particle[pi].since>2) {
          /* If particle has the worst position in the swarm, it will get frustrated, too */
          frustrated=1;
          for(unsigned int pj=0; pj<nParticles; pj++) if(pi!=pj)
            if(multi.swarm[si].particle[pj].cost>multi.swarm[si].particle[pi].cost) {
              frustrated=0; break;}
        }
        if(frustrated) {
          multi.swarm[si].particle[pi].since=0;
          /* Particle gets frustrated and will turn towards the swarm mean */
          if(verbose>12) {
            fprintf(fp, "  particle %u became frustrated\n", 1+pi);
            fprintf(fp, "  current position: %g", multi.swarm[si].particle[pi].position[0]);
            for(unsigned int i=1; i<dim; i++) fprintf(fp, ", %g", multi.swarm[si].particle[pi].position[i]);
            fprintf(fp, " -> %g\n", multi.swarm[si].particle[pi].cost);
            fflush(fp);
          }
        }


        /* Update velocity in every dimension */
        if(!frustrated) {
          for(unsigned int i=0; i<dim; i++) if(nlo->xupper[i]>nlo->xlower[i]) {
            multi.swarm[si].particle[pi].velocity[i] =
              wInertia*multi.swarm[si].particle[pi].velocity[i];
            multi.swarm[si].particle[pi].velocity[i] += wParticle*mertwiRandomDouble1(&multi.mt)*
              (multi.swarm[si].particle[pi].bestPosition[i] - multi.swarm[si].particle[pi].position[i]);
            multi.swarm[si].particle[pi].velocity[i] += wSwarm*mertwiRandomDouble1(&multi.mt)*
              (multi.swarm[si].bestPosition[i] - multi.swarm[si].particle[pi].position[i]);
            if(wGlobal>0.0)
              multi.swarm[si].particle[pi].velocity[i] += wGlobal*mertwiRandomDouble1(&multi.mt)*
               (multi.bestPosition[i] - multi.swarm[si].particle[pi].position[i]);
          }
          if(swarmFrustrated) {
            /* Additional random panic movement, if swarm is frustrated */
            for(unsigned int i=0; i<dim; i++) if(nlo->xupper[i]>nlo->xlower[i]) {
              double v=mertwiRandomExponential(&multi.mt, nlo->xtol[i]);
              if(multi.swarm[si].particle[pi].velocity[i]>0.0)
                multi.swarm[si].particle[pi].velocity[i]+=v;
              else multi.swarm[si].particle[pi].velocity[i]-=v;
            }
          }
        } else {
          /* Frustrated particle turns towards the swarm mean */
          for(unsigned int i=0; i<dim; i++) if(nlo->xupper[i]>nlo->xlower[i]) {
            multi.swarm[si].particle[pi].velocity[i] =
              0.11*wInertia*multi.swarm[si].particle[pi].velocity[i] +
              0.89*(pmean[i] - multi.swarm[si].particle[pi].position[i]);
          }
        }
        if(wGlobal<0.0 && nSwarms>1) { // Swarms reject each other; not recommended
          for(unsigned int sj=0; sj<nSwarms; sj++) if(si!=sj) {
            for(unsigned int i=0; i<dim; i++) if(nlo->xupper[i]>nlo->xlower[i]) {
              multi.swarm[si].particle[pi].velocity[i] += wGlobal*mertwiRandomDouble1(&multi.mt)*
               (multi.swarm[sj].bestPosition[i] - multi.swarm[si].particle[pi].position[i]);
            }
          }
        }
        if(verbose>15 || (verbose>12 && frustrated)) {
          fprintf(fp, "  updated velocities: %g", multi.swarm[si].particle[pi].velocity[0]);
          for(unsigned int i=1; i<dim; i++)
            fprintf(fp, ", %g", multi.swarm[si].particle[pi].velocity[i]);
          fprintf(fp, "\n");
        }
      
        /* Update position */
        for(unsigned int i=0; i<dim; i++)
          multi.swarm[si].particle[pi].position[i]+=multi.swarm[si].particle[pi].velocity[i];
        /* check that new position is inside limits */
        for(unsigned int i=0; i<dim; i++)
          if(multi.swarm[si].particle[pi].position[i]<nlo->xlower[i]) {  
            multi.swarm[si].particle[pi].position[i]=nlo->xlower[i];
            /* reduce velocity and change direction */
            multi.swarm[si].particle[pi].velocity[i]*=-0.1;
          } else if(multi.swarm[si].particle[pi].position[i]>nlo->xupper[i]) {  
            multi.swarm[si].particle[pi].position[i]=nlo->xupper[i];
            /* reduce velocity and change direction */
            multi.swarm[si].particle[pi].velocity[i]*=-0.1;
          }
        if(verbose>15 || (verbose>12 && frustrated)) {
          fprintf(fp, "  updated positions: %g", multi.swarm[si].particle[pi].position[0]);
          for(unsigned int i=1; i<dim; i++) fprintf(fp, ", %g", multi.swarm[si].particle[pi].position[i]);
          fprintf(fp, "\n");
        }
        
      
        /* Update cost */
        multi.swarm[si].particle[pi].cost=
          (*nlo->_fun)(dim, multi.swarm[si].particle[pi].position, nlo->fundata);
        nlo->funCalls++;
        if(verbose>15 || (verbose>12 && frustrated)) 
          fprintf(fp, "  updated_cost=%g\n", multi.swarm[si].particle[pi].cost);


        /* Check if this is the best position ever for this particle */  
        if(multi.swarm[si].particle[pi].bestCost>multi.swarm[si].particle[pi].cost) {
          multi.swarm[si].particle[pi].bestCost=multi.swarm[si].particle[pi].cost;
          doubleCopy(multi.swarm[si].particle[pi].bestPosition, 
                     multi.swarm[si].particle[pi].position, dim);
          multi.swarm[si].particle[pi].since=0;
        } else {
          multi.swarm[si].particle[pi].since++;
        }

        /* Check if the best position of this particle is the best ever in this swarm?
           Note: check this separately, because of possible immigrations. */
        if(multi.swarm[si].bestCost>multi.swarm[si].particle[pi].bestCost) {
          multi.swarm[si].bestCost=multi.swarm[si].particle[pi].bestCost;
          doubleCopy(multi.swarm[si].bestPosition, multi.swarm[si].particle[pi].bestPosition, dim);
          multi.swarm[si].since=0;
        }

      } // next particle
        
      /* Is the best particle of this swarm the best particle in all swarms? */
      if(multi.bestCost>multi.swarm[si].bestCost) {
        multi.bestCost=multi.swarm[si].bestCost;
        doubleCopy(multi.bestPosition, multi.swarm[si].bestPosition, dim);
        multi.since=0;
      }
      
    } // next swarm

    /* Check if each swarm has concentrated inside tolerance */ 
    if(swarmsConcentrated==nSwarms) {
      if(verbose>2) fprintf(fp, "  all swarm particles shrunk inside swarm tolerance.\n");
      break;
    }


    /* Stop if the best particle in each swarm is inside tolerances of parameters;
       all swarms have then converged to the same position. */
    if(nSwarms>1 && iterNr>10) {
      int swarmsConverged=1;
      for(unsigned int i=0; i<dim; i++) {
        for(unsigned int si=0; si<nSwarms; si++) {
          double d=fabs(multi.bestPosition[i]-multi.swarm[si].bestPosition[i]);
          if(d>nlo->xtol[i]) {swarmsConverged=0; convergenceCounter=0; break;}
        }
        if(!swarmsConverged) break;
      }
      if(swarmsConverged) {
        convergenceCounter++;
        if(convergenceCounter>5) {
          if(verbose>3) fprintf(fp, "all swarms converged into one minimum.\n");
//          break;
        }
      }
    }


    /* Stop if the best overall position stops moving */
    if(iterNr>=20) {
      int moved=0;
      for(unsigned int i=0; i<dim; i++) {
        double d=fabs(lastBestPosition[i]-multi.bestPosition[i]);
        if(d>0.1*nlo->xtol[i]) {moved=1; movementCounter=0; break;}
      }
      if(moved==0) {
        movementCounter++;
        if(movementCounter>10) {
          if(verbose>3) 
            fprintf(fp, "best position did not improve in %d last loops.\n", movementCounter);
#if(0)
          if(verbose>80 && movementCounter>10) {
            fprintf(fp, "Why does the movement not continue?\n");
            for(unsigned int si=0; si<nSwarms; si++) {
              fprintf(fp, "Swarm %d : bestCost=%g\n", 1+si, multi.swarm[si].bestCost);
              /* Search for the currently best particle, to also get its velocity */
              unsigned int pBest=0; 
              double pBestCost=multi.swarm[si].particle[0].cost;
              for(unsigned int pi=1; pi<nParticles; pi++)
                if(multi.swarm[si].particle[pi].cost<pBestCost) {
                  pBestCost=multi.swarm[si].particle[pi].cost; pBest=pi;
                }
              fprintf(fp, "  currentBestCost=%g\n", pBestCost);
              fprintf(fp, "\tDim\toPos\tcPos\tcVel\n");
              for(unsigned int i=0; i<dim; i++) {
                fprintf(fp, "\t%d\t%g\t%g\t%g\n", 1+i, multi.swarm[si].bestPosition[i],
                   multi.swarm[si].particle[pBest].position[i], 
                   multi.swarm[si].particle[pBest].velocity[i]);
              }
            }
          }
#endif
        }
      }
      if(movementCounter>20+dim) {
        fprintf(fp, "best position did not move markedly in %d last loops.\n", movementCounter);
        break;
      }
    }
    doubleCopy(lastBestPosition, multi.bestPosition, dim);


    /* Check that cost is improving */
    if(multi.bestCost<lastBestCost) {
      lastBestCost=multi.bestCost;
      stopCost=0;
    } else {
      stopCost++;
    }
    if(stopCost>=stopCostLimit) {
      if(verbose>1) fprintf(fp, "Cost did not improve in %d last iterations.\n", stopCost);
      break;
    }


  } // next loop

  /* Check the reason for loop exit */
  if(iterNr>=maxIter) {
    if(verbose>1) fprintf(fp, "exceeded the maximum number for loops.\n");
  }
  if(nlo->funCalls>=nlo->maxFunCalls) {
    if(verbose>1) fprintf(fp, "exceeded the maximum number for function calls.\n");
  }

  /* Copy optimized parameters over initial values */
  doubleCopy(nlo->xfull, multi.bestPosition, dim);

  /* Analyze the swarms */
  if(verbose>2) {
    fprintf(fp, "\n---- MPSO end analysis ----\n");
    fprintf(fp, "loops: %d\n", iterNr);
    fprintf(fp, "function calls: %d\n", nlo->funCalls);
    for(unsigned int si=0; si<nSwarms; si++) {
      fprintf(fp, "\nSwarm %d : bestCost=%g\n", 1+si, multi.swarm[si].bestCost);
      for(unsigned int i=0; i<dim; i++) {
        fprintf(fp, "parameter %d: bestPosition %g\n", 1+i, multi.swarm[si].bestPosition[i]);
      }
      double pmean[dim], psd[dim];
      nloptMPSOpMean(&multi, si, pmean, psd);
      for(unsigned int i=0; i<dim; i++) {
        fprintf(fp, "parameter %d: meanPosition %g +- %g\n", 1+i, pmean[i], psd[i]);
      }
      nloptMPSOabsvMean(&multi, si, pmean, psd);
      for(unsigned int i=0; i<dim; i++) {
        fprintf(fp, "parameter %d: meanAbsVelocity %g +- %g\n", 1+i, pmean[i], psd[i]);
      }

    }
    fprintf(fp, "\nOverall bestCost=%g\n", multi.bestCost);
    fprintf(fp, "at position: %g", multi.bestPosition[0]);
    for(unsigned int i=1; i<dim; i++) fprintf(fp, ", %g", multi.bestPosition[i]);
    fprintf(fp, "\n"); fflush(fp);
  }
  
  /* Free allocated memory */
  for(unsigned int si=0; si<nSwarms; si++) {
    for(unsigned int pi=0; pi<nParticles; pi++) {
      free(multi.swarm[si].particle[pi].position);
      free(multi.swarm[si].particle[pi].velocity);
      free(multi.swarm[si].particle[pi].bestPosition);
    }
    free(multi.swarm[si].particle);
    free(multi.swarm[si].bestPosition);
  }
  free(multi.bestPosition);
  free(multi.swarm);


  statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_OK);
  return TPCERROR_OK;
}
/*****************************************************************************/

/*****************************************************************************/
