+ pDataArray->m->mothurOutEndLine();
+ pDataArray->m->mothurOut("Total time: " + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/CLOCKS_PER_SEC) + '\n');
+
+ string namesFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names";
+ //createNamesFile(numSeqs, numUniques, namesFileName, seqNameVector, mapSeqToUnique, mapUniqueToSeq);
+ /*****************************************************************************************************
+ vector<string> duplicateNames(numUniques, "");
+ for(int i=0;i<numSeqs;i++){
+ duplicateNames[mapSeqToUnique[i]] += seqNameVector[i] + ',';
+ }
+
+ ofstream nameFile;
+ pDataArray->m->openOutputFile(namesFileName, nameFile);
+
+ for(int i=0;i<numUniques;i++){
+ if (pDataArray->m->control_pressed) { nameFile.close(); return 0; }
+ nameFile << mapUniqueToSeq[i] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
+ }
+ nameFile.close();
+ /*****************************************************************************************************
+
+ if (pDataArray->m->control_pressed) { return 0; }
+
+ pDataArray->m->mothurOut("\nClustering flowgrams...\n");
+ string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.list";
+ //cluster(listFileName, distFileName, namesFileName);
+ /*****************************************************************************************************
+ ReadMatrix* read = new ReadColumnMatrix(distFileName);
+ read->setCutoff(pDataArray->cutoff);
+
+ NameAssignment* clusterNameMap = new NameAssignment(namesFileName);
+ clusterNameMap->readMap();
+ read->read(clusterNameMap);
+
+ ListVector* list = read->getListVector();
+ SparseMatrix* matrix = read->getMatrix();
+
+ delete read;
+ delete clusterNameMap;
+
+ RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
+
+ Cluster* cluster = new CompleteLinkage(rabund, list, matrix, pDataArray->cutoff, "furthest");
+ string tag = cluster->getTag();
+
+ double clusterCutoff = pDataArray->cutoff;
+ while (matrix->getSmallDist() <= clusterCutoff && matrix->getNNodes() > 0){
+
+ if (pDataArray->m->control_pressed) { break; }
+
+ cluster->update(clusterCutoff);
+ }
+
+ list->setLabel(toString(pDataArray->cutoff));
+
+ ofstream listFileOut;
+ pDataArray->m->openOutputFile(listFileName, listFileOut);
+ list->print(listFileOut);
+ listFileOut.close();
+
+ delete matrix; delete cluster; delete rabund; delete list;
+ /*****************************************************************************************************
+
+ if (pDataArray->m->control_pressed) { return 0; }
+
+ vector<int> otuData;
+ vector<int> cumNumSeqs;
+ vector<int> nSeqsPerOTU;
+ vector<vector<int> > aaP; //tMaster->aanP: each row is a different otu / each col contains the sequence indices
+ vector<vector<int> > aaI; //tMaster->aanI: that are in each otu - can't differentiate between aaP and aaI
+ vector<int> seqNumber; //tMaster->anP: the sequence id number sorted by OTU
+ vector<int> seqIndex; //tMaster->anI; the index that corresponds to seqNumber
+
+
+ //int numOTUs = getOTUData(numSeqs, listFileName, otuData, cumNumSeqs, nSeqsPerOTU, aaP, aaI, seqNumber, seqIndex, nameMap);
+ /*****************************************************************************************************
+ ifstream listFile;
+ pDataArray->m->openInputFile(listFileName, listFile);
+ string label;
+ int numOTUs;
+
+ listFile >> label >> numOTUs;
+
+ otuData.assign(numSeqs, 0);
+ cumNumSeqs.assign(numOTUs, 0);
+ nSeqsPerOTU.assign(numOTUs, 0);
+ aaP.clear();aaP.resize(numOTUs);
+
+ seqNumber.clear();
+ aaI.clear();
+ seqIndex.clear();
+
+ string singleOTU = "";
+
+ for(int i=0;i<numOTUs;i++){
+
+ if (pDataArray->m->control_pressed) { break; }
+
+ listFile >> singleOTU;
+
+ istringstream otuString(singleOTU);
+
+ while(otuString){
+
+ string seqName = "";
+
+ for(int j=0;j<singleOTU.length();j++){
+ char letter = otuString.get();
+
+ if(letter != ','){
+ seqName += letter;
+ }
+ else{
+ map<string,int>::iterator nmIt = nameMap.find(seqName);
+ int index = nmIt->second;
+
+ nameMap.erase(nmIt);
+
+ otuData[index] = i;
+ nSeqsPerOTU[i]++;
+ aaP[i].push_back(index);
+ seqName = "";
+ }
+ }
+
+ map<string,int>::iterator nmIt = nameMap.find(seqName);
+
+ int index = nmIt->second;
+ nameMap.erase(nmIt);
+
+ otuData[index] = i;
+ nSeqsPerOTU[i]++;
+ aaP[i].push_back(index);
+
+ otuString.get();
+ }
+
+ sort(aaP[i].begin(), aaP[i].end());
+ for(int j=0;j<nSeqsPerOTU[i];j++){
+ seqNumber.push_back(aaP[i][j]);
+ }
+ for(int j=nSeqsPerOTU[i];j<numSeqs;j++){
+ aaP[i].push_back(0);
+ }
+
+
+ }
+
+ for(int i=1;i<numOTUs;i++){
+ cumNumSeqs[i] = cumNumSeqs[i-1] + nSeqsPerOTU[i-1];
+ }
+ aaI = aaP;
+ seqIndex = seqNumber;
+
+ listFile.close();
+ /*****************************************************************************************************
+
+ if (pDataArray->m->control_pressed) { return 0; }
+
+ pDataArray->m->mothurRemove(distFileName);
+ pDataArray->m->mothurRemove(namesFileName);
+ pDataArray->m->mothurRemove(listFileName);
+
+ vector<double> dist; //adDist - distance of sequences to centroids
+ vector<short> change; //did the centroid sequence change? 0 = no; 1 = yes
+ vector<int> centroids; //the representative flowgram for each cluster m
+ vector<double> weight;
+ vector<double> singleTau; //tMaster->adTau: 1-D Tau vector (1xnumSeqs)
+ vector<int> nSeqsBreaks;
+ vector<int> nOTUsBreaks;
+
+ dist.assign(numSeqs * numOTUs, 0);
+ change.assign(numOTUs, 1);
+ centroids.assign(numOTUs, -1);
+ weight.assign(numOTUs, 0);
+ singleTau.assign(numSeqs, 1.0);
+
+ nSeqsBreaks.assign(2, 0);
+ nOTUsBreaks.assign(2, 0);
+
+ nSeqsBreaks[0] = 0;
+ nSeqsBreaks[1] = numSeqs;
+ nOTUsBreaks[1] = numOTUs;
+
+ if (pDataArray->m->control_pressed) { break; }
+
+ double maxDelta = 0;
+ int iter = 0;
+
+ begClock = clock();
+ begTime = time(NULL);
+
+ pDataArray->m->mothurOut("\nDenoising flowgrams...\n");
+ pDataArray->m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n");
+
+ while((pDataArray->maxIters == 0 && maxDelta > pDataArray->minDelta) || iter < MIN_ITER || (maxDelta > pDataArray->minDelta && iter < pDataArray->maxIters)){
+
+ if (pDataArray->m->control_pressed) { break; }
+
+ double cycClock = clock();
+ unsigned long long cycTime = time(NULL);
+ //fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
+ /*****************************************************************************************************
+ int indexFill = 0;
+ for(int i=0;i<numOTUs;i++){
+
+ if (pDataArray->m->control_pressed) { return 0; }
+
+ cumNumSeqs[i] = indexFill;
+ for(int j=0;j<nSeqsPerOTU[i];j++){
+ seqNumber[indexFill] = aaP[i][j];
+ seqIndex[indexFill] = aaI[i][j];
+
+ indexFill++;
+ }
+ }
+ /*****************************************************************************************************
+
+
+ if (pDataArray->m->control_pressed) { break; }
+
+ //calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber);
+ /*****************************************************************************************************
+ for(int i=0;i<numOTUs;i++){
+
+ if (pDataArray->m->control_pressed) { break; }
+
+ double count = 0;
+ int position = 0;
+ int minFlowGram = 100000000;
+ double minFlowValue = 1e8;
+ change[i] = 0; //FALSE
+
+ for(int j=0;j<nSeqsPerOTU[i];j++){
+ count += singleTau[seqNumber[cumNumSeqs[i] + j]];
+ }
+
+ if(nSeqsPerOTU[i] > 0 && count > MIN_COUNT){
+ vector<double> adF(nSeqsPerOTU[i]);
+ vector<int> anL(nSeqsPerOTU[i]);
+
+ for(int j=0;j<nSeqsPerOTU[i];j++){
+ int index = cumNumSeqs[i] + j;
+ int nI = seqIndex[index];
+ int nIU = mapSeqToUnique[nI];
+
+ int k;
+ for(k=0;k<position;k++){
+ if(nIU == anL[k]){
+ break;
+ }
+ }
+ if(k == position){
+ anL[position] = nIU;
+ adF[position] = 0.0000;
+ position++;
+ }
+ }
+
+ for(int j=0;j<nSeqsPerOTU[i];j++){
+ int index = cumNumSeqs[i] + j;
+ int nI = seqIndex[index];
+
+ double tauValue = singleTau[seqNumber[index]];
+
+ for(int k=0;k<position;k++){
+ // double dist = getDistToCentroid(anL[k], nI, lengths[nI], uniqueFlowgrams, flowDataIntI, numFlowCells);
+ /*****************************************************************************************************
+ int flowAValue = anL[k] * numFlowCells;
+ int flowBValue = nI * numFlowCells;
+
+ double dist = 0;
+
+ for(int l=0;l<lengths[nI];l++){
+ dist += pDataArray->singleLookUp[uniqueFlowgrams[flowAValue] * NUMBINS + flowDataIntI[flowBValue]];
+ flowAValue++;
+ flowBValue++;
+ }
+
+ dist = dist / (double)lengths[nI];
+ /*****************************************************************************************************
+ adF[k] += dist * tauValue;
+ }
+ }
+
+ for(int j=0;j<position;j++){
+ if(adF[j] < minFlowValue){
+ minFlowGram = j;
+ minFlowValue = adF[j];
+ }
+ }
+
+ if(centroids[i] != anL[minFlowGram]){
+ change[i] = 1;
+ centroids[i] = anL[minFlowGram];
+ }
+ }
+ else if(centroids[i] != -1){
+ change[i] = 1;
+ centroids[i] = -1;
+ }
+ }
+ /*****************************************************************************************************
+
+ if (pDataArray->m->control_pressed) { break; }
+
+ //maxDelta = getNewWeights(numOTUs, cumNumSeqs, nSeqsPerOTU, singleTau, seqNumber, weight);
+ /*****************************************************************************************************
+ double maxChange = 0;
+
+ for(int i=0;i<numOTUs;i++){
+
+ if (pDataArray->m->control_pressed) { break; }
+
+ double difference = weight[i];
+ weight[i] = 0;
+
+ for(int j=0;j<nSeqsPerOTU[i];j++){
+ int index = cumNumSeqs[i] + j;
+ double tauValue = singleTau[seqNumber[index]];
+ weight[i] += tauValue;
+ }
+
+ difference = fabs(weight[i] - difference);
+ if(difference > maxChange){ maxChange = difference; }
+ }
+ maxDelta = maxChange;
+ /*****************************************************************************************************
+
+ if (pDataArray->m->control_pressed) { break; }
+
+ //double nLL = getLikelihood(numSeqs, numOTUs, nSeqsPerOTU, seqNumber, cumNumSeqs, seqIndex, dist, weight);
+ /*****************************************************************************************************
+ vector<long double> P(numSeqs, 0);
+ int effNumOTUs = 0;
+
+ for(int i=0;i<numOTUs;i++){
+ if(weight[i] > MIN_WEIGHT){
+ effNumOTUs++;
+ }
+ }
+
+ string hold;
+ for(int i=0;i<numOTUs;i++){
+
+ if (pDataArray->m->control_pressed) { break; }
+
+ for(int j=0;j<nSeqsPerOTU[i];j++){
+ int index = cumNumSeqs[i] + j;
+ int nI = seqIndex[index];
+ double singleDist = dist[seqNumber[index]];
+
+ P[nI] += weight[i] * exp(-singleDist * pDataArray->sigma);
+ }
+ }
+ double nLL = 0.00;
+ for(int i=0;i<numSeqs;i++){
+ if(P[i] == 0){ P[i] = DBL_EPSILON; }
+
+ nLL += -log(P[i]);
+ }
+
+ nLL = nLL -(double)numSeqs * log(pDataArray->sigma);
+ /*****************************************************************************************************
+
+ if (pDataArray->m->control_pressed) { break; }
+
+ //checkCentroids(numOTUs, centroids, weight);
+ /*****************************************************************************************************
+ vector<int> unique(numOTUs, 1);
+
+ for(int i=0;i<numOTUs;i++){
+ if(centroids[i] == -1 || weight[i] < MIN_WEIGHT){
+ unique[i] = -1;
+ }
+ }
+
+ for(int i=0;i<numOTUs;i++){
+
+ if (pDataArray->m->control_pressed) { break; }
+
+ if(unique[i] == 1){
+ for(int j=i+1;j<numOTUs;j++){
+ if(unique[j] == 1){
+
+ if(centroids[j] == centroids[i]){
+ unique[j] = 0;
+ centroids[j] = -1;
+
+ weight[i] += weight[j];
+ weight[j] = 0.0;
+ }
+ }
+ }
+ }
+ }
+ /*****************************************************************************************************
+
+ if (pDataArray->m->control_pressed) { break; }
+
+ //calcNewDistances(numSeqs, numOTUs, nSeqsPerOTU, dist, weight, change, centroids, aaP, singleTau, aaI, seqNumber, seqIndex, uniqueFlowgrams, flowDataIntI, numFlowCells, lengths);
+ /*****************************************************************************************************
+ int total = 0;
+ vector<double> newTau(numOTUs,0);
+ vector<double> norms(numSeqs, 0);
+ nSeqsPerOTU.assign(numOTUs, 0);
+
+ for(int i=0;i<numSeqs;i++){
+
+ if (pDataArray->m->control_pressed) { break; }
+
+ int indexOffset = i * numOTUs;
+
+ double offset = 1e8;
+
+ for(int j=0;j<numOTUs;j++){
+
+ if(weight[j] > MIN_WEIGHT && change[j] == 1){
+ //dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i], uniqueFlowgrams, flowDataIntI, numFlowCells);
+ /*****************************************************************************************************
+ int flowAValue = centroids[j] * numFlowCells;
+ int flowBValue = i * numFlowCells;
+
+ double distTemp = 0;
+
+ for(int l=0;l<lengths[i];l++){
+ distTemp += pDataArray->singleLookUp[uniqueFlowgrams[flowAValue] * NUMBINS + flowDataIntI[flowBValue]];
+ flowAValue++;
+ flowBValue++;
+ }
+
+ dist[indexOffset + j] = distTemp / (double)lengths[i];
+ /*****************************************************************************************************
+
+ }
+
+ if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
+ offset = dist[indexOffset + j];
+ }
+ }
+
+ for(int j=0;j<numOTUs;j++){
+ if(weight[j] > MIN_WEIGHT){
+ newTau[j] = exp(pDataArray->sigma * (-dist[indexOffset + j] + offset)) * weight[j];
+ norms[i] += newTau[j];
+ }
+ else{
+ newTau[j] = 0.0;
+ }
+ }
+
+ for(int j=0;j<numOTUs;j++){
+ newTau[j] /= norms[i];
+ }
+
+ for(int j=0;j<numOTUs;j++){
+ if(newTau[j] > MIN_TAU){
+
+ int oldTotal = total;
+
+ total++;
+
+ singleTau.resize(total, 0);
+ seqNumber.resize(total, 0);
+ seqIndex.resize(total, 0);
+
+ singleTau[oldTotal] = newTau[j];
+
+ aaP[j][nSeqsPerOTU[j]] = oldTotal;
+ aaI[j][nSeqsPerOTU[j]] = i;
+ nSeqsPerOTU[j]++;
+ }
+ }
+
+ }