]> git.donarmstrong.com Git - mothur.git/blob - shhhercommand.h
added unix to ifdefs. minor changes while testing 1.24.0.
[mothur.git] / shhhercommand.h
1 #ifndef SHHHER_H
2 #define SHHHER_H
3
4 /*
5  *  shhher.h
6  *  Mothur
7  *
8  *  Created by Pat Schloss on 12/27/10.
9  *  Copyright 2010 Schloss Lab. All rights reserved.
10  *
11  */
12
13 #include "mothur.h"
14 #include "command.hpp"
15 #include "readcolumn.h"
16 #include "readmatrix.hpp"
17 #include "rabundvector.hpp"
18 #include "sabundvector.hpp"
19 #include "listvector.hpp"
20 #include "cluster.hpp"
21 #include "sparsematrix.hpp"
22 #include <cfloat>
23
24 //**********************************************************************************************************************
25
26 #define NUMBINS 1000
27 #define HOMOPS 10
28 #define MIN_COUNT 0.1
29 #define MIN_WEIGHT 0.1
30 #define MIN_TAU 0.0001
31 #define MIN_ITER 10
32 //**********************************************************************************************************************
33
34 class ShhherCommand : public Command {
35         
36 public:
37         ShhherCommand(string);
38         ShhherCommand();
39         ~ShhherCommand() {}
40         
41         vector<string> setParameters();
42         string getCommandName()                 { return "shhh.flows";  }
43         string getCommandCategory()             { return "Sequence Processing";         }
44         string getHelpString(); 
45         string getCitation() { return "http://www.mothur.org/wiki/Shhh.flows"; }
46         string getDescription()         { return "shhh.flows"; }
47
48         
49         int execute(); 
50         void help() { m->mothurOut(getHelpString()); }          
51 private:
52         
53     struct linePair {
54                 int start;
55                 int end;
56                 linePair(int i, int j) : start(i), end(j) {}
57         };
58     
59         int abort;
60         string outputDir, flowFileName, flowFilesFileName, lookupFileName, compositeFASTAFileName, compositeNamesFileName;
61
62         int processors, maxIters;
63         float cutoff, sigma, minDelta;
64         string flowOrder;
65     
66     vector<string> outputNames;
67         vector<double> singleLookUp;
68         vector<double> jointLookUp;
69     vector<string> flowFileVector;
70         
71     int driver(vector<string>, string, string, int, int);
72     int createProcesses(vector<string>);
73     int getFlowData(string, vector<string>&, vector<int>&, vector<short>&, map<string, int>&, int&);
74     int getUniques(int, int, vector<short>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<double>&, vector<short>&);
75     int flowDistParentFork(int, string, int, vector<int>&, vector<int>&, vector<int>&, vector<double>&, vector<short>&);
76     float calcPairwiseDist(int, int, int, vector<int>&, vector<int>&, vector<double>&, vector<short>&);
77     int createNamesFile(int, int, string, vector<string>&, vector<int>&, vector<int>&);
78     int cluster(string, string, string);
79     int getOTUData(int numSeqs, string,  vector<int>&, vector<int>&, vector<int>&, vector<vector<int> >&, vector<vector<int> >&, vector<int>&, vector<int>&,map<string, int>&);
80     int calcCentroidsDriver(int numOTUs, vector<int>&, vector<int>&, vector<int>&, vector<short>&, vector<int>&, vector<double>&, vector<int>&, vector<short>&, vector<short>&, vector<int>&, int, vector<int>&);
81     double getDistToCentroid(int, int, int, vector<short>&, vector<short>&, int);
82     double getNewWeights(int, vector<int>&, vector<int>&, vector<double>&, vector<int>&, vector<double>&);
83     
84     double getLikelihood(int, int, vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<double>&, vector<double>&);
85     int checkCentroids(int, vector<int>&, vector<double>&);
86     void calcNewDistances(int, int, vector<int>& , vector<double>&,vector<double>& , vector<short>& change, vector<int>&,vector<vector<int> >&, vector<double>&, vector<vector<int> >&, vector<int>&, vector<int>&, vector<short>&, vector<short>&, int, vector<int>&);
87     int fill(int, vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<vector<int> >&, vector<vector<int> >&);
88     void setOTUs(int, int, vector<int>&, vector<int>&, vector<int>&, vector<int>&,
89                  vector<int>&, vector<double>&, vector<double>&, vector<vector<int> >&, vector<vector<int> >&);
90     void writeQualities(int, int, string, vector<int>, vector<int>&, vector<int>&, vector<double>&, vector<short>&, vector<short>&, vector<int>&, vector<int>&, vector<string>&, vector<int>&, vector<vector<int> >&);
91     void writeSequences(string, int, int, string, vector<int>, vector<short>&, vector<string>&, vector<vector<int> >&, vector<int>&);
92     void writeNames(string, int, string, vector<int>, vector<string>&, vector<vector<int> >&, vector<int>&);
93     void writeGroups(string, int, vector<string>&);
94     void writeClusters(string, int, int, vector<int>, vector<int>&, vector<short>&, vector<string>&, vector<vector<int> >&, vector<int>&, vector<int>&, vector<short>&);
95     
96         void getSingleLookUp();
97         void getJointLookUp();
98     double getProbIntensity(int);
99         
100         
101 #ifdef USE_MPI
102         string flowDistMPI(int, int);
103         void calcNewDistancesChildMPI(int, int, vector<int>&);
104
105         int pid, ncpus; 
106     
107      void getFlowData();
108      void getUniques();
109      
110      float calcPairwiseDist(int, int);
111      void flowDistParentFork(string, int, int);
112      
113      string createDistFile(int);
114      string createNamesFile();
115      string cluster(string, string);
116      
117      void getOTUData(string);
118      void initPyroCluster();
119      void fill();
120      void calcCentroids();
121      void calcCentroidsDriver(int, int);
122      double getDistToCentroid(int, int, int);
123      double getNewWeights();
124      double getLikelihood();
125      void checkCentroids();
126      void calcNewDistances();
127      void calcNewDistancesParent(int, int);
128      void calcNewDistancesChild(int, int, vector<int>&, vector<int>&, vector<double>&);
129      
130      
131      void setOTUs();
132      void writeQualities(vector<int>);
133      void writeSequences(vector<int>);
134      void writeNames(vector<int>);
135      void writeGroups();
136      void writeClusters(vector<int>);
137     
138     vector<string> seqNameVector;
139         vector<int> lengths;
140         vector<short> flowDataIntI;
141         vector<double> flowDataPrI;
142         map<string, int> nameMap;
143         vector<int> otuData;
144         vector<int> cumNumSeqs;
145         vector<int> nSeqsPerOTU;
146         vector<vector<int> > aaP;       //tMaster->aanP:        each row is a different otu / each col contains the sequence indices
147         vector<vector<int> > aaI;       //tMaster->aanI:        that are in each otu - can't differentiate between aaP and aaI 
148         vector<int> seqNumber;          //tMaster->anP:         the sequence id number sorted by OTU
149         vector<int> seqIndex;           //tMaster->anI;         the index that corresponds to seqNumber
150         vector<double> dist;            //adDist - distance of sequences to centroids
151         vector<short> change;           //did the centroid sequence change? 0 = no; 1 = yes
152         vector<int> centroids;          //the representative flowgram for each cluster m
153         vector<double> weight;
154         vector<double> singleTau;       //tMaster->adTau:       1-D Tau vector (1xnumSeqs)
155         vector<short> uniqueFlowgrams;
156         vector<int> uniqueCount;
157         vector<int> mapSeqToUnique;
158         vector<int> mapUniqueToSeq;
159         vector<int> uniqueLengths;
160     int numSeqs, numUniques, numOTUs, numFlowCells;
161     vector<int> nSeqsBreaks;
162         vector<int> nOTUsBreaks;
163
164 #endif
165         
166 };
167
168 /**************************************************************************************************/
169 //custom data structure for threads to use.
170 // This is passed by void pointer so it can be any data type
171 // that can be passed using a single void pointer (LPVOID).
172 struct shhhFlowsData {
173         int threadID, maxIters;
174         float cutoff, sigma, minDelta;
175         string flowOrder;
176         vector<double> singleLookUp;
177         vector<double> jointLookUp;
178     vector<string> filenames;
179     vector<string> outputNames;
180     string thisCompositeFASTAFileName, thisCompositeNameFileName, outputDir;
181     int start, stop;
182         MothurOut* m;
183         
184         shhhFlowsData(){}
185         shhhFlowsData(vector<string> f, string cf, string cn, string ou, string flor, vector<double> jl, vector<double> sl, MothurOut* mout, int st, int sp, float cut, float si, float mD, int mx, int tid) {
186                 filenames = f;
187         thisCompositeFASTAFileName = cf;
188         thisCompositeNameFileName = cn;
189         outputDir = ou;
190         flowOrder = flor;
191                 m = mout;
192                 start = st;
193                 stop = sp;
194                 cutoff= cut;
195         sigma = si;
196         minDelta = mD;
197         maxIters = mx;
198         jointLookUp = jl;
199         singleLookUp = sl;
200                 threadID = tid;
201         }
202 };
203
204 /**************************************************************************************************/
205 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
206 #else
207 static DWORD WINAPI ShhhFlowsThreadFunction(LPVOID lpParam){ 
208         shhhFlowsData* pDataArray;
209         pDataArray = (shhhFlowsData*)lpParam;
210         
211         try {
212         
213                 for(int l=pDataArray->start;l<pDataArray->stop;l++){
214                         
215                         if (pDataArray->m->control_pressed) { break; }
216                         
217                         string flowFileName = pDataArray->filenames[l];
218             
219                         pDataArray->m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(l+1) + " of " + toString(pDataArray->filenames.size()) + ")\t<<<<<\n");
220                         pDataArray->m->mothurOut("Reading flowgrams...\n");
221                         
222             vector<string> seqNameVector;
223             vector<int> lengths;
224             vector<short> flowDataIntI;
225             vector<double> flowDataPrI;
226             map<string, int> nameMap;
227             vector<short> uniqueFlowgrams;
228             vector<int> uniqueCount;
229             vector<int> mapSeqToUnique;
230             vector<int> mapUniqueToSeq;
231             vector<int> uniqueLengths;
232             int numFlowCells;
233             
234             //int numSeqs = getFlowData(flowFileName, seqNameVector, lengths, flowDataIntI, nameMap, numFlowCells);
235             /*****************************************************************************************************/
236             
237             ifstream flowFile;
238            // cout << "herethread " << flowFileName << '\t' << &flowFile << endl;
239             pDataArray->m->openInputFile(flowFileName, flowFile);
240             
241            // cout << "herethread " << flowFileName << endl;
242             string seqName;
243             int currentNumFlowCells;
244             float intensity;
245                         
246             flowFile >> numFlowCells;
247             int index = 0;//pcluster
248             while(!flowFile.eof()){
249                 
250                 if (pDataArray->m->control_pressed) { flowFile.close(); return 0; }
251                 
252                 flowFile >> seqName >> currentNumFlowCells;
253                 lengths.push_back(currentNumFlowCells);
254              //  cout << "herethread " << seqName << endl;  
255                 seqNameVector.push_back(seqName);
256                 nameMap[seqName] = index++;//pcluster
257                 
258                 for(int i=0;i<numFlowCells;i++){
259                     flowFile >> intensity;
260                     if(intensity > 9.99)        {       intensity = 9.99;       }
261                     int intI = int(100 * intensity + 0.0001);
262                     flowDataIntI.push_back(intI);
263                 }
264                 pDataArray->m->gobble(flowFile);
265             }
266             flowFile.close();
267             
268             int numSeqs = seqNameVector.size();         
269           //  cout << numSeqs << endl;   
270             for(int i=0;i<numSeqs;i++){
271                 
272                 if (pDataArray->m->control_pressed) { return 0; }
273                 
274                 int iNumFlowCells = i * numFlowCells;
275                 for(int j=lengths[i];j<numFlowCells;j++){
276                     flowDataIntI[iNumFlowCells + j] = 0;
277                 }
278             }
279           //  cout << "here" << endl; 
280             /*****************************************************************************************************/
281         
282                         if (pDataArray->m->control_pressed) { return 0; }
283                         
284                         pDataArray->m->mothurOut("Identifying unique flowgrams...\n");
285                         //int numUniques = getUniques(numSeqs, numFlowCells, uniqueFlowgrams, uniqueCount, uniqueLengths, mapSeqToUnique, mapUniqueToSeq, lengths, flowDataPrI, flowDataIntI);
286             /*****************************************************************************************************/
287             int numUniques = 0;
288             uniqueFlowgrams.assign(numFlowCells * numSeqs, -1);
289             uniqueCount.assign(numSeqs, 0);                                                     //      anWeights
290             uniqueLengths.assign(numSeqs, 0);
291             mapSeqToUnique.assign(numSeqs, -1);
292             mapUniqueToSeq.assign(numSeqs, -1);
293             
294             vector<short> uniqueFlowDataIntI(numFlowCells * numSeqs, -1);
295             
296             for(int i=0;i<numSeqs;i++){
297                 
298                 if (pDataArray->m->control_pressed) { return 0; }
299                 
300                 int index = 0;
301                 
302                 vector<short> current(numFlowCells);
303                 for(int j=0;j<numFlowCells;j++){
304                     current[j] = short(((flowDataIntI[i * numFlowCells + j] + 50.0)/100.0));
305                 }
306                 
307                 for(int j=0;j<numUniques;j++){
308                     int offset = j * numFlowCells;
309                     bool toEnd = 1;
310                     
311                     int shorterLength;
312                     if(lengths[i] < uniqueLengths[j])   {       shorterLength = lengths[i];                     }
313                     else                                                                {       shorterLength = uniqueLengths[j];       }
314                     
315                     for(int k=0;k<shorterLength;k++){
316                         if(current[k] != uniqueFlowgrams[offset + k]){
317                             toEnd = 0;
318                             break;
319                         }
320                     }
321                     
322                     if(toEnd){
323                         mapSeqToUnique[i] = j;
324                         uniqueCount[j]++;
325                         index = j;
326                         if(lengths[i] > uniqueLengths[j])       {       uniqueLengths[j] = lengths[i];  }
327                         break;
328                     }
329                     index++;
330                 }
331                 
332                 if(index == numUniques){
333                     uniqueLengths[numUniques] = lengths[i];
334                     uniqueCount[numUniques] = 1;
335                     mapSeqToUnique[i] = numUniques;//anMap
336                     mapUniqueToSeq[numUniques] = i;//anF
337                     
338                     for(int k=0;k<numFlowCells;k++){
339                         uniqueFlowgrams[numUniques * numFlowCells + k] = current[k];
340                         uniqueFlowDataIntI[numUniques * numFlowCells + k] = flowDataIntI[i * numFlowCells + k];
341                     }
342                     
343                     numUniques++;
344                 }
345             }
346             uniqueFlowDataIntI.resize(numFlowCells * numUniques);
347             uniqueLengths.resize(numUniques);   
348             
349             flowDataPrI.resize(numSeqs * numFlowCells, 0);
350             for(int i=0;i<flowDataPrI.size();i++)       {       
351                 if (pDataArray->m->control_pressed) { return 0; } 
352                 //flowDataPrI[i] = getProbIntensity(flowDataIntI[i]);   
353                 
354                 flowDataPrI[i] = 100000000; 
355             
356                 for(int j=0;j<HOMOPS;j++){//loop signal strength
357                     
358                     if (pDataArray->m->control_pressed) { return 0; }
359                     
360                     float negLogProb = pDataArray->singleLookUp[j * NUMBINS + flowDataIntI[i]];
361                     if(negLogProb < flowDataPrI[i])     {       flowDataPrI[i] = negLogProb; }
362                 }
363             }            
364             
365             /*****************************************************************************************************/
366                         
367                         if (pDataArray->m->control_pressed) { return 0; }
368                         
369                         pDataArray->m->mothurOut("Calculating distances between flowgrams...\n");
370             string distFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.dist";
371             unsigned long long begTime = time(NULL);
372             double begClock = clock();
373             
374             //flowDistParentFork(numFlowCells, distFileName, numUniques, mapUniqueToSeq, mapSeqToUnique, lengths, flowDataPrI, flowDataIntI);   
375             /*****************************************************************************************************/
376             ostringstream outStream;
377             outStream.setf(ios::fixed, ios::floatfield);
378             outStream.setf(ios::dec, ios::basefield);
379             outStream.setf(ios::showpoint);
380             outStream.precision(6);
381             
382             int thisbegTime = time(NULL);
383             double thisbegClock = clock();
384             
385             for(int i=0;i<numUniques;i++){
386                 
387                 if (pDataArray->m->control_pressed) { break; }
388                 
389                 for(int j=0;j<i;j++){
390                     //float flowDistance = calcPairwiseDist(numFlowCells, mapUniqueToSeq[i], mapUniqueToSeq[j], mapSeqToUnique, lengths, flowDataPrI, flowDataIntI);
391                     /*****************************************************************************************************/
392                     int seqA = mapUniqueToSeq[i]; int seqB = mapUniqueToSeq[j];
393                     int minLength = lengths[mapSeqToUnique[seqA]];
394                     if(lengths[seqB] < minLength){      minLength = lengths[mapSeqToUnique[seqB]];      }
395                     
396                     int ANumFlowCells = seqA * numFlowCells;
397                     int BNumFlowCells = seqB * numFlowCells;
398                     
399                     float flowDistance = 0;
400                     
401                     for(int k=0;k<minLength;k++){
402                         
403                         if (pDataArray->m->control_pressed) { break; }
404                         
405                         int flowAIntI = flowDataIntI[ANumFlowCells + k];
406                         float flowAPrI = flowDataPrI[ANumFlowCells + k];
407                         
408                         int flowBIntI = flowDataIntI[BNumFlowCells + k];
409                         float flowBPrI = flowDataPrI[BNumFlowCells + k];
410                         flowDistance += pDataArray->jointLookUp[flowAIntI * NUMBINS + flowBIntI] - flowAPrI - flowBPrI;
411                     }
412                     
413                     flowDistance /= (float) minLength;
414                     /*****************************************************************************************************/
415
416                     if(flowDistance < 1e-6){
417                         outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << 0.000000 << endl;
418                     }
419                     else if(flowDistance <= pDataArray->cutoff){
420                         outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << flowDistance << endl;
421                     }
422                 }
423                 if(i % 100 == 0){
424                     pDataArray->m->mothurOut(toString(i) + "\t" + toString(time(NULL) - thisbegTime));
425                     pDataArray->m->mothurOut("\t" + toString((clock()-thisbegClock)/CLOCKS_PER_SEC));
426                     pDataArray->m->mothurOutEndLine();
427                 }
428             }
429             
430             ofstream distFile(distFileName.c_str());
431             distFile << outStream.str();                
432             distFile.close();
433             
434             if (pDataArray->m->control_pressed) {}
435             else {
436                 pDataArray->m->mothurOut(toString(numUniques-1) + "\t" + toString(time(NULL) - thisbegTime));
437                 pDataArray->m->mothurOut("\t" + toString((clock()-thisbegClock)/CLOCKS_PER_SEC));
438                 pDataArray->m->mothurOutEndLine();
439             }
440             /*****************************************************************************************************/
441
442             pDataArray->m->mothurOutEndLine();
443             pDataArray->m->mothurOut("Total time: " + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/CLOCKS_PER_SEC) + '\n');
444             
445                         string namesFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names";
446                         //createNamesFile(numSeqs, numUniques, namesFileName, seqNameVector, mapSeqToUnique, mapUniqueToSeq);
447             /*****************************************************************************************************/
448             vector<string> duplicateNames(numUniques, "");
449             for(int i=0;i<numSeqs;i++){
450                 duplicateNames[mapSeqToUnique[i]] += seqNameVector[i] + ',';
451             }
452             
453             ofstream nameFile;
454             pDataArray->m->openOutputFile(namesFileName, nameFile);
455             
456             for(int i=0;i<numUniques;i++){
457                 if (pDataArray->m->control_pressed) { nameFile.close(); return 0; }
458                 nameFile << mapUniqueToSeq[i] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
459             }
460             nameFile.close();
461             /*****************************************************************************************************/
462
463                         if (pDataArray->m->control_pressed) { return 0; }
464                         
465                         pDataArray->m->mothurOut("\nClustering flowgrams...\n");
466             string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.list";
467                         //cluster(listFileName, distFileName, namesFileName);
468             /*****************************************************************************************************/
469             ReadMatrix* read = new ReadColumnMatrix(distFileName);      
470             read->setCutoff(pDataArray->cutoff);
471             
472             NameAssignment* clusterNameMap = new NameAssignment(namesFileName);
473             clusterNameMap->readMap();
474             read->read(clusterNameMap);
475             
476             ListVector* list = read->getListVector();
477             SparseMatrix* matrix = read->getMatrix();
478             
479             delete read; 
480             delete clusterNameMap; 
481             
482             RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
483             
484             Cluster* cluster = new CompleteLinkage(rabund, list, matrix, pDataArray->cutoff, "furthest"); 
485             string tag = cluster->getTag();
486             
487             double clusterCutoff = pDataArray->cutoff;
488             while (matrix->getSmallDist() <= clusterCutoff && matrix->getNNodes() > 0){
489                 
490                 if (pDataArray->m->control_pressed) { break; }
491                 
492                 cluster->update(clusterCutoff);
493             }
494             
495             list->setLabel(toString(pDataArray->cutoff));
496             
497             ofstream listFileOut;
498             pDataArray->m->openOutputFile(listFileName, listFileOut);
499             list->print(listFileOut);
500             listFileOut.close();
501             
502             delete matrix;      delete cluster; delete rabund; delete list;
503             /*****************************************************************************************************/
504
505                         if (pDataArray->m->control_pressed) { return 0; }
506             
507             vector<int> otuData;
508             vector<int> cumNumSeqs;
509             vector<int> nSeqsPerOTU;
510             vector<vector<int> > aaP;   //tMaster->aanP:        each row is a different otu / each col contains the sequence indices
511             vector<vector<int> > aaI;   //tMaster->aanI:        that are in each otu - can't differentiate between aaP and aaI 
512             vector<int> seqNumber;              //tMaster->anP:         the sequence id number sorted by OTU
513             vector<int> seqIndex;               //tMaster->anI;         the index that corresponds to seqNumber
514             
515                         
516                         //int numOTUs = getOTUData(numSeqs, listFileName, otuData, cumNumSeqs, nSeqsPerOTU, aaP, aaI, seqNumber, seqIndex, nameMap);
517                         /*****************************************************************************************************/
518             ifstream listFile;
519             pDataArray->m->openInputFile(listFileName, listFile);
520             string label;
521             int numOTUs;
522             
523             listFile >> label >> numOTUs;
524             
525             otuData.assign(numSeqs, 0);
526             cumNumSeqs.assign(numOTUs, 0);
527             nSeqsPerOTU.assign(numOTUs, 0);
528             aaP.clear();aaP.resize(numOTUs);
529             
530             seqNumber.clear();
531             aaI.clear();
532             seqIndex.clear();
533             
534             string singleOTU = "";
535             
536             for(int i=0;i<numOTUs;i++){
537                 
538                 if (pDataArray->m->control_pressed) { break; }
539                 
540                 listFile >> singleOTU;
541                 
542                 istringstream otuString(singleOTU);
543                 
544                 while(otuString){
545                     
546                     string seqName = "";
547                     
548                     for(int j=0;j<singleOTU.length();j++){
549                         char letter = otuString.get();
550                         
551                         if(letter != ','){
552                             seqName += letter;
553                         }
554                         else{
555                             map<string,int>::iterator nmIt = nameMap.find(seqName);
556                             int index = nmIt->second;
557                             
558                             nameMap.erase(nmIt);
559                             
560                             otuData[index] = i;
561                             nSeqsPerOTU[i]++;
562                             aaP[i].push_back(index);
563                             seqName = "";
564                         }
565                     }
566                     
567                     map<string,int>::iterator nmIt = nameMap.find(seqName);
568                     
569                     int index = nmIt->second;
570                     nameMap.erase(nmIt);
571                     
572                     otuData[index] = i;
573                     nSeqsPerOTU[i]++;
574                     aaP[i].push_back(index);    
575                     
576                     otuString.get();
577                 }
578                 
579                 sort(aaP[i].begin(), aaP[i].end());
580                 for(int j=0;j<nSeqsPerOTU[i];j++){
581                     seqNumber.push_back(aaP[i][j]);
582                 }
583                 for(int j=nSeqsPerOTU[i];j<numSeqs;j++){
584                     aaP[i].push_back(0);
585                 }
586                 
587                 
588             }
589             
590             for(int i=1;i<numOTUs;i++){
591                 cumNumSeqs[i] = cumNumSeqs[i-1] + nSeqsPerOTU[i-1];
592             }
593             aaI = aaP;
594             seqIndex = seqNumber;
595             
596             listFile.close();       
597             /*****************************************************************************************************/
598
599                         if (pDataArray->m->control_pressed) { return 0; }
600                         
601                         pDataArray->m->mothurRemove(distFileName);
602                         pDataArray->m->mothurRemove(namesFileName);
603                         pDataArray->m->mothurRemove(listFileName);
604                         
605             vector<double> dist;                //adDist - distance of sequences to centroids
606             vector<short> change;               //did the centroid sequence change? 0 = no; 1 = yes
607             vector<int> centroids;              //the representative flowgram for each cluster m
608             vector<double> weight;
609             vector<double> singleTau;   //tMaster->adTau:       1-D Tau vector (1xnumSeqs)
610             vector<int> nSeqsBreaks;
611             vector<int> nOTUsBreaks;
612             
613                         dist.assign(numSeqs * numOTUs, 0);
614             change.assign(numOTUs, 1);
615             centroids.assign(numOTUs, -1);
616             weight.assign(numOTUs, 0);
617             singleTau.assign(numSeqs, 1.0);
618             
619             nSeqsBreaks.assign(2, 0);
620             nOTUsBreaks.assign(2, 0);
621             
622             nSeqsBreaks[0] = 0;
623             nSeqsBreaks[1] = numSeqs;
624             nOTUsBreaks[1] = numOTUs;
625                         
626                         if (pDataArray->m->control_pressed) { break; }
627                         
628                         double maxDelta = 0;
629                         int iter = 0;
630                         
631                         begClock = clock();
632                         begTime = time(NULL);
633             
634                         pDataArray->m->mothurOut("\nDenoising flowgrams...\n");
635                         pDataArray->m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n");
636                         
637                         while((pDataArray->maxIters == 0 && maxDelta > pDataArray->minDelta) || iter < MIN_ITER || (maxDelta > pDataArray->minDelta && iter < pDataArray->maxIters)){
638                                 
639                                 if (pDataArray->m->control_pressed) { break; }
640                                 
641                                 double cycClock = clock();
642                                 unsigned long long cycTime = time(NULL);
643                                 //fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
644                 /*****************************************************************************************************/
645                 int indexFill = 0;
646                 for(int i=0;i<numOTUs;i++){
647                     
648                     if (pDataArray->m->control_pressed) { return 0; }
649                     
650                     cumNumSeqs[i] = indexFill;
651                     for(int j=0;j<nSeqsPerOTU[i];j++){
652                         seqNumber[indexFill] = aaP[i][j];
653                         seqIndex[indexFill] = aaI[i][j];
654                         
655                         indexFill++;
656                     }
657                 }
658                 /*****************************************************************************************************/
659
660                                 
661                                 if (pDataArray->m->control_pressed) { break; }
662                 
663                                 //calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber);
664                 /*****************************************************************************************************/
665                 for(int i=0;i<numOTUs;i++){
666                     
667                     if (pDataArray->m->control_pressed) { break; }
668                     
669                     double count = 0;
670                     int position = 0;
671                     int minFlowGram = 100000000;
672                     double minFlowValue = 1e8;
673                     change[i] = 0; //FALSE
674                     
675                     for(int j=0;j<nSeqsPerOTU[i];j++){
676                         count += singleTau[seqNumber[cumNumSeqs[i] + j]];
677                     }
678                     
679                     if(nSeqsPerOTU[i] > 0 && count > MIN_COUNT){
680                         vector<double> adF(nSeqsPerOTU[i]);
681                         vector<int> anL(nSeqsPerOTU[i]);
682                         
683                         for(int j=0;j<nSeqsPerOTU[i];j++){
684                             int index = cumNumSeqs[i] + j;
685                             int nI = seqIndex[index];
686                             int nIU = mapSeqToUnique[nI];
687                             
688                             int k;
689                             for(k=0;k<position;k++){
690                                 if(nIU == anL[k]){
691                                     break;
692                                 }
693                             }
694                             if(k == position){
695                                 anL[position] = nIU;
696                                 adF[position] = 0.0000;
697                                 position++;
698                             }                                           
699                         }
700                         
701                         for(int j=0;j<nSeqsPerOTU[i];j++){
702                             int index = cumNumSeqs[i] + j;
703                             int nI = seqIndex[index];
704                             
705                             double tauValue = singleTau[seqNumber[index]];
706                             
707                             for(int k=0;k<position;k++){
708                                // double dist = getDistToCentroid(anL[k], nI, lengths[nI], uniqueFlowgrams, flowDataIntI, numFlowCells);
709                                 /*****************************************************************************************************/
710                                 int flowAValue = anL[k] * numFlowCells;
711                                 int flowBValue = nI * numFlowCells;
712                                 
713                                 double dist = 0;
714                                 
715                                 for(int l=0;l<lengths[nI];l++){
716                                     dist += pDataArray->singleLookUp[uniqueFlowgrams[flowAValue] * NUMBINS + flowDataIntI[flowBValue]];
717                                     flowAValue++;
718                                     flowBValue++;
719                                 }
720                                 
721                                 dist = dist / (double)lengths[nI];
722                                 /*****************************************************************************************************/
723                                 adF[k] += dist * tauValue;
724                             }
725                         }
726                         
727                         for(int j=0;j<position;j++){
728                             if(adF[j] < minFlowValue){
729                                 minFlowGram = j;
730                                 minFlowValue = adF[j];
731                             }
732                         }
733                         
734                         if(centroids[i] != anL[minFlowGram]){
735                             change[i] = 1;
736                             centroids[i] = anL[minFlowGram];
737                         }
738                     }
739                     else if(centroids[i] != -1){
740                         change[i] = 1;
741                         centroids[i] = -1;                      
742                     }
743                 }
744                 /*****************************************************************************************************/
745
746                                 if (pDataArray->m->control_pressed) { break; }
747                 
748                                 //maxDelta = getNewWeights(numOTUs, cumNumSeqs, nSeqsPerOTU, singleTau, seqNumber, weight);  
749                 /*****************************************************************************************************/
750                 double maxChange = 0;
751                 
752                 for(int i=0;i<numOTUs;i++){
753                     
754                     if (pDataArray->m->control_pressed) { break; }
755                     
756                     double difference = weight[i];
757                     weight[i] = 0;
758                     
759                     for(int j=0;j<nSeqsPerOTU[i];j++){
760                         int index = cumNumSeqs[i] + j;
761                         double tauValue = singleTau[seqNumber[index]];
762                         weight[i] += tauValue;
763                     }
764                     
765                     difference = fabs(weight[i] - difference);
766                     if(difference > maxChange){ maxChange = difference; }
767                 }
768                 maxDelta = maxChange;
769                 /*****************************************************************************************************/
770
771                 if (pDataArray->m->control_pressed) { break; }
772                 
773                                 //double nLL = getLikelihood(numSeqs, numOTUs, nSeqsPerOTU, seqNumber, cumNumSeqs, seqIndex, dist, weight); 
774                 /*****************************************************************************************************/
775                 vector<long double> P(numSeqs, 0);
776                 int effNumOTUs = 0;
777                 
778                 for(int i=0;i<numOTUs;i++){
779                     if(weight[i] > MIN_WEIGHT){
780                         effNumOTUs++;
781                     }
782                 }
783                 
784                 string hold;
785                 for(int i=0;i<numOTUs;i++){
786                     
787                     if (pDataArray->m->control_pressed) { break; }
788                     
789                     for(int j=0;j<nSeqsPerOTU[i];j++){
790                         int index = cumNumSeqs[i] + j;
791                         int nI = seqIndex[index];
792                         double singleDist = dist[seqNumber[index]];
793                         
794                         P[nI] += weight[i] * exp(-singleDist * pDataArray->sigma);
795                     }
796                 }
797                 double nLL = 0.00;
798                 for(int i=0;i<numSeqs;i++){
799                     if(P[i] == 0){      P[i] = DBL_EPSILON;     }
800                     
801                     nLL += -log(P[i]);
802                 }
803                 
804                 nLL = nLL -(double)numSeqs * log(pDataArray->sigma);
805                 /*****************************************************************************************************/
806
807                 if (pDataArray->m->control_pressed) { break; }
808                 
809                                 //checkCentroids(numOTUs, centroids, weight);
810                 /*****************************************************************************************************/
811                 vector<int> unique(numOTUs, 1);
812                 
813                 for(int i=0;i<numOTUs;i++){
814                     if(centroids[i] == -1 || weight[i] < MIN_WEIGHT){
815                         unique[i] = -1;
816                     }
817                 }
818                 
819                 for(int i=0;i<numOTUs;i++){
820                     
821                     if (pDataArray->m->control_pressed) { break; }
822                     
823                     if(unique[i] == 1){
824                         for(int j=i+1;j<numOTUs;j++){
825                             if(unique[j] == 1){
826                                 
827                                 if(centroids[j] == centroids[i]){
828                                     unique[j] = 0;
829                                     centroids[j] = -1;
830                                     
831                                     weight[i] += weight[j];
832                                     weight[j] = 0.0;
833                                 }
834                             }
835                         }
836                     }
837                 }
838                 /*****************************************************************************************************/
839
840                                 if (pDataArray->m->control_pressed) { break; }
841                                 
842                                 //calcNewDistances(numSeqs, numOTUs, nSeqsPerOTU,  dist, weight, change, centroids, aaP, singleTau, aaI, seqNumber, seqIndex, uniqueFlowgrams, flowDataIntI, numFlowCells, lengths);
843                 /*****************************************************************************************************/
844                 int total = 0;
845                 vector<double> newTau(numOTUs,0);
846                 vector<double> norms(numSeqs, 0);
847                 nSeqsPerOTU.assign(numOTUs, 0);
848                 
849                 for(int i=0;i<numSeqs;i++){
850                     
851                     if (pDataArray->m->control_pressed) { break; }
852                     
853                     int indexOffset = i * numOTUs;
854                     
855                     double offset = 1e8;
856                     
857                     for(int j=0;j<numOTUs;j++){
858                         
859                         if(weight[j] > MIN_WEIGHT && change[j] == 1){
860                             //dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i], uniqueFlowgrams, flowDataIntI, numFlowCells);
861                             /*****************************************************************************************************/
862                             int flowAValue = centroids[j] * numFlowCells;
863                             int flowBValue = i * numFlowCells;
864                             
865                             double distTemp = 0;
866                             
867                             for(int l=0;l<lengths[i];l++){
868                                 distTemp += pDataArray->singleLookUp[uniqueFlowgrams[flowAValue] * NUMBINS + flowDataIntI[flowBValue]];
869                                 flowAValue++;
870                                 flowBValue++;
871                             }
872                             
873                             dist[indexOffset + j] = distTemp / (double)lengths[i];
874                             /*****************************************************************************************************/
875
876                         }
877                         
878                         if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
879                             offset = dist[indexOffset + j];
880                         }
881                     }
882                     
883                     for(int j=0;j<numOTUs;j++){
884                         if(weight[j] > MIN_WEIGHT){
885                             newTau[j] = exp(pDataArray->sigma * (-dist[indexOffset + j] + offset)) * weight[j];
886                             norms[i] += newTau[j];
887                         }
888                         else{
889                             newTau[j] = 0.0;
890                         }
891                     }
892                     
893                     for(int j=0;j<numOTUs;j++){
894                         newTau[j] /= norms[i];
895                     }
896                     
897                     for(int j=0;j<numOTUs;j++){
898                         if(newTau[j] > MIN_TAU){
899                             
900                             int oldTotal = total;
901                             
902                             total++;
903                             
904                             singleTau.resize(total, 0);
905                             seqNumber.resize(total, 0);
906                             seqIndex.resize(total, 0);
907                             
908                             singleTau[oldTotal] = newTau[j];
909                             
910                             aaP[j][nSeqsPerOTU[j]] = oldTotal;
911                             aaI[j][nSeqsPerOTU[j]] = i;
912                             nSeqsPerOTU[j]++;
913                         }
914                     }
915                     
916                 }
917
918                 /*****************************************************************************************************/
919
920                                 if (pDataArray->m->control_pressed) { break; }
921                                 
922                                 iter++;
923                                 
924                                 pDataArray->m->mothurOut(toString(iter) + '\t' + toString(maxDelta) + '\t' + toString(nLL) + '\t' + toString(time(NULL) - cycTime) + '\t' + toString((clock() - cycClock)/(double)CLOCKS_PER_SEC) + '\n');
925                 
926                         }       
927                         
928                         if (pDataArray->m->control_pressed) { break; }
929                         
930                         pDataArray->m->mothurOut("\nFinalizing...\n");
931                         //fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
932             /*****************************************************************************************************/
933             int indexFill = 0;
934             for(int i=0;i<numOTUs;i++){
935                 
936                 if (pDataArray->m->control_pressed) { return 0; }
937                 
938                 cumNumSeqs[i] = indexFill;
939                 for(int j=0;j<nSeqsPerOTU[i];j++){
940                     seqNumber[indexFill] = aaP[i][j];
941                     seqIndex[indexFill] = aaI[i][j];
942                     
943                     indexFill++;
944                 }
945             }
946             /*****************************************************************************************************/
947
948                         if (pDataArray->m->control_pressed) { break; }
949                         
950                         //setOTUs(numOTUs, numSeqs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, otuData, singleTau, dist, aaP, aaI);
951             /*****************************************************************************************************/
952             vector<double> bigTauMatrix(numOTUs * numSeqs, 0.0000);
953             
954             for(int i=0;i<numOTUs;i++){
955                 
956                 if (pDataArray->m->control_pressed) { break; }
957                 
958                 for(int j=0;j<nSeqsPerOTU[i];j++){
959                     int index = cumNumSeqs[i] + j;
960                     double tauValue = singleTau[seqNumber[index]];
961                     int sIndex = seqIndex[index];
962                     bigTauMatrix[sIndex * numOTUs + i] = tauValue;                              
963                 }
964             }
965             
966             for(int i=0;i<numSeqs;i++){
967                 double maxTau = -1.0000;
968                 int maxOTU = -1;
969                 for(int j=0;j<numOTUs;j++){
970                     if(bigTauMatrix[i * numOTUs + j] > maxTau){
971                         maxTau = bigTauMatrix[i * numOTUs + j];
972                         maxOTU = j;
973                     }
974                 }
975                 
976                 otuData[i] = maxOTU;
977             }
978             
979             nSeqsPerOTU.assign(numOTUs, 0);             
980             
981             for(int i=0;i<numSeqs;i++){
982                 int index = otuData[i];
983                 
984                 singleTau[i] = 1.0000;
985                 dist[i] = 0.0000;
986                 
987                 aaP[index][nSeqsPerOTU[index]] = i;
988                 aaI[index][nSeqsPerOTU[index]] = i;
989                 
990                 nSeqsPerOTU[index]++;
991             }
992             
993             //fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);    
994             /*****************************************************************************************************/
995             indexFill = 0;
996             for(int i=0;i<numOTUs;i++){
997                 
998                 if (pDataArray->m->control_pressed) { return 0; }
999                 
1000                 cumNumSeqs[i] = indexFill;
1001                 for(int j=0;j<nSeqsPerOTU[i];j++){
1002                     seqNumber[indexFill] = aaP[i][j];
1003                     seqIndex[indexFill] = aaI[i][j];
1004                     
1005                     indexFill++;
1006                 }
1007             }
1008             /*****************************************************************************************************/
1009
1010             /*****************************************************************************************************/
1011
1012                         if (pDataArray->m->control_pressed) { break; }
1013                         
1014                         vector<int> otuCounts(numOTUs, 0);
1015                         for(int i=0;i<numSeqs;i++)      {       otuCounts[otuData[i]]++;        }
1016                         
1017                         //calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber);       
1018             /*****************************************************************************************************/
1019             for(int i=0;i<numOTUs;i++){
1020                 
1021                 if (pDataArray->m->control_pressed) { break; }
1022                 
1023                 double count = 0;
1024                 int position = 0;
1025                 int minFlowGram = 100000000;
1026                 double minFlowValue = 1e8;
1027                 change[i] = 0; //FALSE
1028                 
1029                 for(int j=0;j<nSeqsPerOTU[i];j++){
1030                     count += singleTau[seqNumber[cumNumSeqs[i] + j]];
1031                 }
1032                 
1033                 if(nSeqsPerOTU[i] > 0 && count > MIN_COUNT){
1034                     vector<double> adF(nSeqsPerOTU[i]);
1035                     vector<int> anL(nSeqsPerOTU[i]);
1036                     
1037                     for(int j=0;j<nSeqsPerOTU[i];j++){
1038                         int index = cumNumSeqs[i] + j;
1039                         int nI = seqIndex[index];
1040                         int nIU = mapSeqToUnique[nI];
1041                         
1042                         int k;
1043                         for(k=0;k<position;k++){
1044                             if(nIU == anL[k]){
1045                                 break;
1046                             }
1047                         }
1048                         if(k == position){
1049                             anL[position] = nIU;
1050                             adF[position] = 0.0000;
1051                             position++;
1052                         }                                               
1053                     }
1054                     
1055                     for(int j=0;j<nSeqsPerOTU[i];j++){
1056                         int index = cumNumSeqs[i] + j;
1057                         int nI = seqIndex[index];
1058                         
1059                         double tauValue = singleTau[seqNumber[index]];
1060                         
1061                         for(int k=0;k<position;k++){
1062                             // double dist = getDistToCentroid(anL[k], nI, lengths[nI], uniqueFlowgrams, flowDataIntI, numFlowCells);
1063                             /*****************************************************************************************************/
1064                             int flowAValue = anL[k] * numFlowCells;
1065                             int flowBValue = nI * numFlowCells;
1066                             
1067                             double dist = 0;
1068                             
1069                             for(int l=0;l<lengths[nI];l++){
1070                                 dist += pDataArray->singleLookUp[uniqueFlowgrams[flowAValue] * NUMBINS + flowDataIntI[flowBValue]];
1071                                 flowAValue++;
1072                                 flowBValue++;
1073                             }
1074                             
1075                             dist = dist / (double)lengths[nI];
1076                             /*****************************************************************************************************/
1077                             adF[k] += dist * tauValue;
1078                         }
1079                     }
1080                     
1081                     for(int j=0;j<position;j++){
1082                         if(adF[j] < minFlowValue){
1083                             minFlowGram = j;
1084                             minFlowValue = adF[j];
1085                         }
1086                     }
1087                     
1088                     if(centroids[i] != anL[minFlowGram]){
1089                         change[i] = 1;
1090                         centroids[i] = anL[minFlowGram];
1091                     }
1092                 }
1093                 else if(centroids[i] != -1){
1094                     change[i] = 1;
1095                     centroids[i] = -1;                  
1096                 }
1097             }
1098
1099             /*****************************************************************************************************/
1100
1101             if (pDataArray->m->control_pressed) { break; }
1102             
1103                         //writeQualities(numOTUs, numFlowCells, flowFileName, otuCounts, nSeqsPerOTU, seqNumber, singleTau, flowDataIntI, uniqueFlowgrams, cumNumSeqs, mapUniqueToSeq, seqNameVector, centroids, aaI); 
1104             if (pDataArray->m->control_pressed) { break; }
1105             /*****************************************************************************************************/
1106             string thisOutputDir = pDataArray->outputDir;
1107             if (pDataArray->outputDir == "") {  thisOutputDir += pDataArray->m->hasPath(flowFileName);  }
1108             string qualityFileName = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName)) + "shhh.qual";
1109             
1110             ofstream qualityFile;
1111             pDataArray->m->openOutputFile(qualityFileName, qualityFile);
1112             
1113             qualityFile.setf(ios::fixed, ios::floatfield);
1114             qualityFile.setf(ios::showpoint);
1115             qualityFile << setprecision(6);
1116             
1117             vector<vector<int> > qualities(numOTUs);
1118             vector<double> pr(HOMOPS, 0);
1119             
1120             
1121             for(int i=0;i<numOTUs;i++){
1122                 
1123                 if (pDataArray->m->control_pressed) { break; }
1124                 
1125                 int index = 0;
1126                 int base = 0;
1127                 
1128                 if(nSeqsPerOTU[i] > 0){
1129                     qualities[i].assign(1024, -1);
1130                     
1131                     while(index < numFlowCells){
1132                         double maxPrValue = 1e8;
1133                         short maxPrIndex = -1;
1134                         double count = 0.0000;
1135                         
1136                         pr.assign(HOMOPS, 0);
1137                         
1138                         for(int j=0;j<nSeqsPerOTU[i];j++){
1139                             int lIndex = cumNumSeqs[i] + j;
1140                             double tauValue = singleTau[seqNumber[lIndex]];
1141                             int sequenceIndex = aaI[i][j];
1142                             short intensity = flowDataIntI[sequenceIndex * numFlowCells + index];
1143                             
1144                             count += tauValue;
1145                             
1146                             for(int s=0;s<HOMOPS;s++){
1147                                 pr[s] += tauValue * pDataArray->singleLookUp[s * NUMBINS + intensity];
1148                             }
1149                         }
1150                         
1151                         maxPrIndex = uniqueFlowgrams[centroids[i] * numFlowCells + index];
1152                         maxPrValue = pr[maxPrIndex];
1153                         
1154                         if(count > MIN_COUNT){
1155                             double U = 0.0000;
1156                             double norm = 0.0000;
1157                             
1158                             for(int s=0;s<HOMOPS;s++){
1159                                 norm += exp(-(pr[s] - maxPrValue));
1160                             }
1161                             
1162                             for(int s=1;s<=maxPrIndex;s++){
1163                                 int value = 0;
1164                                 double temp = 0.0000;
1165                                 
1166                                 U += exp(-(pr[s-1]-maxPrValue))/norm;
1167                                 
1168                                 if(U>0.00){
1169                                     temp = log10(U);
1170                                 }
1171                                 else{
1172                                     temp = -10.1;
1173                                 }
1174                                 temp = floor(-10 * temp);
1175                                 value = (int)floor(temp);
1176                                 if(value > 100){        value = 100;    }
1177                                 
1178                                 qualities[i][base] = (int)value;
1179                                 base++;
1180                             }
1181                         }
1182                         
1183                         index++;
1184                     }
1185                 }
1186                 
1187                 
1188                 if(otuCounts[i] > 0){
1189                     qualityFile << '>' << seqNameVector[mapUniqueToSeq[i]] << endl;
1190                     
1191                     int j=4;    //need to get past the first four bases
1192                     while(qualities[i][j] != -1){
1193                         qualityFile << qualities[i][j] << ' ';
1194                         j++;
1195                     }
1196                     qualityFile << endl;
1197                 }
1198             }
1199             qualityFile.close();
1200             pDataArray->outputNames.push_back(qualityFileName);
1201             /*****************************************************************************************************/
1202
1203            // writeSequences(thisCompositeFASTAFileName, numOTUs, numFlowCells, flowFileName, otuCounts, uniqueFlowgrams, seqNameVector, aaI, centroids);
1204             if (pDataArray->m->control_pressed) { break; }
1205             /*****************************************************************************************************/
1206             thisOutputDir = pDataArray->outputDir;
1207             if (pDataArray->outputDir == "") {  thisOutputDir += pDataArray->m->hasPath(flowFileName);  }
1208             string fastaFileName = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName)) + "shhh.fasta";
1209             ofstream fastaFile;
1210             pDataArray->m->openOutputFile(fastaFileName, fastaFile);
1211             
1212             vector<string> names(numOTUs, "");
1213             
1214             for(int i=0;i<numOTUs;i++){
1215                 
1216                 if (pDataArray->m->control_pressed) { break; }
1217                 
1218                 int index = centroids[i];
1219                 
1220                 if(otuCounts[i] > 0){
1221                     fastaFile << '>' << seqNameVector[aaI[i][0]] << endl;
1222                     
1223                     string newSeq = "";
1224                     
1225                     for(int j=0;j<numFlowCells;j++){
1226                         
1227                         char base = pDataArray->flowOrder[j % 4];
1228                         for(int k=0;k<uniqueFlowgrams[index * numFlowCells + j];k++){
1229                             newSeq += base;
1230                         }
1231                     }
1232                     
1233                     fastaFile << newSeq.substr(4) << endl;
1234                 }
1235             }
1236             fastaFile.close();
1237             
1238             pDataArray->outputNames.push_back(fastaFileName);
1239             
1240             if(pDataArray->thisCompositeFASTAFileName != ""){
1241                 pDataArray->m->appendFiles(fastaFileName, pDataArray->thisCompositeFASTAFileName);
1242             }
1243
1244             /*****************************************************************************************************/
1245
1246             //writeNames(thisCompositeNamesFileName, numOTUs, flowFileName, otuCounts, seqNameVector, aaI, nSeqsPerOTU);                                
1247             if (pDataArray->m->control_pressed) { break; }
1248             /*****************************************************************************************************/
1249             thisOutputDir = pDataArray->outputDir;
1250             if (pDataArray->outputDir == "") {  thisOutputDir += pDataArray->m->hasPath(flowFileName);  }
1251             string nameFileName = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName)) + "shhh.names";
1252             ofstream nameFileOut;
1253             pDataArray->m->openOutputFile(nameFileName, nameFileOut);
1254             
1255             for(int i=0;i<numOTUs;i++){
1256                 
1257                 if (pDataArray->m->control_pressed) { break; }
1258                 
1259                 if(otuCounts[i] > 0){
1260                     nameFileOut << seqNameVector[aaI[i][0]] << '\t' << seqNameVector[aaI[i][0]];
1261                     
1262                     for(int j=1;j<nSeqsPerOTU[i];j++){
1263                         nameFileOut << ',' << seqNameVector[aaI[i][j]];
1264                     }
1265                     
1266                     nameFileOut << endl;
1267                 }
1268             }
1269             nameFileOut.close();
1270             pDataArray->outputNames.push_back(nameFileName);
1271             
1272             
1273             if(pDataArray->thisCompositeNameFileName != ""){
1274                 pDataArray->m->appendFiles(nameFileName, pDataArray->thisCompositeNameFileName);
1275             }           
1276             /*****************************************************************************************************/
1277
1278             //writeClusters(flowFileName, numOTUs, numFlowCells,otuCounts, centroids, uniqueFlowgrams, seqNameVector, aaI, nSeqsPerOTU, lengths, flowDataIntI);                 
1279             if (pDataArray->m->control_pressed) { break; }
1280             /*****************************************************************************************************/
1281             thisOutputDir = pDataArray->outputDir;
1282             if (pDataArray->outputDir == "") {  thisOutputDir += pDataArray->m->hasPath(flowFileName);  }
1283             string otuCountsFileName = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName)) + "shhh.counts";
1284             ofstream otuCountsFile;
1285             pDataArray->m->openOutputFile(otuCountsFileName, otuCountsFile);
1286             
1287             string bases = pDataArray->flowOrder;
1288             
1289             for(int i=0;i<numOTUs;i++){
1290                 
1291                 if (pDataArray->m->control_pressed) {
1292                     break;
1293                 }
1294                 //output the translated version of the centroid sequence for the otu
1295                 if(otuCounts[i] > 0){
1296                     int index = centroids[i];
1297                     
1298                     otuCountsFile << "ideal\t";
1299                     for(int j=8;j<numFlowCells;j++){
1300                         char base = bases[j % 4];
1301                         for(int s=0;s<uniqueFlowgrams[index * numFlowCells + j];s++){
1302                             otuCountsFile << base;
1303                         }
1304                     }
1305                     otuCountsFile << endl;
1306                     
1307                     for(int j=0;j<nSeqsPerOTU[i];j++){
1308                         int sequence = aaI[i][j];
1309                         otuCountsFile << seqNameVector[sequence] << '\t';
1310                         
1311                         string newSeq = "";
1312                         
1313                         for(int k=0;k<lengths[sequence];k++){
1314                             char base = bases[k % 4];
1315                             int freq = int(0.01 * (double)flowDataIntI[sequence * numFlowCells + k] + 0.5);
1316                             
1317                             for(int s=0;s<freq;s++){
1318                                 newSeq += base;
1319                                 //otuCountsFile << base;
1320                             }
1321                         }
1322                         otuCountsFile << newSeq.substr(4) << endl;
1323                     }
1324                     otuCountsFile << endl;
1325                 }
1326             }
1327             otuCountsFile.close();
1328             pDataArray->outputNames.push_back(otuCountsFileName);
1329             /*****************************************************************************************************/
1330
1331             //writeGroups(flowFileName, numSeqs, seqNameVector);                                                
1332             if (pDataArray->m->control_pressed) { break; }
1333             /*****************************************************************************************************/
1334             thisOutputDir = pDataArray->outputDir;
1335             if (pDataArray->outputDir == "") {  thisOutputDir += pDataArray->m->hasPath(flowFileName);  }
1336             string fileRoot = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName));
1337             string groupFileName = fileRoot + "shhh.groups";
1338             ofstream groupFile;
1339             pDataArray->m->openOutputFile(groupFileName, groupFile);
1340             
1341             for(int i=0;i<numSeqs;i++){
1342                 if (pDataArray->m->control_pressed) { break; }
1343                 groupFile << seqNameVector[i] << '\t' << fileRoot << endl;
1344             }
1345             groupFile.close();
1346             pDataArray->outputNames.push_back(groupFileName);
1347             /*****************************************************************************************************/
1348
1349             pDataArray->m->mothurOut("Total time to process " + flowFileName + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');
1350                 }
1351                 
1352         if (pDataArray->m->control_pressed) { for (int i = 0; i < pDataArray->outputNames.size(); i++) { pDataArray->m->mothurRemove(pDataArray->outputNames[i]); } return 0; }
1353         
1354         return 0;
1355                 
1356         }
1357         catch(exception& e) {
1358                 pDataArray->m->errorOut(e, "ShhherCommand", "ShhhFlowsThreadFunction");
1359                 exit(1);
1360         }
1361
1362 #endif
1363
1364
1365 #endif
1366