]> git.donarmstrong.com Git - mothur.git/blob - shhhercommand.h
Merge remote-tracking branch 'mothur/master'
[mothur.git] / shhhercommand.h
1 #ifndef SHHHER_H
2 #define SHHHER_H
3
4 /*
5  *  shhher.h
6  *  Mothur
7  *
8  *  Created by Pat Schloss on 12/27/10.
9  *  Copyright 2010 Schloss Lab. All rights reserved.
10  *
11  */
12
13 #include "mothur.h"
14 #include "command.hpp"
15 #include "readcolumn.h"
16 #include "readmatrix.hpp"
17 #include "rabundvector.hpp"
18 #include "sabundvector.hpp"
19 #include "listvector.hpp"
20 #include "cluster.hpp"
21 #include <cfloat>
22
23 //**********************************************************************************************************************
24
25 #define NUMBINS 1000
26 #define HOMOPS 10
27 #define MIN_COUNT 0.1
28 #define MIN_WEIGHT 0.1
29 #define MIN_TAU 0.0001
30 #define MIN_ITER 10
31 //**********************************************************************************************************************
32
33 class ShhherCommand : public Command {
34         
35 public:
36         ShhherCommand(string);
37         ShhherCommand();
38         ~ShhherCommand() {}
39         
40         vector<string> setParameters();
41         string getCommandName()                 { return "shhh.flows";  }
42         string getCommandCategory()             { return "Sequence Processing";         }
43         string getOutputFileNameTag(string, string);
44         string getHelpString(); 
45         string getCitation() { return "Schloss PD, Gevers D, Westcott SL (2011).  Reducing the effects of PCR amplification and sequencing artifacts on 16S rRNA-based studies.  PLoS ONE.  6:e27310.\nQuince C, Lanzen A, Davenport RJ, Turnbaugh PJ (2011).  Removing noise from pyrosequenced amplicons.  BMC Bioinformatics  12:38.\nQuince C, LanzĂ©n A, Curtis TP, Davenport RJ, Hall N, Head IM, Read LF, Sloan WT (2009).  Accurate determination of microbial diversity from 454 pyrosequencing data.  Nat. Methods 6:639.\nhttp://www.mothur.org/wiki/Shhh.flows"; }
46         string getDescription()         { return "shhh.flows"; }
47
48         
49         int execute(); 
50         void help() { m->mothurOut(getHelpString()); }          
51 private:
52         
53     struct linePair {
54                 int start;
55                 int end;
56                 linePair(int i, int j) : start(i), end(j) {}
57         };
58     
59         bool abort, large;
60         string outputDir, flowFileName, flowFilesFileName, lookupFileName, compositeFASTAFileName, compositeNamesFileName;
61
62         int processors, maxIters, largeSize;
63         float cutoff, sigma, minDelta;
64         string flowOrder;
65     
66     vector<string> outputNames;
67         vector<double> singleLookUp;
68         vector<double> jointLookUp;
69     vector<string> flowFileVector;
70         
71     vector<string> parseFlowFiles(string);
72     int driver(vector<string>, string, string, int, int);
73     int createProcesses(vector<string>);
74     int getFlowData(string, vector<string>&, vector<int>&, vector<short>&, map<string, int>&, int&);
75     int getUniques(int, int, vector<short>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<double>&, vector<short>&);
76     int flowDistParentFork(int, string, int, vector<int>&, vector<int>&, vector<int>&, vector<double>&, vector<short>&);
77     float calcPairwiseDist(int, int, int, vector<int>&, vector<int>&, vector<double>&, vector<short>&);
78     int createNamesFile(int, int, string, vector<string>&, vector<int>&, vector<int>&);
79     int cluster(string, string, string);
80     int getOTUData(int numSeqs, string,  vector<int>&, vector<int>&, vector<int>&, vector<vector<int> >&, vector<vector<int> >&, vector<int>&, vector<int>&,map<string, int>&);
81     int calcCentroidsDriver(int numOTUs, vector<int>&, vector<int>&, vector<int>&, vector<short>&, vector<int>&, vector<double>&, vector<int>&, vector<short>&, vector<short>&, vector<int>&, int, vector<int>&);
82     double getDistToCentroid(int, int, int, vector<short>&, vector<short>&, int);
83     double getNewWeights(int, vector<int>&, vector<int>&, vector<double>&, vector<int>&, vector<double>&);
84     
85     double getLikelihood(int, int, vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<double>&, vector<double>&);
86     int checkCentroids(int, vector<int>&, vector<double>&);
87     void calcNewDistances(int, int, vector<int>& , vector<double>&,vector<double>& , vector<short>& change, vector<int>&,vector<vector<int> >&, vector<double>&, vector<vector<int> >&, vector<int>&, vector<int>&, vector<short>&, vector<short>&, int, vector<int>&);
88     int fill(int, vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<vector<int> >&, vector<vector<int> >&);
89     void setOTUs(int, int, vector<int>&, vector<int>&, vector<int>&, vector<int>&,
90                  vector<int>&, vector<double>&, vector<double>&, vector<vector<int> >&, vector<vector<int> >&);
91     void writeQualities(int, int, string, vector<int>, vector<int>&, vector<int>&, vector<double>&, vector<short>&, vector<short>&, vector<int>&, vector<int>&, vector<string>&, vector<int>&, vector<vector<int> >&);
92     void writeSequences(string, int, int, string, vector<int>, vector<short>&, vector<string>&, vector<vector<int> >&, vector<int>&);
93     void writeNames(string, int, string, vector<int>, vector<string>&, vector<vector<int> >&, vector<int>&);
94     void writeGroups(string, string, int, vector<string>&);
95     void writeClusters(string, int, int, vector<int>, vector<int>&, vector<short>&, vector<string>&, vector<vector<int> >&, vector<int>&, vector<int>&, vector<short>&);
96     
97         void getSingleLookUp();
98         void getJointLookUp();
99     double getProbIntensity(int);
100         
101         
102 #ifdef USE_MPI
103         string flowDistMPI(int, int);
104         void calcNewDistancesChildMPI(int, int, vector<int>&);
105
106         int pid, ncpus; 
107     
108      void getFlowData();
109      void getUniques();
110      
111      float calcPairwiseDist(int, int);
112      void flowDistParentFork(string, int, int);
113      
114      string createDistFile(int);
115      string createNamesFile();
116      string cluster(string, string);
117      
118      void getOTUData(string);
119      void initPyroCluster();
120      void fill();
121      void calcCentroids();
122      void calcCentroidsDriver(int, int);
123      double getDistToCentroid(int, int, int);
124      double getNewWeights();
125      double getLikelihood();
126      void checkCentroids();
127      void calcNewDistances();
128      void calcNewDistancesParent(int, int);
129      void calcNewDistancesChild(int, int, vector<int>&, vector<int>&, vector<double>&);
130      
131      
132      void setOTUs();
133      void writeQualities(vector<int>);
134      void writeSequences(vector<int>);
135      void writeNames(vector<int>);
136      void writeGroups();
137      void writeClusters(vector<int>);
138     
139     vector<string> seqNameVector;
140         vector<int> lengths;
141         vector<short> flowDataIntI;
142         vector<double> flowDataPrI;
143         map<string, int> nameMap;
144         vector<int> otuData;
145         vector<int> cumNumSeqs;
146         vector<int> nSeqsPerOTU;
147         vector<vector<int> > aaP;       //tMaster->aanP:        each row is a different otu / each col contains the sequence indices
148         vector<vector<int> > aaI;       //tMaster->aanI:        that are in each otu - can't differentiate between aaP and aaI 
149         vector<int> seqNumber;          //tMaster->anP:         the sequence id number sorted by OTU
150         vector<int> seqIndex;           //tMaster->anI;         the index that corresponds to seqNumber
151         vector<double> dist;            //adDist - distance of sequences to centroids
152         vector<short> change;           //did the centroid sequence change? 0 = no; 1 = yes
153         vector<int> centroids;          //the representative flowgram for each cluster m
154         vector<double> weight;
155         vector<double> singleTau;       //tMaster->adTau:       1-D Tau vector (1xnumSeqs)
156         vector<short> uniqueFlowgrams;
157         vector<int> uniqueCount;
158         vector<int> mapSeqToUnique;
159         vector<int> mapUniqueToSeq;
160         vector<int> uniqueLengths;
161     int numSeqs, numUniques, numOTUs, numFlowCells;
162     vector<int> nSeqsBreaks;
163         vector<int> nOTUsBreaks;
164
165 #endif
166         
167 };
168
169 /**************************************************************************************************
170 //custom data structure for threads to use.
171 // This is passed by void pointer so it can be any data type
172 // that can be passed using a single void pointer (LPVOID).
173 struct shhhFlowsData {
174         int threadID, maxIters;
175         float cutoff, sigma, minDelta;
176         string flowOrder;
177         vector<double> singleLookUp;
178         vector<double> jointLookUp;
179     vector<string> filenames;
180     vector<string> outputNames;
181     string thisCompositeFASTAFileName, thisCompositeNameFileName, outputDir;
182     int start, stop;
183         MothurOut* m;
184         
185         shhhFlowsData(){}
186         shhhFlowsData(vector<string> f, string cf, string cn, string ou, string flor, vector<double> jl, vector<double> sl, MothurOut* mout, int st, int sp, float cut, float si, float mD, int mx, int tid) {
187                 filenames = f;
188         thisCompositeFASTAFileName = cf;
189         thisCompositeNameFileName = cn;
190         outputDir = ou;
191         flowOrder = flor;
192                 m = mout;
193                 start = st;
194                 stop = sp;
195                 cutoff= cut;
196         sigma = si;
197         minDelta = mD;
198         maxIters = mx;
199         jointLookUp = jl;
200         singleLookUp = sl;
201                 threadID = tid;
202         }
203 };
204
205 /**************************************************************************************************
206 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
207 #else
208 static DWORD WINAPI ShhhFlowsThreadFunction(LPVOID lpParam){ 
209         shhhFlowsData* pDataArray;
210         pDataArray = (shhhFlowsData*)lpParam;
211         
212         try {
213         
214                 for(int l=pDataArray->start;l<pDataArray->stop;l++){
215                         
216                         if (pDataArray->m->control_pressed) { break; }
217                         
218                         string flowFileName = pDataArray->filenames[l];
219             
220                         pDataArray->m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(l+1) + " of " + toString(pDataArray->filenames.size()) + ")\t<<<<<\n");
221                         pDataArray->m->mothurOut("Reading flowgrams...\n");
222                         
223             vector<string> seqNameVector;
224             vector<int> lengths;
225             vector<short> flowDataIntI;
226             vector<double> flowDataPrI;
227             map<string, int> nameMap;
228             vector<short> uniqueFlowgrams;
229             vector<int> uniqueCount;
230             vector<int> mapSeqToUnique;
231             vector<int> mapUniqueToSeq;
232             vector<int> uniqueLengths;
233             int numFlowCells;
234             
235             //int numSeqs = getFlowData(flowFileName, seqNameVector, lengths, flowDataIntI, nameMap, numFlowCells);
236             /*****************************************************************************************************
237             
238             ifstream flowFile;
239            // cout << "herethread " << flowFileName << '\t' << &flowFile << endl;
240             pDataArray->m->openInputFile(flowFileName, flowFile);
241             
242            // cout << "herethread " << flowFileName << endl;
243             string seqName;
244             int currentNumFlowCells;
245             float intensity;
246                         
247             flowFile >> numFlowCells;
248             int index = 0;//pcluster
249             while(!flowFile.eof()){
250                 
251                 if (pDataArray->m->control_pressed) { flowFile.close(); return 0; }
252                 
253                 flowFile >> seqName >> currentNumFlowCells;
254                 lengths.push_back(currentNumFlowCells);
255              //  cout << "herethread " << seqName << endl;  
256                 seqNameVector.push_back(seqName);
257                 nameMap[seqName] = index++;//pcluster
258                 
259                 for(int i=0;i<numFlowCells;i++){
260                     flowFile >> intensity;
261                     if(intensity > 9.99)        {       intensity = 9.99;       }
262                     int intI = int(100 * intensity + 0.0001);
263                     flowDataIntI.push_back(intI);
264                 }
265                 pDataArray->m->gobble(flowFile);
266             }
267             flowFile.close();
268             
269             int numSeqs = seqNameVector.size();         
270           //  cout << numSeqs << endl;   
271             for(int i=0;i<numSeqs;i++){
272                 
273                 if (pDataArray->m->control_pressed) { return 0; }
274                 
275                 int iNumFlowCells = i * numFlowCells;
276                 for(int j=lengths[i];j<numFlowCells;j++){
277                     flowDataIntI[iNumFlowCells + j] = 0;
278                 }
279             }
280           //  cout << "here" << endl; 
281             /*****************************************************************************************************
282         
283                         if (pDataArray->m->control_pressed) { return 0; }
284                         
285                         pDataArray->m->mothurOut("Identifying unique flowgrams...\n");
286                         //int numUniques = getUniques(numSeqs, numFlowCells, uniqueFlowgrams, uniqueCount, uniqueLengths, mapSeqToUnique, mapUniqueToSeq, lengths, flowDataPrI, flowDataIntI);
287             /*****************************************************************************************************
288             int numUniques = 0;
289             uniqueFlowgrams.assign(numFlowCells * numSeqs, -1);
290             uniqueCount.assign(numSeqs, 0);                                                     //      anWeights
291             uniqueLengths.assign(numSeqs, 0);
292             mapSeqToUnique.assign(numSeqs, -1);
293             mapUniqueToSeq.assign(numSeqs, -1);
294             
295             vector<short> uniqueFlowDataIntI(numFlowCells * numSeqs, -1);
296             
297             for(int i=0;i<numSeqs;i++){
298                 
299                 if (pDataArray->m->control_pressed) { return 0; }
300                 
301                 int index = 0;
302                 
303                 vector<short> current(numFlowCells);
304                 for(int j=0;j<numFlowCells;j++){
305                     current[j] = short(((flowDataIntI[i * numFlowCells + j] + 50.0)/100.0));
306                 }
307                 
308                 for(int j=0;j<numUniques;j++){
309                     int offset = j * numFlowCells;
310                     bool toEnd = 1;
311                     
312                     int shorterLength;
313                     if(lengths[i] < uniqueLengths[j])   {       shorterLength = lengths[i];                     }
314                     else                                                                {       shorterLength = uniqueLengths[j];       }
315                     
316                     for(int k=0;k<shorterLength;k++){
317                         if(current[k] != uniqueFlowgrams[offset + k]){
318                             toEnd = 0;
319                             break;
320                         }
321                     }
322                     
323                     if(toEnd){
324                         mapSeqToUnique[i] = j;
325                         uniqueCount[j]++;
326                         index = j;
327                         if(lengths[i] > uniqueLengths[j])       {       uniqueLengths[j] = lengths[i];  }
328                         break;
329                     }
330                     index++;
331                 }
332                 
333                 if(index == numUniques){
334                     uniqueLengths[numUniques] = lengths[i];
335                     uniqueCount[numUniques] = 1;
336                     mapSeqToUnique[i] = numUniques;//anMap
337                     mapUniqueToSeq[numUniques] = i;//anF
338                     
339                     for(int k=0;k<numFlowCells;k++){
340                         uniqueFlowgrams[numUniques * numFlowCells + k] = current[k];
341                         uniqueFlowDataIntI[numUniques * numFlowCells + k] = flowDataIntI[i * numFlowCells + k];
342                     }
343                     
344                     numUniques++;
345                 }
346             }
347             uniqueFlowDataIntI.resize(numFlowCells * numUniques);
348             uniqueLengths.resize(numUniques);   
349             
350             flowDataPrI.resize(numSeqs * numFlowCells, 0);
351             for(int i=0;i<flowDataPrI.size();i++)       {       
352                 if (pDataArray->m->control_pressed) { return 0; } 
353                 //flowDataPrI[i] = getProbIntensity(flowDataIntI[i]);   
354                 
355                 flowDataPrI[i] = 100000000; 
356             
357                 for(int j=0;j<HOMOPS;j++){//loop signal strength
358                     
359                     if (pDataArray->m->control_pressed) { return 0; }
360                     
361                     float negLogProb = pDataArray->singleLookUp[j * NUMBINS + flowDataIntI[i]];
362                     if(negLogProb < flowDataPrI[i])     {       flowDataPrI[i] = negLogProb; }
363                 }
364             }            
365             
366             /*****************************************************************************************************
367                         
368                         if (pDataArray->m->control_pressed) { return 0; }
369                         
370                         pDataArray->m->mothurOut("Calculating distances between flowgrams...\n");
371             string distFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.dist";
372             unsigned long long begTime = time(NULL);
373             double begClock = clock();
374             
375             //flowDistParentFork(numFlowCells, distFileName, numUniques, mapUniqueToSeq, mapSeqToUnique, lengths, flowDataPrI, flowDataIntI);   
376             /*****************************************************************************************************
377             ostringstream outStream;
378             outStream.setf(ios::fixed, ios::floatfield);
379             outStream.setf(ios::dec, ios::basefield);
380             outStream.setf(ios::showpoint);
381             outStream.precision(6);
382             
383             int thisbegTime = time(NULL);
384             double thisbegClock = clock();
385             
386             for(int i=0;i<numUniques;i++){
387                 
388                 if (pDataArray->m->control_pressed) { break; }
389                 
390                 for(int j=0;j<i;j++){
391                     //float flowDistance = calcPairwiseDist(numFlowCells, mapUniqueToSeq[i], mapUniqueToSeq[j], mapSeqToUnique, lengths, flowDataPrI, flowDataIntI);
392                     /*****************************************************************************************************
393                     int seqA = mapUniqueToSeq[i]; int seqB = mapUniqueToSeq[j];
394                     int minLength = lengths[mapSeqToUnique[seqA]];
395                     if(lengths[seqB] < minLength){      minLength = lengths[mapSeqToUnique[seqB]];      }
396                     
397                     int ANumFlowCells = seqA * numFlowCells;
398                     int BNumFlowCells = seqB * numFlowCells;
399                     
400                     float flowDistance = 0;
401                     
402                     for(int k=0;k<minLength;k++){
403                         
404                         if (pDataArray->m->control_pressed) { break; }
405                         
406                         int flowAIntI = flowDataIntI[ANumFlowCells + k];
407                         float flowAPrI = flowDataPrI[ANumFlowCells + k];
408                         
409                         int flowBIntI = flowDataIntI[BNumFlowCells + k];
410                         float flowBPrI = flowDataPrI[BNumFlowCells + k];
411                         flowDistance += pDataArray->jointLookUp[flowAIntI * NUMBINS + flowBIntI] - flowAPrI - flowBPrI;
412                     }
413                     
414                     flowDistance /= (float) minLength;
415                     /*****************************************************************************************************
416
417                     if(flowDistance < 1e-6){
418                         outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << 0.000000 << endl;
419                     }
420                     else if(flowDistance <= pDataArray->cutoff){
421                         outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << flowDistance << endl;
422                     }
423                 }
424                 if(i % 100 == 0){
425                     pDataArray->m->mothurOut(toString(i) + "\t" + toString(time(NULL) - thisbegTime));
426                     pDataArray->m->mothurOut("\t" + toString((clock()-thisbegClock)/CLOCKS_PER_SEC));
427                     pDataArray->m->mothurOutEndLine();
428                 }
429             }
430             
431             ofstream distFile(distFileName.c_str());
432             distFile << outStream.str();                
433             distFile.close();
434             
435             if (pDataArray->m->control_pressed) {}
436             else {
437                 pDataArray->m->mothurOut(toString(numUniques-1) + "\t" + toString(time(NULL) - thisbegTime));
438                 pDataArray->m->mothurOut("\t" + toString((clock()-thisbegClock)/CLOCKS_PER_SEC));
439                 pDataArray->m->mothurOutEndLine();
440             }
441             /*****************************************************************************************************
442
443             pDataArray->m->mothurOutEndLine();
444             pDataArray->m->mothurOut("Total time: " + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/CLOCKS_PER_SEC) + '\n');
445             
446                         string namesFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names";
447                         //createNamesFile(numSeqs, numUniques, namesFileName, seqNameVector, mapSeqToUnique, mapUniqueToSeq);
448             /*****************************************************************************************************
449             vector<string> duplicateNames(numUniques, "");
450             for(int i=0;i<numSeqs;i++){
451                 duplicateNames[mapSeqToUnique[i]] += seqNameVector[i] + ',';
452             }
453             
454             ofstream nameFile;
455             pDataArray->m->openOutputFile(namesFileName, nameFile);
456             
457             for(int i=0;i<numUniques;i++){
458                 if (pDataArray->m->control_pressed) { nameFile.close(); return 0; }
459                 nameFile << mapUniqueToSeq[i] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
460             }
461             nameFile.close();
462             /*****************************************************************************************************
463
464                         if (pDataArray->m->control_pressed) { return 0; }
465                         
466                         pDataArray->m->mothurOut("\nClustering flowgrams...\n");
467             string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.list";
468                         //cluster(listFileName, distFileName, namesFileName);
469             /*****************************************************************************************************
470             ReadMatrix* read = new ReadColumnMatrix(distFileName);      
471             read->setCutoff(pDataArray->cutoff);
472             
473             NameAssignment* clusterNameMap = new NameAssignment(namesFileName);
474             clusterNameMap->readMap();
475             read->read(clusterNameMap);
476             
477             ListVector* list = read->getListVector();
478             SparseMatrix* matrix = read->getMatrix();
479             
480             delete read; 
481             delete clusterNameMap; 
482             
483             RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
484             
485             Cluster* cluster = new CompleteLinkage(rabund, list, matrix, pDataArray->cutoff, "furthest"); 
486             string tag = cluster->getTag();
487             
488             double clusterCutoff = pDataArray->cutoff;
489             while (matrix->getSmallDist() <= clusterCutoff && matrix->getNNodes() > 0){
490                 
491                 if (pDataArray->m->control_pressed) { break; }
492                 
493                 cluster->update(clusterCutoff);
494             }
495             
496             list->setLabel(toString(pDataArray->cutoff));
497             
498             ofstream listFileOut;
499             pDataArray->m->openOutputFile(listFileName, listFileOut);
500             list->print(listFileOut);
501             listFileOut.close();
502             
503             delete matrix;      delete cluster; delete rabund; delete list;
504             /*****************************************************************************************************
505
506                         if (pDataArray->m->control_pressed) { return 0; }
507             
508             vector<int> otuData;
509             vector<int> cumNumSeqs;
510             vector<int> nSeqsPerOTU;
511             vector<vector<int> > aaP;   //tMaster->aanP:        each row is a different otu / each col contains the sequence indices
512             vector<vector<int> > aaI;   //tMaster->aanI:        that are in each otu - can't differentiate between aaP and aaI 
513             vector<int> seqNumber;              //tMaster->anP:         the sequence id number sorted by OTU
514             vector<int> seqIndex;               //tMaster->anI;         the index that corresponds to seqNumber
515             
516                         
517                         //int numOTUs = getOTUData(numSeqs, listFileName, otuData, cumNumSeqs, nSeqsPerOTU, aaP, aaI, seqNumber, seqIndex, nameMap);
518                         /*****************************************************************************************************
519             ifstream listFile;
520             pDataArray->m->openInputFile(listFileName, listFile);
521             string label;
522             int numOTUs;
523             
524             listFile >> label >> numOTUs;
525             
526             otuData.assign(numSeqs, 0);
527             cumNumSeqs.assign(numOTUs, 0);
528             nSeqsPerOTU.assign(numOTUs, 0);
529             aaP.clear();aaP.resize(numOTUs);
530             
531             seqNumber.clear();
532             aaI.clear();
533             seqIndex.clear();
534             
535             string singleOTU = "";
536             
537             for(int i=0;i<numOTUs;i++){
538                 
539                 if (pDataArray->m->control_pressed) { break; }
540                 
541                 listFile >> singleOTU;
542                 
543                 istringstream otuString(singleOTU);
544                 
545                 while(otuString){
546                     
547                     string seqName = "";
548                     
549                     for(int j=0;j<singleOTU.length();j++){
550                         char letter = otuString.get();
551                         
552                         if(letter != ','){
553                             seqName += letter;
554                         }
555                         else{
556                             map<string,int>::iterator nmIt = nameMap.find(seqName);
557                             int index = nmIt->second;
558                             
559                             nameMap.erase(nmIt);
560                             
561                             otuData[index] = i;
562                             nSeqsPerOTU[i]++;
563                             aaP[i].push_back(index);
564                             seqName = "";
565                         }
566                     }
567                     
568                     map<string,int>::iterator nmIt = nameMap.find(seqName);
569                     
570                     int index = nmIt->second;
571                     nameMap.erase(nmIt);
572                     
573                     otuData[index] = i;
574                     nSeqsPerOTU[i]++;
575                     aaP[i].push_back(index);    
576                     
577                     otuString.get();
578                 }
579                 
580                 sort(aaP[i].begin(), aaP[i].end());
581                 for(int j=0;j<nSeqsPerOTU[i];j++){
582                     seqNumber.push_back(aaP[i][j]);
583                 }
584                 for(int j=nSeqsPerOTU[i];j<numSeqs;j++){
585                     aaP[i].push_back(0);
586                 }
587                 
588                 
589             }
590             
591             for(int i=1;i<numOTUs;i++){
592                 cumNumSeqs[i] = cumNumSeqs[i-1] + nSeqsPerOTU[i-1];
593             }
594             aaI = aaP;
595             seqIndex = seqNumber;
596             
597             listFile.close();       
598             /*****************************************************************************************************
599
600                         if (pDataArray->m->control_pressed) { return 0; }
601                         
602                         pDataArray->m->mothurRemove(distFileName);
603                         pDataArray->m->mothurRemove(namesFileName);
604                         pDataArray->m->mothurRemove(listFileName);
605                         
606             vector<double> dist;                //adDist - distance of sequences to centroids
607             vector<short> change;               //did the centroid sequence change? 0 = no; 1 = yes
608             vector<int> centroids;              //the representative flowgram for each cluster m
609             vector<double> weight;
610             vector<double> singleTau;   //tMaster->adTau:       1-D Tau vector (1xnumSeqs)
611             vector<int> nSeqsBreaks;
612             vector<int> nOTUsBreaks;
613             
614                         dist.assign(numSeqs * numOTUs, 0);
615             change.assign(numOTUs, 1);
616             centroids.assign(numOTUs, -1);
617             weight.assign(numOTUs, 0);
618             singleTau.assign(numSeqs, 1.0);
619             
620             nSeqsBreaks.assign(2, 0);
621             nOTUsBreaks.assign(2, 0);
622             
623             nSeqsBreaks[0] = 0;
624             nSeqsBreaks[1] = numSeqs;
625             nOTUsBreaks[1] = numOTUs;
626                         
627                         if (pDataArray->m->control_pressed) { break; }
628                         
629                         double maxDelta = 0;
630                         int iter = 0;
631                         
632                         begClock = clock();
633                         begTime = time(NULL);
634             
635                         pDataArray->m->mothurOut("\nDenoising flowgrams...\n");
636                         pDataArray->m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n");
637                         
638                         while((pDataArray->maxIters == 0 && maxDelta > pDataArray->minDelta) || iter < MIN_ITER || (maxDelta > pDataArray->minDelta && iter < pDataArray->maxIters)){
639                                 
640                                 if (pDataArray->m->control_pressed) { break; }
641                                 
642                                 double cycClock = clock();
643                                 unsigned long long cycTime = time(NULL);
644                                 //fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
645                 /*****************************************************************************************************
646                 int indexFill = 0;
647                 for(int i=0;i<numOTUs;i++){
648                     
649                     if (pDataArray->m->control_pressed) { return 0; }
650                     
651                     cumNumSeqs[i] = indexFill;
652                     for(int j=0;j<nSeqsPerOTU[i];j++){
653                         seqNumber[indexFill] = aaP[i][j];
654                         seqIndex[indexFill] = aaI[i][j];
655                         
656                         indexFill++;
657                     }
658                 }
659                 /*****************************************************************************************************
660
661                                 
662                                 if (pDataArray->m->control_pressed) { break; }
663                 
664                                 //calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber);
665                 /*****************************************************************************************************
666                 for(int i=0;i<numOTUs;i++){
667                     
668                     if (pDataArray->m->control_pressed) { break; }
669                     
670                     double count = 0;
671                     int position = 0;
672                     int minFlowGram = 100000000;
673                     double minFlowValue = 1e8;
674                     change[i] = 0; //FALSE
675                     
676                     for(int j=0;j<nSeqsPerOTU[i];j++){
677                         count += singleTau[seqNumber[cumNumSeqs[i] + j]];
678                     }
679                     
680                     if(nSeqsPerOTU[i] > 0 && count > MIN_COUNT){
681                         vector<double> adF(nSeqsPerOTU[i]);
682                         vector<int> anL(nSeqsPerOTU[i]);
683                         
684                         for(int j=0;j<nSeqsPerOTU[i];j++){
685                             int index = cumNumSeqs[i] + j;
686                             int nI = seqIndex[index];
687                             int nIU = mapSeqToUnique[nI];
688                             
689                             int k;
690                             for(k=0;k<position;k++){
691                                 if(nIU == anL[k]){
692                                     break;
693                                 }
694                             }
695                             if(k == position){
696                                 anL[position] = nIU;
697                                 adF[position] = 0.0000;
698                                 position++;
699                             }                                           
700                         }
701                         
702                         for(int j=0;j<nSeqsPerOTU[i];j++){
703                             int index = cumNumSeqs[i] + j;
704                             int nI = seqIndex[index];
705                             
706                             double tauValue = singleTau[seqNumber[index]];
707                             
708                             for(int k=0;k<position;k++){
709                                // double dist = getDistToCentroid(anL[k], nI, lengths[nI], uniqueFlowgrams, flowDataIntI, numFlowCells);
710                                 /*****************************************************************************************************
711                                 int flowAValue = anL[k] * numFlowCells;
712                                 int flowBValue = nI * numFlowCells;
713                                 
714                                 double dist = 0;
715                                 
716                                 for(int l=0;l<lengths[nI];l++){
717                                     dist += pDataArray->singleLookUp[uniqueFlowgrams[flowAValue] * NUMBINS + flowDataIntI[flowBValue]];
718                                     flowAValue++;
719                                     flowBValue++;
720                                 }
721                                 
722                                 dist = dist / (double)lengths[nI];
723                                 /*****************************************************************************************************
724                                 adF[k] += dist * tauValue;
725                             }
726                         }
727                         
728                         for(int j=0;j<position;j++){
729                             if(adF[j] < minFlowValue){
730                                 minFlowGram = j;
731                                 minFlowValue = adF[j];
732                             }
733                         }
734                         
735                         if(centroids[i] != anL[minFlowGram]){
736                             change[i] = 1;
737                             centroids[i] = anL[minFlowGram];
738                         }
739                     }
740                     else if(centroids[i] != -1){
741                         change[i] = 1;
742                         centroids[i] = -1;                      
743                     }
744                 }
745                 /*****************************************************************************************************
746
747                                 if (pDataArray->m->control_pressed) { break; }
748                 
749                                 //maxDelta = getNewWeights(numOTUs, cumNumSeqs, nSeqsPerOTU, singleTau, seqNumber, weight);  
750                 /*****************************************************************************************************
751                 double maxChange = 0;
752                 
753                 for(int i=0;i<numOTUs;i++){
754                     
755                     if (pDataArray->m->control_pressed) { break; }
756                     
757                     double difference = weight[i];
758                     weight[i] = 0;
759                     
760                     for(int j=0;j<nSeqsPerOTU[i];j++){
761                         int index = cumNumSeqs[i] + j;
762                         double tauValue = singleTau[seqNumber[index]];
763                         weight[i] += tauValue;
764                     }
765                     
766                     difference = fabs(weight[i] - difference);
767                     if(difference > maxChange){ maxChange = difference; }
768                 }
769                 maxDelta = maxChange;
770                 /*****************************************************************************************************
771
772                 if (pDataArray->m->control_pressed) { break; }
773                 
774                                 //double nLL = getLikelihood(numSeqs, numOTUs, nSeqsPerOTU, seqNumber, cumNumSeqs, seqIndex, dist, weight); 
775                 /*****************************************************************************************************
776                 vector<long double> P(numSeqs, 0);
777                 int effNumOTUs = 0;
778                 
779                 for(int i=0;i<numOTUs;i++){
780                     if(weight[i] > MIN_WEIGHT){
781                         effNumOTUs++;
782                     }
783                 }
784                 
785                 string hold;
786                 for(int i=0;i<numOTUs;i++){
787                     
788                     if (pDataArray->m->control_pressed) { break; }
789                     
790                     for(int j=0;j<nSeqsPerOTU[i];j++){
791                         int index = cumNumSeqs[i] + j;
792                         int nI = seqIndex[index];
793                         double singleDist = dist[seqNumber[index]];
794                         
795                         P[nI] += weight[i] * exp(-singleDist * pDataArray->sigma);
796                     }
797                 }
798                 double nLL = 0.00;
799                 for(int i=0;i<numSeqs;i++){
800                     if(P[i] == 0){      P[i] = DBL_EPSILON;     }
801                     
802                     nLL += -log(P[i]);
803                 }
804                 
805                 nLL = nLL -(double)numSeqs * log(pDataArray->sigma);
806                 /*****************************************************************************************************
807
808                 if (pDataArray->m->control_pressed) { break; }
809                 
810                                 //checkCentroids(numOTUs, centroids, weight);
811                 /*****************************************************************************************************
812                 vector<int> unique(numOTUs, 1);
813                 
814                 for(int i=0;i<numOTUs;i++){
815                     if(centroids[i] == -1 || weight[i] < MIN_WEIGHT){
816                         unique[i] = -1;
817                     }
818                 }
819                 
820                 for(int i=0;i<numOTUs;i++){
821                     
822                     if (pDataArray->m->control_pressed) { break; }
823                     
824                     if(unique[i] == 1){
825                         for(int j=i+1;j<numOTUs;j++){
826                             if(unique[j] == 1){
827                                 
828                                 if(centroids[j] == centroids[i]){
829                                     unique[j] = 0;
830                                     centroids[j] = -1;
831                                     
832                                     weight[i] += weight[j];
833                                     weight[j] = 0.0;
834                                 }
835                             }
836                         }
837                     }
838                 }
839                 /*****************************************************************************************************
840
841                                 if (pDataArray->m->control_pressed) { break; }
842                                 
843                                 //calcNewDistances(numSeqs, numOTUs, nSeqsPerOTU,  dist, weight, change, centroids, aaP, singleTau, aaI, seqNumber, seqIndex, uniqueFlowgrams, flowDataIntI, numFlowCells, lengths);
844                 /*****************************************************************************************************
845                 int total = 0;
846                 vector<double> newTau(numOTUs,0);
847                 vector<double> norms(numSeqs, 0);
848                 nSeqsPerOTU.assign(numOTUs, 0);
849                 
850                 for(int i=0;i<numSeqs;i++){
851                     
852                     if (pDataArray->m->control_pressed) { break; }
853                     
854                     int indexOffset = i * numOTUs;
855                     
856                     double offset = 1e8;
857                     
858                     for(int j=0;j<numOTUs;j++){
859                         
860                         if(weight[j] > MIN_WEIGHT && change[j] == 1){
861                             //dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i], uniqueFlowgrams, flowDataIntI, numFlowCells);
862                             /*****************************************************************************************************
863                             int flowAValue = centroids[j] * numFlowCells;
864                             int flowBValue = i * numFlowCells;
865                             
866                             double distTemp = 0;
867                             
868                             for(int l=0;l<lengths[i];l++){
869                                 distTemp += pDataArray->singleLookUp[uniqueFlowgrams[flowAValue] * NUMBINS + flowDataIntI[flowBValue]];
870                                 flowAValue++;
871                                 flowBValue++;
872                             }
873                             
874                             dist[indexOffset + j] = distTemp / (double)lengths[i];
875                             /*****************************************************************************************************
876
877                         }
878                         
879                         if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
880                             offset = dist[indexOffset + j];
881                         }
882                     }
883                     
884                     for(int j=0;j<numOTUs;j++){
885                         if(weight[j] > MIN_WEIGHT){
886                             newTau[j] = exp(pDataArray->sigma * (-dist[indexOffset + j] + offset)) * weight[j];
887                             norms[i] += newTau[j];
888                         }
889                         else{
890                             newTau[j] = 0.0;
891                         }
892                     }
893                     
894                     for(int j=0;j<numOTUs;j++){
895                         newTau[j] /= norms[i];
896                     }
897                     
898                     for(int j=0;j<numOTUs;j++){
899                         if(newTau[j] > MIN_TAU){
900                             
901                             int oldTotal = total;
902                             
903                             total++;
904                             
905                             singleTau.resize(total, 0);
906                             seqNumber.resize(total, 0);
907                             seqIndex.resize(total, 0);
908                             
909                             singleTau[oldTotal] = newTau[j];
910                             
911                             aaP[j][nSeqsPerOTU[j]] = oldTotal;
912                             aaI[j][nSeqsPerOTU[j]] = i;
913                             nSeqsPerOTU[j]++;
914                         }
915                     }
916                     
917                 }
918
919                 /*****************************************************************************************************
920
921                                 if (pDataArray->m->control_pressed) { break; }
922                                 
923                                 iter++;
924                                 
925                                 pDataArray->m->mothurOut(toString(iter) + '\t' + toString(maxDelta) + '\t' + toString(nLL) + '\t' + toString(time(NULL) - cycTime) + '\t' + toString((clock() - cycClock)/(double)CLOCKS_PER_SEC) + '\n');
926                 
927                         }       
928                         
929                         if (pDataArray->m->control_pressed) { break; }
930                         
931                         pDataArray->m->mothurOut("\nFinalizing...\n");
932                         //fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
933             /*****************************************************************************************************
934             int indexFill = 0;
935             for(int i=0;i<numOTUs;i++){
936                 
937                 if (pDataArray->m->control_pressed) { return 0; }
938                 
939                 cumNumSeqs[i] = indexFill;
940                 for(int j=0;j<nSeqsPerOTU[i];j++){
941                     seqNumber[indexFill] = aaP[i][j];
942                     seqIndex[indexFill] = aaI[i][j];
943                     
944                     indexFill++;
945                 }
946             }
947             /*****************************************************************************************************
948
949                         if (pDataArray->m->control_pressed) { break; }
950                         
951                         //setOTUs(numOTUs, numSeqs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, otuData, singleTau, dist, aaP, aaI);
952             /*****************************************************************************************************
953             vector<double> bigTauMatrix(numOTUs * numSeqs, 0.0000);
954             
955             for(int i=0;i<numOTUs;i++){
956                 
957                 if (pDataArray->m->control_pressed) { break; }
958                 
959                 for(int j=0;j<nSeqsPerOTU[i];j++){
960                     int index = cumNumSeqs[i] + j;
961                     double tauValue = singleTau[seqNumber[index]];
962                     int sIndex = seqIndex[index];
963                     bigTauMatrix[sIndex * numOTUs + i] = tauValue;                              
964                 }
965             }
966             
967             for(int i=0;i<numSeqs;i++){
968                 double maxTau = -1.0000;
969                 int maxOTU = -1;
970                 for(int j=0;j<numOTUs;j++){
971                     if(bigTauMatrix[i * numOTUs + j] > maxTau){
972                         maxTau = bigTauMatrix[i * numOTUs + j];
973                         maxOTU = j;
974                     }
975                 }
976                 
977                 otuData[i] = maxOTU;
978             }
979             
980             nSeqsPerOTU.assign(numOTUs, 0);             
981             
982             for(int i=0;i<numSeqs;i++){
983                 int index = otuData[i];
984                 
985                 singleTau[i] = 1.0000;
986                 dist[i] = 0.0000;
987                 
988                 aaP[index][nSeqsPerOTU[index]] = i;
989                 aaI[index][nSeqsPerOTU[index]] = i;
990                 
991                 nSeqsPerOTU[index]++;
992             }
993             
994             //fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);    
995             /*****************************************************************************************************
996             indexFill = 0;
997             for(int i=0;i<numOTUs;i++){
998                 
999                 if (pDataArray->m->control_pressed) { return 0; }
1000                 
1001                 cumNumSeqs[i] = indexFill;
1002                 for(int j=0;j<nSeqsPerOTU[i];j++){
1003                     seqNumber[indexFill] = aaP[i][j];
1004                     seqIndex[indexFill] = aaI[i][j];
1005                     
1006                     indexFill++;
1007                 }
1008             }
1009             /*****************************************************************************************************/
1010
1011             /*****************************************************************************************************
1012
1013                         if (pDataArray->m->control_pressed) { break; }
1014                         
1015                         vector<int> otuCounts(numOTUs, 0);
1016                         for(int i=0;i<numSeqs;i++)      {       otuCounts[otuData[i]]++;        }
1017                         
1018                         //calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber);       
1019             /*****************************************************************************************************
1020             for(int i=0;i<numOTUs;i++){
1021                 
1022                 if (pDataArray->m->control_pressed) { break; }
1023                 
1024                 double count = 0;
1025                 int position = 0;
1026                 int minFlowGram = 100000000;
1027                 double minFlowValue = 1e8;
1028                 change[i] = 0; //FALSE
1029                 
1030                 for(int j=0;j<nSeqsPerOTU[i];j++){
1031                     count += singleTau[seqNumber[cumNumSeqs[i] + j]];
1032                 }
1033                 
1034                 if(nSeqsPerOTU[i] > 0 && count > MIN_COUNT){
1035                     vector<double> adF(nSeqsPerOTU[i]);
1036                     vector<int> anL(nSeqsPerOTU[i]);
1037                     
1038                     for(int j=0;j<nSeqsPerOTU[i];j++){
1039                         int index = cumNumSeqs[i] + j;
1040                         int nI = seqIndex[index];
1041                         int nIU = mapSeqToUnique[nI];
1042                         
1043                         int k;
1044                         for(k=0;k<position;k++){
1045                             if(nIU == anL[k]){
1046                                 break;
1047                             }
1048                         }
1049                         if(k == position){
1050                             anL[position] = nIU;
1051                             adF[position] = 0.0000;
1052                             position++;
1053                         }                                               
1054                     }
1055                     
1056                     for(int j=0;j<nSeqsPerOTU[i];j++){
1057                         int index = cumNumSeqs[i] + j;
1058                         int nI = seqIndex[index];
1059                         
1060                         double tauValue = singleTau[seqNumber[index]];
1061                         
1062                         for(int k=0;k<position;k++){
1063                             // double dist = getDistToCentroid(anL[k], nI, lengths[nI], uniqueFlowgrams, flowDataIntI, numFlowCells);
1064                             /*****************************************************************************************************
1065                             int flowAValue = anL[k] * numFlowCells;
1066                             int flowBValue = nI * numFlowCells;
1067                             
1068                             double dist = 0;
1069                             
1070                             for(int l=0;l<lengths[nI];l++){
1071                                 dist += pDataArray->singleLookUp[uniqueFlowgrams[flowAValue] * NUMBINS + flowDataIntI[flowBValue]];
1072                                 flowAValue++;
1073                                 flowBValue++;
1074                             }
1075                             
1076                             dist = dist / (double)lengths[nI];
1077                             /*****************************************************************************************************
1078                             adF[k] += dist * tauValue;
1079                         }
1080                     }
1081                     
1082                     for(int j=0;j<position;j++){
1083                         if(adF[j] < minFlowValue){
1084                             minFlowGram = j;
1085                             minFlowValue = adF[j];
1086                         }
1087                     }
1088                     
1089                     if(centroids[i] != anL[minFlowGram]){
1090                         change[i] = 1;
1091                         centroids[i] = anL[minFlowGram];
1092                     }
1093                 }
1094                 else if(centroids[i] != -1){
1095                     change[i] = 1;
1096                     centroids[i] = -1;                  
1097                 }
1098             }
1099
1100             /*****************************************************************************************************
1101
1102             if (pDataArray->m->control_pressed) { break; }
1103             
1104                         //writeQualities(numOTUs, numFlowCells, flowFileName, otuCounts, nSeqsPerOTU, seqNumber, singleTau, flowDataIntI, uniqueFlowgrams, cumNumSeqs, mapUniqueToSeq, seqNameVector, centroids, aaI); 
1105             if (pDataArray->m->control_pressed) { break; }
1106             /*****************************************************************************************************
1107             string thisOutputDir = pDataArray->outputDir;
1108             if (pDataArray->outputDir == "") {  thisOutputDir += pDataArray->m->hasPath(flowFileName);  }
1109             string qualityFileName = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName)) + "shhh.qual";
1110             
1111             ofstream qualityFile;
1112             pDataArray->m->openOutputFile(qualityFileName, qualityFile);
1113             
1114             qualityFile.setf(ios::fixed, ios::floatfield);
1115             qualityFile.setf(ios::showpoint);
1116             qualityFile << setprecision(6);
1117             
1118             vector<vector<int> > qualities(numOTUs);
1119             vector<double> pr(HOMOPS, 0);
1120             
1121             
1122             for(int i=0;i<numOTUs;i++){
1123                 
1124                 if (pDataArray->m->control_pressed) { break; }
1125                 
1126                 int index = 0;
1127                 int base = 0;
1128                 
1129                 if(nSeqsPerOTU[i] > 0){
1130                     qualities[i].assign(1024, -1);
1131                     
1132                     while(index < numFlowCells){
1133                         double maxPrValue = 1e8;
1134                         short maxPrIndex = -1;
1135                         double count = 0.0000;
1136                         
1137                         pr.assign(HOMOPS, 0);
1138                         
1139                         for(int j=0;j<nSeqsPerOTU[i];j++){
1140                             int lIndex = cumNumSeqs[i] + j;
1141                             double tauValue = singleTau[seqNumber[lIndex]];
1142                             int sequenceIndex = aaI[i][j];
1143                             short intensity = flowDataIntI[sequenceIndex * numFlowCells + index];
1144                             
1145                             count += tauValue;
1146                             
1147                             for(int s=0;s<HOMOPS;s++){
1148                                 pr[s] += tauValue * pDataArray->singleLookUp[s * NUMBINS + intensity];
1149                             }
1150                         }
1151                         
1152                         maxPrIndex = uniqueFlowgrams[centroids[i] * numFlowCells + index];
1153                         maxPrValue = pr[maxPrIndex];
1154                         
1155                         if(count > MIN_COUNT){
1156                             double U = 0.0000;
1157                             double norm = 0.0000;
1158                             
1159                             for(int s=0;s<HOMOPS;s++){
1160                                 norm += exp(-(pr[s] - maxPrValue));
1161                             }
1162                             
1163                             for(int s=1;s<=maxPrIndex;s++){
1164                                 int value = 0;
1165                                 double temp = 0.0000;
1166                                 
1167                                 U += exp(-(pr[s-1]-maxPrValue))/norm;
1168                                 
1169                                 if(U>0.00){
1170                                     temp = log10(U);
1171                                 }
1172                                 else{
1173                                     temp = -10.1;
1174                                 }
1175                                 temp = floor(-10 * temp);
1176                                 value = (int)floor(temp);
1177                                 if(value > 100){        value = 100;    }
1178                                 
1179                                 qualities[i][base] = (int)value;
1180                                 base++;
1181                             }
1182                         }
1183                         
1184                         index++;
1185                     }
1186                 }
1187                 
1188                 
1189                 if(otuCounts[i] > 0){
1190                     qualityFile << '>' << seqNameVector[mapUniqueToSeq[i]] << endl;
1191                     
1192                     int j=4;    //need to get past the first four bases
1193                     while(qualities[i][j] != -1){
1194                         qualityFile << qualities[i][j] << ' ';
1195                         j++;
1196                     }
1197                     qualityFile << endl;
1198                 }
1199             }
1200             qualityFile.close();
1201             pDataArray->outputNames.push_back(qualityFileName);
1202             /*****************************************************************************************************
1203
1204            // writeSequences(thisCompositeFASTAFileName, numOTUs, numFlowCells, flowFileName, otuCounts, uniqueFlowgrams, seqNameVector, aaI, centroids);
1205             if (pDataArray->m->control_pressed) { break; }
1206             /*****************************************************************************************************
1207             thisOutputDir = pDataArray->outputDir;
1208             if (pDataArray->outputDir == "") {  thisOutputDir += pDataArray->m->hasPath(flowFileName);  }
1209             string fastaFileName = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName)) + "shhh.fasta";
1210             ofstream fastaFile;
1211             pDataArray->m->openOutputFile(fastaFileName, fastaFile);
1212             
1213             vector<string> names(numOTUs, "");
1214             
1215             for(int i=0;i<numOTUs;i++){
1216                 
1217                 if (pDataArray->m->control_pressed) { break; }
1218                 
1219                 int index = centroids[i];
1220                 
1221                 if(otuCounts[i] > 0){
1222                     fastaFile << '>' << seqNameVector[aaI[i][0]] << endl;
1223                     
1224                     string newSeq = "";
1225                     
1226                     for(int j=0;j<numFlowCells;j++){
1227                         
1228                         char base = pDataArray->flowOrder[j % 4];
1229                         for(int k=0;k<uniqueFlowgrams[index * numFlowCells + j];k++){
1230                             newSeq += base;
1231                         }
1232                     }
1233                     
1234                     fastaFile << newSeq.substr(4) << endl;
1235                 }
1236             }
1237             fastaFile.close();
1238             
1239             pDataArray->outputNames.push_back(fastaFileName);
1240             
1241             if(pDataArray->thisCompositeFASTAFileName != ""){
1242                 pDataArray->m->appendFiles(fastaFileName, pDataArray->thisCompositeFASTAFileName);
1243             }
1244
1245             /*****************************************************************************************************
1246
1247             //writeNames(thisCompositeNamesFileName, numOTUs, flowFileName, otuCounts, seqNameVector, aaI, nSeqsPerOTU);                                
1248             if (pDataArray->m->control_pressed) { break; }
1249             /*****************************************************************************************************
1250             thisOutputDir = pDataArray->outputDir;
1251             if (pDataArray->outputDir == "") {  thisOutputDir += pDataArray->m->hasPath(flowFileName);  }
1252             string nameFileName = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName)) + "shhh.names";
1253             ofstream nameFileOut;
1254             pDataArray->m->openOutputFile(nameFileName, nameFileOut);
1255             
1256             for(int i=0;i<numOTUs;i++){
1257                 
1258                 if (pDataArray->m->control_pressed) { break; }
1259                 
1260                 if(otuCounts[i] > 0){
1261                     nameFileOut << seqNameVector[aaI[i][0]] << '\t' << seqNameVector[aaI[i][0]];
1262                     
1263                     for(int j=1;j<nSeqsPerOTU[i];j++){
1264                         nameFileOut << ',' << seqNameVector[aaI[i][j]];
1265                     }
1266                     
1267                     nameFileOut << endl;
1268                 }
1269             }
1270             nameFileOut.close();
1271             pDataArray->outputNames.push_back(nameFileName);
1272             
1273             
1274             if(pDataArray->thisCompositeNameFileName != ""){
1275                 pDataArray->m->appendFiles(nameFileName, pDataArray->thisCompositeNameFileName);
1276             }           
1277             /*****************************************************************************************************
1278
1279             //writeClusters(flowFileName, numOTUs, numFlowCells,otuCounts, centroids, uniqueFlowgrams, seqNameVector, aaI, nSeqsPerOTU, lengths, flowDataIntI);                 
1280             if (pDataArray->m->control_pressed) { break; }
1281             /*****************************************************************************************************
1282             thisOutputDir = pDataArray->outputDir;
1283             if (pDataArray->outputDir == "") {  thisOutputDir += pDataArray->m->hasPath(flowFileName);  }
1284             string otuCountsFileName = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName)) + "shhh.counts";
1285             ofstream otuCountsFile;
1286             pDataArray->m->openOutputFile(otuCountsFileName, otuCountsFile);
1287             
1288             string bases = pDataArray->flowOrder;
1289             
1290             for(int i=0;i<numOTUs;i++){
1291                 
1292                 if (pDataArray->m->control_pressed) {
1293                     break;
1294                 }
1295                 //output the translated version of the centroid sequence for the otu
1296                 if(otuCounts[i] > 0){
1297                     int index = centroids[i];
1298                     
1299                     otuCountsFile << "ideal\t";
1300                     for(int j=8;j<numFlowCells;j++){
1301                         char base = bases[j % 4];
1302                         for(int s=0;s<uniqueFlowgrams[index * numFlowCells + j];s++){
1303                             otuCountsFile << base;
1304                         }
1305                     }
1306                     otuCountsFile << endl;
1307                     
1308                     for(int j=0;j<nSeqsPerOTU[i];j++){
1309                         int sequence = aaI[i][j];
1310                         otuCountsFile << seqNameVector[sequence] << '\t';
1311                         
1312                         string newSeq = "";
1313                         
1314                         for(int k=0;k<lengths[sequence];k++){
1315                             char base = bases[k % 4];
1316                             int freq = int(0.01 * (double)flowDataIntI[sequence * numFlowCells + k] + 0.5);
1317                             
1318                             for(int s=0;s<freq;s++){
1319                                 newSeq += base;
1320                                 //otuCountsFile << base;
1321                             }
1322                         }
1323                         otuCountsFile << newSeq.substr(4) << endl;
1324                     }
1325                     otuCountsFile << endl;
1326                 }
1327             }
1328             otuCountsFile.close();
1329             pDataArray->outputNames.push_back(otuCountsFileName)
1330             /*****************************************************************************************************
1331
1332             //writeGroups(flowFileName, numSeqs, seqNameVector);                                                
1333             if (pDataArray->m->control_pressed) { break; }
1334             /*****************************************************************************************************
1335             thisOutputDir = pDataArray->outputDir;
1336             if (pDataArray->outputDir == "") {  thisOutputDir += pDataArray->m->hasPath(flowFileName);  }
1337             string fileRoot = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName));
1338             string groupFileName = fileRoot + "shhh.groups";
1339             ofstream groupFile;
1340             pDataArray->m->openOutputFile(groupFileName, groupFile);
1341             
1342             for(int i=0;i<numSeqs;i++){
1343                 if (pDataArray->m->control_pressed) { break; }
1344                 groupFile << seqNameVector[i] << '\t' << fileRoot << endl;
1345             }
1346             groupFile.close();
1347             pDataArray->outputNames.push_back(groupFileName);
1348             /*****************************************************************************************************
1349
1350             pDataArray->m->mothurOut("Total time to process " + flowFileName + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');
1351                 }
1352                 
1353         if (pDataArray->m->control_pressed) { for (int i = 0; i < pDataArray->outputNames.size(); i++) { pDataArray->m->mothurRemove(pDataArray->outputNames[i]); } return 0; }
1354         
1355         return 0;
1356                 
1357         }
1358         catch(exception& e) {
1359                 pDataArray->m->errorOut(e, "ShhherCommand", "ShhhFlowsThreadFunction");
1360                 exit(1);
1361         }
1362
1363 #endif
1364 */
1365
1366 #endif
1367